# Introduction

This notebook downloads the [Chesapeake Bay Project buoy data](https://buoybay.noaa.gov/data) through the buoy web API and saves it to local files.

The API key used in the cells below may change over time; it works as of this writing, but replace it if requests start to fail.

In [1]:
import requests
import json
import os

# Define API parameters.
# The key may change over time, so it can be overridden through the
# BUOYBAY_API_KEY environment variable without editing this cell; the value
# below is only the fallback used when that variable is not set.
API_KEY = os.environ.get("BUOYBAY_API_KEY", "f159959c117f473477edbdf3245cc2a4831ac61f")
BASE_URL = "https://mw.buoybay.noaa.gov/api/v1/json/query/"

# Define the time range (for all of 2020)
START_DATE = "2020-01-01T00:00:00Z"
END_DATE = "2020-12-31T23:59:59Z"

# Define a list of all station IDs
stations = ["UP", "GR", "J", "FL", "SR", "PL", "AN", "YS", "N", "SN", "S"]

# Define a list of all variables (one API request is made per station/variable pair)
variables = [
    "air_pressure",
    "air_temperature",
    "wind_speed",
    "wind_speed_of_gust",
    "wind_from_direction",
    "relative_humidity",
    "latitude_decimal",
    "longitude_decimal",
    "sea_water_temperature",
    "sea_water_electrical_conductivity",
    "mml_avg_nitrates",
    "simple_turbidity",
    "seanettle_prob",
    "mass_concentration_of_chlorophyll_in_sea_water",
    "mass_concentration_of_oxygen_in_sea_water",
    "sea_water_salinity",
    "sea_surface_wind_wave_period",
    "wave_direction_spread",
    "sea_surface_wave_from_direction",
    "sea_surface_wave_significant_height",
    "sea_surface_wave_mean_height",
]

# Create a directory to save the data if it doesn't exist
os.makedirs("buoy_data", exist_ok=True)

def fetch_buoy_data(station_id, variable):
    """Fetch data from the buoy API for a given station and variable.

    Builds the query URL from the module-level BASE_URL, API_KEY, START_DATE
    and END_DATE, prints it, and performs a GET request.

    Args:
        station_id: Station code appended to BASE_URL.
        variable: Variable name sent as the ``var`` query parameter.

    Returns:
        The decoded JSON response, or None when the request fails, the server
        answers with an HTTP error status, or the body is not valid JSON.
    """
    url = f"{BASE_URL}{station_id}?key={API_KEY}&sd={START_DATE}&ed={END_DATE}&var={variable}"
    print(f"Fetching data from URL: {url}")

    try:
        # Without a timeout a stalled connection would block the whole
        # download loop forever; (connect, read) limits are in seconds.
        response = requests.get(url, timeout=(10, 120))
        response.raise_for_status()  # Raise exception for HTTP errors
        return response.json()  # Return the entire response as JSON
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose JSON
        # decode error is not a RequestException subclass.
        print(f"Error fetching data for station {station_id}, variable {variable}: {e}")
        return None

def save_station_data(station_id, station_data):
    """Save the list of entire JSON responses for each variable to a file.

    Writes ``buoy_data/<station_id>_data.json``, overwriting any existing file.

    Args:
        station_id: Station code used to build the file name.
        station_data: JSON-serialisable object (here, a list of API responses).
    """
    output_dir = "buoy_data"
    # Create the directory here as well so the function does not depend on a
    # separate setup statement having been run first.
    os.makedirs(output_dir, exist_ok=True)
    filepath = os.path.join(output_dir, f"{station_id}_data.json")

    # Save the entire list of JSON responses to the station's file
    with open(filepath, "w", encoding="utf-8") as file:
        json.dump(station_data, file, indent=4)
    print(f"Data saved for station {station_id}")

def fetch_and_save_all_data():
    """Download every variable for every station and write one file per station.

    Iterates the module-level ``stations`` and ``variables``. Responses that
    come back as None are left out; a station with no usable responses is
    not saved at all.
    """
    for station_id in stations:
        print(f"Fetching data for station {station_id}...")

        # Full JSON payloads collected for this station, one per variable
        responses = []
        for variable_name in variables:
            print(f"  Fetching variable: {variable_name}")
            payload = fetch_buoy_data(station_id, variable_name)
            if payload is None:
                continue
            responses.append(payload)

        # Nothing usable came back for this station: move on without writing
        if not responses:
            continue
        save_station_data(station_id, responses)

# Run the data fetching process: one request per (station, variable) pair,
# writing buoy_data/<station>_data.json for each station that returned data.
fetch_and_save_all_data()


Fetching data for station UP...
  Fetching variable: air_pressure
Fetching data from URL: https://mw.buoybay.noaa.gov/api/v1/json/query/UP?key=f159959c117f473477edbdf3245cc2a4831ac61f&sd=2020-01-01T00:00:00Z&ed=2020-12-31T23:59:59Z&var=air_pressure
  Fetching variable: air_temperature
Fetching data from URL: https://mw.buoybay.noaa.gov/api/v1/json/query/UP?key=f159959c117f473477edbdf3245cc2a4831ac61f&sd=2020-01-01T00:00:00Z&ed=2020-12-31T23:59:59Z&var=air_temperature
  Fetching variable: wind_speed
Fetching data from URL: https://mw.buoybay.noaa.gov/api/v1/json/query/UP?key=f159959c117f473477edbdf3245cc2a4831ac61f&sd=2020-01-01T00:00:00Z&ed=2020-12-31T23:59:59Z&var=wind_speed
  Fetching variable: wind_speed_of_gust
Fetching data from URL: https://mw.buoybay.noaa.gov/api/v1/json/query/UP?key=f159959c117f473477edbdf3245cc2a4831ac61f&sd=2020-01-01T00:00:00Z&ed=2020-12-31T23:59:59Z&var=wind_speed_of_gust
  Fetching variable: wind_from_direction
Fetching data from URL: https://mw.buoybay.no

In [5]:
# Load the station lookup table from a CSV file (path is relative to the working directory).
# NOTE(review): this rebinds `stations`, which the download cell defines as a plain
# list of station IDs, to a DataFrame, and it needs `pd`, which is imported in a
# different cell — confirm the intended execution order.
stations = pd.read_csv('../../notebooks/buoyStationCode.csv')

In [13]:
start_date = '2020-01-01T10:00:00z'
end_date = '2020-12-31T10:06:00z'

# NOTE(review): `api_by_buoy` and `testingKey` are not defined in the visible
# cells; they must already exist in the kernel — confirm where they come from.
# Collect one result per station and concatenate once at the end; growing a
# DataFrame with pd.concat inside the loop re-copies all previous rows on
# every iteration.
station_frames = []
for name in stations['Station Short Name']:
    data = api_by_buoy(name,testingKey,start_date,end_date)
    if data is not None:
        station_frames.append(data)

# pd.concat raises on an empty list, so fall back to an empty frame
if station_frames:
    buoys = pd.concat(station_frames, ignore_index=True)
else:
    buoys = pd.DataFrame()

https://mw.buoybay.noaa.gov/api/v1/json/query/UP?key=f159959c117f473477edbdf3245cc2a4831ac61f&sd=2020-01-01T10:00:00Z&ed=2020-01-08T10:00:00Z&var=all
https://mw.buoybay.noaa.gov/api/v1/json/query/UP?key=f159959c117f473477edbdf3245cc2a4831ac61f&sd=2020-01-08T10:00:00Z&ed=2020-01-15T10:00:00Z&var=all
https://mw.buoybay.noaa.gov/api/v1/json/query/UP?key=f159959c117f473477edbdf3245cc2a4831ac61f&sd=2020-01-15T10:00:00Z&ed=2020-01-22T10:00:00Z&var=all
https://mw.buoybay.noaa.gov/api/v1/json/query/UP?key=f159959c117f473477edbdf3245cc2a4831ac61f&sd=2020-01-22T10:00:00Z&ed=2020-01-29T10:00:00Z&var=all
https://mw.buoybay.noaa.gov/api/v1/json/query/UP?key=f159959c117f473477edbdf3245cc2a4831ac61f&sd=2020-01-29T10:00:00Z&ed=2020-02-05T10:00:00Z&var=all
https://mw.buoybay.noaa.gov/api/v1/json/query/UP?key=f159959c117f473477edbdf3245cc2a4831ac61f&sd=2020-02-05T10:00:00Z&ed=2020-02-12T10:00:00Z&var=all
https://mw.buoybay.noaa.gov/api/v1/json/query/UP?key=f159959c117f473477edbdf3245cc2a4831ac61f&sd=202

In [2]:
import requests
import pandas as pd
from datetime import datetime, timedelta
from concurrent.futures import ThreadPoolExecutor, as_completed

# Function to build the API URL
def build_query_url(station, start_dt, end_dt, api_key, base_url):
    """Return the query URL requesting all variables for one station and time window.

    Args:
        station: Station code placed in the URL path.
        start_dt: Window start; formatted as ``YYYY-MM-DDTHH:MM:SSZ``.
        end_dt: Window end; formatted the same way.
        api_key: Value of the ``key`` query parameter.
        base_url: API root, to which ``/json/query/<station>`` is appended.
    """
    stamp_format = '%Y-%m-%dT%H:%M:%SZ'
    window_start, window_end = (moment.strftime(stamp_format) for moment in (start_dt, end_dt))
    query = "&".join([f"key={api_key}", f"sd={window_start}", f"ed={window_end}", "var=all"])
    return f"{base_url}/json/query/{station}?{query}"

# Function to fetch data from the API
def fetch_data(url):
    """Fetch one query URL and return the first station's variable list.

    Args:
        url: Fully built query URL.

    Returns:
        The value stored under ``'variable'`` of the first entry in the
        response's ``'stations'`` list, or None when the request fails, the
        body is not valid JSON, or the expected keys are absent.
    """
    try:
        # A timeout keeps one stalled connection from blocking a worker
        # thread indefinitely; (connect, read) limits are in seconds.
        response = requests.get(url, timeout=(10, 120))
        response.raise_for_status()  # Check for HTTP errors
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose JSON
        # decode error is not a RequestException subclass.
        print(f"Error fetching data from {url}: {e}")
        return None

    # Treat any unexpected payload shape as "no data" rather than raising,
    # because an exception here would surface through future.result() and
    # abort the fetch of every other time window.
    station_list = data.get('stations') if isinstance(data, dict) else None
    if not station_list:
        return None
    first_station = station_list[0]
    if not isinstance(first_station, dict):
        return None
    return first_station.get('variable')

# Function to process the raw JSON data into a DataFrame
def process_data(data):
    """Flatten the API's per-variable measurement lists into a DataFrame.

    Each measurement becomes one row holding its value in a column named
    after the variable's ``actualName``, plus ``unit`` and ``QA`` columns.

    Args:
        data: Iterable of variable dicts with ``'actualName'``, ``'units'``
            and ``'measurements'`` keys; each measurement needs ``'time'``
            and ``'value'`` and may carry ``'QA'``.

    Returns:
        DataFrame indexed by the parsed ``time`` values. When no measurement
        is present at all, an empty frame with an empty ``time`` index is
        returned.
    """
    rows = []
    for variable in data:
        param_name = variable['actualName']
        for measurement in variable['measurements']:
            rows.append({
                'time': measurement['time'],
                param_name: measurement['value'],
                'unit': variable['units'],
                'QA': measurement.get('QA', None)  # Quality assurance if available
            })

    # With no rows the frame has no 'time' column, so the conversion below
    # would raise KeyError; return an empty, time-indexed frame instead.
    if not rows:
        return pd.DataFrame(index=pd.DatetimeIndex([], name='time'))

    df = pd.DataFrame(rows)
    df['time'] = pd.to_datetime(df['time'])  # Convert time to datetime
    return df.set_index('time')  # Set time as index

# Function to fetch all data for a station in parallel using threads
def get_station_data_parallel(station, start_date, end_date, api_key, base_url, interval_days=3, max_workers=10):
    """Fetch a station's data in fixed-length time windows using a thread pool.

    The range [start_date, end_date] is split into consecutive windows of
    ``interval_days`` days (the last one is clipped to ``end_date``); each
    window is requested through ``fetch_data`` on a worker thread and turned
    into a DataFrame by ``process_data``.

    Args:
        station: Station code passed to ``build_query_url``.
        start_date: Range start as ``'YYYY-MM-DDTHH:MM:SSZ'``.
        end_date: Range end in the same format.
        api_key: API key passed to ``build_query_url``.
        base_url: API root passed to ``build_query_url``.
        interval_days: Length of each request window in days; must be > 0.
        max_workers: Maximum number of concurrent requests.

    Returns:
        The per-window frames concatenated in chronological window order, or
        an empty DataFrame when no window returned data.

    Raises:
        ValueError: If ``interval_days`` is not positive, or a date string
            does not match the expected format.
    """
    # A zero or negative step would never advance the window and would keep
    # submitting requests forever.
    if interval_days <= 0:
        raise ValueError("interval_days must be a positive number of days")

    start_dt = datetime.strptime(start_date, '%Y-%m-%dT%H:%M:%SZ')
    end_dt = datetime.strptime(end_date, '%Y-%m-%dT%H:%M:%SZ')
    window = timedelta(days=interval_days)

    # Futures are kept in submission (chronological) order
    tasks = []
    frames = []

    # ThreadPoolExecutor for parallel processing
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Loop over the time intervals
        current_start = start_dt
        while current_start < end_dt:
            current_end = min(current_start + window, end_dt)

            # Build the URL and submit the request to the thread pool
            url = build_query_url(station, current_start, current_end, api_key, base_url)
            print(url)
            tasks.append(executor.submit(fetch_data, url))

            # Move the time window forward.
            # NOTE(review): consecutive windows share their boundary timestamp;
            # if the API treats both ends as inclusive, a measurement exactly on
            # a boundary could appear twice — confirm and de-duplicate if needed.
            current_start = current_end

        # Read results in submission order rather than completion order so
        # the output row order is deterministic from run to run.
        for future in tasks:
            data = future.result()
            if data:
                frames.append(process_data(data))

    # Concatenate once; pd.concat raises on an empty list
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames)

# Example usage
# Example usage: download each configured station's data and write it to
# <station>_data.csv in the current working directory.
if __name__ == "__main__":
    # NOTE(review): the key is hard-coded here; consider reading it from the
    # environment before sharing this notebook.
    api_key = 'f159959c117f473477edbdf3245cc2a4831ac61f'  # Example API key
    base_url = 'https://mw.buoybay.noaa.gov/api/v1'

    # Define start and end dates for each station.
    # Only station 'J' is active; the remaining entries are commented out and
    # kept as a reference of station codes and date ranges.
    stations_info = {
        'J': ("2020-01-01T00:00:00Z", "2020-01-31T00:00:00Z"),  # Jamestown (since April 2007)
        # # 'N': ("2008-09-01T00:00:00Z", "2016-05-01T00:00:00Z"),  # Norfolk (Sept 2008 - May 2016)
        # 'FL': ("2020-01-01T00:00:00Z", "2023-01-01T00:00:00Z"), # First Landing (since 2011)
        # 'YS': ("2020-01-01T00:00:00Z", "2023-01-01T00:00:00Z"), # York Spit (since July 2016)
        # 'PL': ("2020-01-01T00:00:00Z", "2023-01-01T00:00:00Z"), # Potomac (since June 2007)
        # 'GR': ("2020-01-01T00:00:00Z", "2023-01-01T00:00:00Z"), # Goose Reef (since July 2010)
        # # 'UP': ("2010-05-01T00:00:00Z", "2017-12-31T23:59:59Z"), # Upper Potomac (May 2010 - Dec 2017)
        # 'AN': ("2020-01-01T00:00:00Z", "2023-01-01T00:00:00Z"), # Annapolis (since November 2009)
        # # 'SN': ("2007-07-01T00:00:00Z", "2017-12-31T23:59:59Z"), # Patapsco (July 2007 - Dec 2017)
        # # 'S': ("2007-09-01T00:00:00Z", "2017-12-31T23:59:59Z"),  # Susquehanna (Sept 2007 - Dec 2017)
    }

    # Loop over each station and fetch data in 3-day windows with up to 20 worker threads
    for station, (start_date, end_date) in stations_info.items():
        print(f"Fetching data for station: {station}")
        station_data = get_station_data_parallel(station, start_date, end_date, api_key, base_url, interval_days=3, max_workers=20)
        
        # Save the data to CSV, keeping the frame's index as the first column.
        # The file is written even when no data came back (empty frame).
        station_data.to_csv(f'{station}_data.csv', index=True)
        print(f"Data for station {station} saved to {station}_data.csv")


Fetching data for station: J
https://mw.buoybay.noaa.gov/api/v1/json/query/J?key=f159959c117f473477edbdf3245cc2a4831ac61f&sd=2020-01-01T00:00:00Z&ed=2020-01-04T00:00:00Z&var=all
https://mw.buoybay.noaa.gov/api/v1/json/query/J?key=f159959c117f473477edbdf3245cc2a4831ac61f&sd=2020-01-04T00:00:00Z&ed=2020-01-07T00:00:00Z&var=all
https://mw.buoybay.noaa.gov/api/v1/json/query/J?key=f159959c117f473477edbdf3245cc2a4831ac61f&sd=2020-01-07T00:00:00Z&ed=2020-01-10T00:00:00Z&var=all
https://mw.buoybay.noaa.gov/api/v1/json/query/J?key=f159959c117f473477edbdf3245cc2a4831ac61f&sd=2020-01-10T00:00:00Z&ed=2020-01-13T00:00:00Z&var=all
https://mw.buoybay.noaa.gov/api/v1/json/query/J?key=f159959c117f473477edbdf3245cc2a4831ac61f&sd=2020-01-13T00:00:00Z&ed=2020-01-16T00:00:00Z&var=all
https://mw.buoybay.noaa.gov/api/v1/json/query/J?key=f159959c117f473477edbdf3245cc2a4831ac61f&sd=2020-01-16T00:00:00Z&ed=2020-01-19T00:00:00Z&var=all
https://mw.buoybay.noaa.gov/api/v1/json/query/J?key=f159959c117f473477edbdf32