In [1]:
import os

from ndbc_api import NdbcApi
import pandas as pd

REFRESH = False #Set to True to remake all files.
LATITUDES = 41.37754, 42.09690 #Vickery and Amherstburg
LONGITUDES = -83.51131, -82.67909 #Toledo and Kellys Island
START_DATE, END_DATE = '2015-01-01', '2025-01-01'
FILES = {
    'station_detail': 'Erie Western Basin Stations.csv',
    'station_data_roster': 'Erie Western Basin Station Data Roster.csv',
    'standard_met_data': 'Erie Western Basin Meteorological Data.csv'
    }
api = NdbcApi()

if REFRESH:
    for i in FILES.items():
        os.remove(i)

try:
    stations = pd.read_csv(FILES['station_detail'], index_col='Station')
except FileNotFoundError:
    print('Generating new station detail file.')
    stations = (
        api
        .stations()
        .loc[
            lambda x: x.Lat.between(*LATITUDES) & x.Lon.between(*LONGITUDES),
            :
            ]
        .set_index('Station')
        )
    assert stations.index.is_unique, 'Station IDs are not unique.'
    stations.to_csv(FILES['station_detail'])

stations

Unnamed: 0_level_0,Lat,Lon,Elevation,Name,Owner,Program,Type,Includes Meteorology,Includes Currents,Includes Water Quality,DART Program
Station,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
45165,41.702,-83.261,174.0,"Toledo Water Intake Buoy, Oregon, OH",Limno Tech,IOOS Partners,buoy,False,False,False,False
45200,41.724,-83.37,174.0,Maumee Bay Buoy,Cleveland Water Alliance,IOOS Partners,buoy,False,False,False,False
45201,41.601,-82.781,174.0,Erie Islands Buoy,Cleveland Water Alliance,IOOS Partners,buoy,False,False,False,False
45202,41.532,-82.941,174.0,Port Clinton Buoy,Cleveland Water Alliance,IOOS Partners,buoy,False,False,False,False
cmpo1,41.547,-83.015,175.0,"Camp Perry, OH",NWS Eastern Region,IOOS Partners,fixed,True,False,False,False
mrho1,41.544,-82.731,176.0,"9063079 - Marblehead, OH",NOS,NOS/CO-OPS,fixed,True,False,False,False
sbio1,41.629,-82.841,176.8,"South Bass Island, OH",NDBC,NDBC Meteorological/Ocean,fixed,True,False,False,False
thlo1,41.826,-83.194,173.5,Toledo Light No. 2 OH,GLERL,IOOS Partners,fixed,True,False,False,False
thro1,41.694,-83.473,176.2,"9063085 - Toledo, OH",NOS,NOS/CO-OPS,fixed,True,False,False,False
twco1,41.699,-83.259,173.5,"Toledo Crib, OH",Limno Tech,IOOS Partners,fixed,True,False,False,False


In [2]:
def create_data_roster() -> pd.DataFrame:
    station_datas = {}

    for idx in stations.index:
        try:
            station_datas[idx] = list(
                api
                .available_historical(idx)
                .keys()
                )
        except Exception:
            print(f'Error getting data for station {idx}')
            continue

    columns = []

    for i in station_datas.values():
        columns += i

    columns = list(set(columns)) #Use set to remove duplicates and order.

    data_roster = (
        pd
        .DataFrame()
        .from_dict(
            data={
                k: [True if c in v else False for c in columns]
                for k, v in station_datas.items()
                },
            columns=list(set(columns)), orient='index')
        .rename_axis(index='Station')
        )
    return data_roster


try:
    data_roster = pd.read_csv(
        FILES['station_data_roster'], index_col='Station')
except FileNotFoundError:
    print('Generating new data roster file.')
    data_roster = create_data_roster()
    data_roster.to_csv(FILES['station_data_roster'])

data_roster

Unnamed: 0_level_0,Solar radiation data,Standard meteorological data,Ocean current data,Continuous winds data
Station,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
45165,True,True,True,False
45200,False,True,False,False
45201,True,True,False,False
45202,True,True,False,False
cmpo1,False,True,False,False
sbio1,False,True,False,True
thlo1,False,True,False,False
twco1,False,True,False,False


In [3]:
try:
    standard_meteorological_data = pd.read_csv(
        FILES['standard_met_data'], index_col=['timestamp', 'station_id'])
except FileNotFoundError:
    print('Generating new data.')
    standard_meteorological_data = api.get_data(
        station_ids=(
            data_roster
            .loc[data_roster['Standard meteorological data']]
            .index
            .to_list()
            ),
        mode='stdmet', start_time=START_DATE, end_time=END_DATE)
    standard_meteorological_data.to_csv(FILES['standard_met_data'])

standard_meteorological_data

  standard_meteorological_data = pd.read_csv(


Unnamed: 0_level_0,Unnamed: 1_level_0,WDIR,WSPD,GST,WVHT,DPD,APD,MWD,PRES,ATMP,WTMP,DEWP,VIS,TIDE
timestamp,station_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2023-06-16 16:10:00,45200,357.0,6.0,7.9,0.46,3.25,,49.0,1012.2,18.6,19.4,14.1,,
2023-06-16 16:20:00,45200,9.0,5.8,8.3,0.49,3.32,,45.0,1012.2,18.6,19.4,14.1,,
2023-06-16 16:40:00,45200,19.0,5.2,7.9,,,,,1012.1,18.7,,14.2,,
2023-06-16 16:50:00,45200,8.0,5.5,7.1,,,,,1012.2,18.9,,14.0,,
2023-06-16 17:00:00,45200,11.0,5.0,6.8,0.43,3.14,,49.0,1012.2,18.9,19.5,14.2,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-12-31 23:19:00,twco1,,,,,,,,,2.7,,2.1,,
2024-12-31 23:29:00,twco1,,,,,,,,,2.6,,1.9,,
2024-12-31 23:39:00,twco1,,,,,,,,,2.5,,1.9,,
2024-12-31 23:49:00,twco1,,,,,,,,,2.5,,1.9,,
