Automates the download of NOAA water elevation data. 

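See https://api.tidesandcurrents.noaa.gov/api/prod/ for the web API. The API returns at most 31 days of water level data per request, so this notebook splits the requested date range into 31-day chunks, downloads each chunk, and combines them into one gzip-compressed CSV file per station. Stations that do not list a NAVD88 datum are skipped. The coordinates of each downloaded tide station are also retrieved and saved to a shapefile.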

In [1]:
import pandas as pd

# Water level endpoint. The query string is written one parameter per line; the
# pieces are joined by implicit string concatenation into a single template whose
# {placeholders} are filled in with str.format() at request time.
noaa_tide_url = (
    "https://api.tidesandcurrents.noaa.gov/api/prod/datagetter"
    "?product=water_level"
    "&application=NOS.COOPS.TAC.WL"
    "&begin_date={start_date}"
    "&end_date={end_date}"
    "&datum={datum}"
    "&station={station_id}"
    "&time_zone=GMT"
    "&units=metric"
    "&format=csv"
)

# Station metadata endpoints. These were reverse engineered from the website, so
# they are undocumented and may stop working without notice.
noaa_station_info_url = (
    "https://api.tidesandcurrents.noaa.gov/mdapi/prod/webapi"
    "/stations/{station_id}.json"
)
noaa_station_datums_url = (
    "https://api.tidesandcurrents.noaa.gov/mdapi/prod/webapi"
    "/stations/{station_id}/datums.json"
)

# NOAA station IDs to download.
stations = (
    9447130,
    9446484,
    9444900,
    9449424,
    9449880,
    9444090,
    9443090,
)

# Length, in days, of each chunk the date range is split into.
max_days_per_request = 31

# Date range to download; both ends are sent to the API formatted as YYYYMMDD.
start_date = pd.Timestamp(year=2014, month=1, day=1)
end_date = pd.Timestamp(year=2015, month=1, day=2)

# Output locations: {name} is replaced with the station name.
dest = "data/tides/{name}.csv.gz"
station_info_file = "data/tides/station_info.shp"

import json
import time
import urllib.error
import urllib.request
from pathlib import Path

import geopandas as gpd
from shapely.geometry import Point

In [2]:
def _with_retries(action, attempts=3, backoff_seconds=2.0):
    """Call ``action()`` and return its result, retrying transient network errors.

    Parameters
    ----------
    action : callable
        Zero-argument callable that performs one network request.
    attempts : int
        Total number of tries (must be at least 1) before the last error is re-raised.
    backoff_seconds : float
        Delay before the second try; the delay doubles after every further failure.

    Raises
    ------
    ValueError
        If ``attempts`` is less than 1.
    urllib.error.URLError, TimeoutError, ConnectionError
        The error from the final try, or immediately for an HTTP 4xx response
        other than 429, since repeating a rejected request cannot succeed.
    """
    if attempts < 1:
        raise ValueError('attempts must be at least 1')
    for attempt in range(1, attempts + 1):
        try:
            return action()
        except urllib.error.HTTPError as err:
            # HTTPError must be handled before its parent class URLError.
            retryable = err.code >= 500 or err.code == 429
            if not retryable or attempt == attempts:
                raise
            failure = err
        except (urllib.error.URLError, TimeoutError, ConnectionError) as err:
            if attempt == attempts:
                raise
            failure = err
        delay = backoff_seconds * 2 ** (attempt - 1)
        print(f'  {failure!r}; retrying in {delay:.0f}s (attempt {attempt} of {attempts} failed)')
        time.sleep(delay)


def _load_json(url):
    """Download ``url`` and return the response body decoded as JSON."""
    with urllib.request.urlopen(url) as response:
        return json.load(response)


# Split the requested date range into chunks small enough for a single API request.
periods = pd.period_range(start_date, end_date, freq=f'{max_days_per_request}D')
station_infos = []
for station_id in stations:
    # Retrieve station info
    data = _with_retries(lambda: _load_json(noaa_station_info_url.format(station_id=station_id)))
    if data['count'] != 1:
        print(data)
        raise ValueError('Unexpected result returned for station ID')
    s = data['stations'][0]
    name = s['name']
    print(f'{name} ({station_id})')

    # Retrieve available datums. An empty or null datum list is treated as
    # "no NAVD88" so the station is skipped instead of failing with a KeyError.
    datum_data = _with_retries(lambda: _load_json(noaa_station_datums_url.format(station_id=station_id)))
    datum_names = {entry.get("name") for entry in (datum_data["datums"] or [])}
    if "NAVD88" not in datum_names:
        print("Skipping station as no valid datum is available")
        continue
    # The metadata lists the datum as "NAVD88", while the data request is sent "NAVD".
    datum = "NAVD"

    station_infos.append({
        "id": s['id'],
        "name": s['name'],
        "geometry": Point(s['lng'], s['lat'])
    })

    # Download the water levels one chunk at a time.
    dfs = []
    for p in periods:
        start = p.start_time
        # The last chunk may extend past the requested range, so clip it.
        end = min(p.end_time, end_date)
        print(f'{start} to {end}')
        url = noaa_tide_url.format(
            station_id=station_id,
            datum=datum,
            start_date=start.strftime("%Y%m%d"),
            end_date=end.strftime("%Y%m%d"),
        )
        dfs.append(_with_retries(lambda: pd.read_csv(url, parse_dates=[0])))
    df = pd.concat(dfs, ignore_index=True).set_index("Date Time")

    # Create the output directory on first use so a fresh checkout does not fail here.
    out_path = Path(dest.format(name=name))
    out_path.parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(out_path, compression="gzip")
    print(f'Retrieved {len(df)} observations for station {name}')

# One row per downloaded station, indexed by station ID.
stations_df = gpd.GeoDataFrame(station_infos).set_index('id')
stations_df

Seattle (9447130)
2014-01-01 00:00:00 to 2014-01-31 23:59:59.999999999
2014-02-01 00:00:00 to 2014-03-03 23:59:59.999999999
2014-03-04 00:00:00 to 2014-04-03 23:59:59.999999999
2014-04-04 00:00:00 to 2014-05-04 23:59:59.999999999
2014-05-05 00:00:00 to 2014-06-04 23:59:59.999999999
2014-06-05 00:00:00 to 2014-07-05 23:59:59.999999999
2014-07-06 00:00:00 to 2014-08-05 23:59:59.999999999
2014-08-06 00:00:00 to 2014-09-05 23:59:59.999999999
2014-09-06 00:00:00 to 2014-10-06 23:59:59.999999999
2014-10-07 00:00:00 to 2014-11-06 23:59:59.999999999
2014-11-07 00:00:00 to 2014-12-07 23:59:59.999999999
2014-12-08 00:00:00 to 2015-01-02 00:00:00
Retrieved 88080 observations for station Seattle
Tacoma (9446484)
2014-01-01 00:00:00 to 2014-01-31 23:59:59.999999999
2014-02-01 00:00:00 to 2014-03-03 23:59:59.999999999
2014-03-04 00:00:00 to 2014-04-03 23:59:59.999999999
2014-04-04 00:00:00 to 2014-05-04 23:59:59.999999999
2014-05-05 00:00:00 to 2014-06-04 23:59:59.999999999
2014-06-05 00:00:00 to 20

Unnamed: 0_level_0,name,geometry
id,Unnamed: 1_level_1,Unnamed: 2_level_1
9447130,Seattle,POINT (-122.33917 47.60194)
9446484,Tacoma,POINT (-122.41300 47.27000)
9444090,Port Angeles,POINT (-123.44000 48.12500)
9443090,Neah Bay,POINT (-124.60194 48.37028)


In [3]:
# Tag the station points (built from the stations' lng/lat values) as EPSG:4326
# and write them to the shapefile. set_crs() is used instead of assigning to the
# .crs attribute so the cell can be re-run on a frame that already carries this
# CRS; the name is rebound so later cells still see the CRS on stations_df.
stations_df = stations_df.set_crs('epsg:4326')
stations_df.to_file(station_info_file)