### request

* example request: https://api.tidesandcurrents.noaa.gov/api/prod/datagetter?begin_date=20180101&end_date=20181231&station=8735180&product=hourly_height&datum=STND&time_zone=lst_ldt&units=metric&format=json

* template request: https://api.tidesandcurrents.noaa.gov/api/prod/datagetter?begin_date={YYYYMMDD}&end_date={YYYYMMDD}&station={station ID}&product=hourly_height&datum=STND&time_zone=lst_ldt&units=metric&format=json

* Note: Requested period should be no more than 365 days.

In [None]:
# URL template for the data request; fill d1/d2 (dates as YYYYMMDD) and
# sid (station id) with str.format.
t_req = (
    "https://api.tidesandcurrents.noaa.gov/api/prod/datagetter"
    "?begin_date={d1}&end_date={d2}&station={sid}"
    "&product=hourly_height&datum=STND&time_zone=lst_ldt"
    "&units=metric&format=json"
)

In [None]:
# Preview the URL produced for station 8735180 over calendar year 2018.
t_req.format(d1 = '20180101', d2 = '20181231', sid = '8735180')

### MetaData

In [None]:
# type=waterlevels gives the list of stations that are actively measuring.
station_list_json_url = "https://api.tidesandcurrents.noaa.gov/mdapi/prod/webapi/stations.json?type=waterlevels"

In [None]:
import json
import os

import pandas as pd
import requests

In [None]:
# Fetch the station listing; the per-station records are read from the
# 'stations' column in the next cell.
station_df = pd.read_json(station_list_json_url)
station_df

In [None]:
# Convert every entry of the 'stations' column to a plain dict, then let
# pandas expand the dict keys into one column per field.
station_li = [dict(station) for station in station_df['stations']]
station_df = pd.DataFrame(station_li)

In [None]:
# Keep only the identifying fields and key the table by station id.
# Note: set_index moves 'id' out of the columns and into the index.
station_li = station_df[['id', 'name', 'lat', 'lng']].set_index('id')
station_li

### Download

In [None]:
# station_li is indexed by station id (set_index('id') removed the 'id'
# column), so station_li['id'] would raise KeyError; read the index instead.
ids = station_li.index.tolist()
# Calendar years to download and process: 2018 through 2023 inclusive.
years = list(range(2018, 2024))

In [None]:
def download(sid, year, out_dir='./yearly by station', timeout=60):
    """Download one calendar year of hourly heights for one station.

    The request URL is built from the ``t_req`` template for the period
    January 1 to December 31 of ``year``. The raw response body is written
    unchanged to ``<out_dir>/<sid>_<year>.json``; the response is not
    validated here, so error payloads are saved as well.

    Args:
        sid: Station id inserted into the request URL and the file name.
        year: Four-digit calendar year.
        out_dir: Directory that receives the file. The default matches the
            directory ``getYearlyData`` reads from. Created if missing.
        timeout: Seconds to wait for the server before ``requests`` raises,
            so a single stalled request cannot hang the whole batch.
    """
    url = t_req.format(
        d1='{}0101'.format(year),
        d2='{}1231'.format(year),
        sid=sid,
    )
    response = requests.get(url, timeout=timeout)
    # Echo the start of the body as a quick progress / sanity check.
    print(response.content[:100])
    os.makedirs(out_dir, exist_ok=True)
    target = os.path.join(out_dir, '{}_{}.json'.format(sid, year))
    with open(target, 'wb') as out:
        out.write(response.content)

In [None]:
# Download every (station, year) combination, one request at a time.
for i in ids:
    for y in years:
        download(i, y)

### Clean

In [None]:
def getYearlyData(year, station_ids=None, data_dir='./yearly by station'):
    """Combine the saved per-station files of one year into a single table.

    For every station the file ``<data_dir>/<station>_<year>.json`` is
    loaded. Its ``'data'`` records are indexed by their ``'t'`` field and
    the ``'v'`` field becomes the column for that station. Stations whose
    file has no (or an empty) ``'data'`` entry are reported and skipped.

    Columns are joined on the union of all timestamps, so a reading is kept
    even when the first station has no row for that timestamp; missing
    readings are NaN.

    Args:
        year: Calendar year used in the file names.
        station_ids: Station ids to load. Defaults to the notebook-level
            ``ids`` list.
        data_dir: Directory that holds the downloaded files.

    Returns:
        DataFrame with one column per station (named ``str(station id)``),
        or an empty DataFrame when no station has data.
    """
    if station_ids is None:
        station_ids = ids

    columns = {}
    for sid in station_ids:
        path = '{}/{}_{}.json'.format(data_dir, sid, year)
        with open(path, encoding='utf-8') as f:
            payload = json.load(f)
        if 'data' not in payload or not payload['data']:
            print('station {} no data'.format(sid))
            continue
        columns[str(sid)] = pd.DataFrame(payload['data']).set_index('t')['v']

    # pd.concat raises on an empty mapping, so keep the empty-frame result.
    if not columns:
        return pd.DataFrame()
    # Build the frame in one step: the dict keys become the column names.
    return pd.concat(columns, axis=1)

In [None]:
# Preview the combined table for a single year.
getYearlyData(2018)

In [None]:
# Stack the yearly tables for 2018 through 2023 row-wise into one frame.
df = pd.concat([getYearlyData(y) for y in range(2018, 2024)])
df

### Dump

In [None]:
def yearlyDataDumper(year):
    """Serialize the combined table of ``year`` to ``all_<year>.json``.

    The table comes from ``getYearlyData(year)`` and is written to the
    current working directory; a confirmation line is printed afterwards.
    """
    frame = getYearlyData(year)
    target = 'all_{}.json'.format(year)
    with open(target, 'w+') as out:
        out.write(frame.to_json())
    print('{} dump complete'.format(year))

In [None]:
# Write one all_<year>.json file for each year from 2018 through 2023.
for i in range(2018, 2024):
    yearlyDataDumper(i)

In [None]:
# Save the station metadata table (name, lat, lng keyed by id) as JSON.
with open('stations.json','w+') as out:
    out.write(station_li.to_json())

In [None]:
# Save the multi-year table built in the Clean section as a single JSON file.
with open('all.json', 'w+') as out:
    out.write(df.to_json())