In [1]:
%reload_ext autotime
import pandas as pd
import requests
import geopandas as gpd
from tqdm.auto import tqdm
import time
import os
from glob import glob

In [2]:
# Load the polygons, keep only the rows whose id starts with "nzd", and index
# the result by id so a site's geometry can be looked up by its name.
poly = (
    gpd.read_file("polygons.geojson")
    .loc[lambda gdf: gdf.id.str.startswith("nzd")]
    .set_index("id")
)
poly

Unnamed: 0_level_0,area,id_sorted,northing,geometry
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
nzd0001,3.069093e+06,nzd0001,-4.085604e+06,"POLYGON ((172.96406 -34.43054, 172.99324 -34.4..."
nzd0002,1.507931e+06,nzd0002,-4.086100e+06,"POLYGON ((172.93456 -34.42857, 172.95856 -34.4..."
nzd0003,2.484730e+06,nzd0003,-4.087232e+06,"POLYGON ((173.0098 -34.42055, 173.00088 -34.44..."
nzd0006,9.619509e+05,nzd0004,-4.090732e+06,"POLYGON ((173.00532 -34.46694, 173.00612 -34.4..."
nzd0007,3.109433e+06,nzd0005,-4.095516e+06,"POLYGON ((172.99847 -34.48102, 173.00081 -34.5..."
...,...,...,...,...
nzd0317,9.986110e+05,nzd0556,-4.971830e+06,"POLYGON ((172.32302 -40.7187, 172.3121 -40.728..."
nzd0316,4.300058e+06,nzd0557,-4.966165e+06,"POLYGON ((172.37076 -40.70229, 172.39571 -40.6..."
nzd0313,2.291974e+07,nzd0558,-4.939929e+06,"POLYGON ((172.89281 -40.50518, 172.73306 -40.5..."
nzd0314,3.124363e+07,nzd0559,-4.943102e+06,"POLYGON ((173.04122 -40.54521, 172.89309 -40.5..."


In [3]:
# One row per site directory that contains a transect time series.
files = pd.DataFrame({"filename": sorted(glob("data/*/transect_time_series.csv"))})
# The site name is the directory holding the CSV. Use os.path rather than
# splitting on "/" because glob returns paths with the OS-native separator.
files["sitename"] = files.filename.map(lambda f: os.path.basename(os.path.dirname(f)))
# A site is done once a tides.csv sits next to its transect time series.
files["have_tides"] = files.filename.map(
    lambda f: os.path.isfile(os.path.join(os.path.dirname(f), "tides.csv"))
)
files

Unnamed: 0,filename,sitename,have_tides
0,data/nzd0001/transect_time_series.csv,nzd0001,True
1,data/nzd0002/transect_time_series.csv,nzd0002,False
2,data/nzd0003/transect_time_series.csv,nzd0003,False
3,data/nzd0006/transect_time_series.csv,nzd0006,False
4,data/nzd0007/transect_time_series.csv,nzd0007,False
5,data/nzd0010/transect_time_series.csv,nzd0010,False
6,data/nzd0012/transect_time_series.csv,nzd0012,False
7,data/nzd0013/transect_time_series.csv,nzd0013,False
8,data/nzd0014/transect_time_series.csv,nzd0014,False
9,data/nzd0015/transect_time_series.csv,nzd0015,False


In [4]:
def get_tide_for_dt(point, datetime):
    """Return the NIWA tide value at ``point`` for the timestamp ``datetime``.

    Requests two days of 10-minute tide values (datum "MSL") starting on the
    calendar date of ``datetime`` and returns the entry whose time equals
    ``datetime``.

    Parameters
    ----------
    point : object with ``x`` and ``y`` attributes
        ``x`` is sent as the API's ``long`` and ``y`` as its ``lat``.
    datetime : timestamp with a ``.date()`` method
        Used both to pick the request's start date and as the lookup key in
        the returned series. NOTE(review): the lookup is an exact match, so
        this presumably needs to be aligned to the 10-minute grid and use the
        same timezone convention as the API's ``time`` field; confirm.

    Returns
    -------
    The ``value`` entry of the API response whose ``time`` equals ``datetime``.

    Raises
    ------
    KeyError
        If the ``NIWA_API_KEY`` environment variable is not set, or no
        returned time matches ``datetime``.
    requests.HTTPError
        If the API answers with a 4xx/5xx status other than 429.
    RuntimeError
        If the API answers with any other status that is neither 200 nor 429.
    """
    # The request is identical on every retry, so build it once.
    params = {
        "lat": point.y,
        "long": point.x,
        "numberOfDays": 2,
        "startDate": str(datetime.date()),
        "datum": "MSL",
        "interval": 10,  # 10 minute resolution
        "apikey": os.environ["NIWA_API_KEY"],
    }
    sleep_seconds = 30
    while True:
        r = requests.get("https://api.niwa.co.nz/tides/data", params=params, timeout=(30, 30))
        if r.status_code == 200:
            df = pd.DataFrame(r.json()["values"])
            df.index = pd.to_datetime(df.time)
            return df.value[datetime]
        if r.status_code == 429:
            # Rate limited: sleep for x seconds to refresh the count, then retry.
            print(f'Num of API reqs exceeded, Sleeping for: {sleep_seconds} seconds...')
            time.sleep(sleep_seconds)
            continue
        # Any other status (bad key, server error, ...) will not fix itself by
        # retrying immediately; fail loudly instead of looping forever.
        r.raise_for_status()
        raise RuntimeError(f"Unexpected HTTP status {r.status_code} from the NIWA tides API")

# For every site that has no tides.csv yet, look up a tide value for each
# observation time and write the result next to the site's transect file.
pending_sites = files.loc[~files.have_tides, "sitename"]
for sitename in tqdm(pending_sites):
    transects = pd.read_csv(f"data/{sitename}/transect_time_series.csv")
    # Round to 10 minutes to line up with the 10-minute interval requested
    # from the tide API.
    dates = pd.to_datetime(transects.dates).dt.round("10min")
    # Tides are queried at the centroid of the site's polygon.
    point = poly.geometry[sitename].centroid

    results = []
    for date in tqdm(dates):
        results.append({"dates": date, "tide": get_tide_for_dt(point, date)})

    df = pd.DataFrame(results).set_index("dates")
    df.to_csv(f"data/{sitename}/tides.csv")

  0%|          | 0/22 [00:00<?, ?it/s]

  0%|          | 0/159 [00:00<?, ?it/s]

  0%|          | 0/151 [00:00<?, ?it/s]

Num of API reqs exceeded, Sleeping for: 30 seconds...
