In [None]:
import pickle
from pathlib import Path
from collections import namedtuple
from itertools import chain
import pandas as pd
from timeseries_point import extract_series

# Pair of pre-formatted date and hour strings used to build file glob patterns.
GlobHelper = namedtuple("GlobHelper", "date hour")
# A named location: station name plus its latitude and longitude.
NamedCoord = namedtuple("NamedCoord", "name lat lon")

In [8]:
# Location of the pickled USGS data bundle (Cook County, per the file name).
usgs_data_path = Path("usgs/usgs_Cook County.pkl")

# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
with usgs_data_path.open("rb") as pkl_file:
    data = pickle.load(pkl_file)

# The bundle's values are unpacked positionally, so this relies on the order in
# which the four entries were stored when the pickle was written.
daily, inst, site_info, pcodes = data.values()

print(f"{len(site_info)} lat/lon pairs will be queried")

30 lat/lon pairs will be queried


In [5]:
# Preview the first five rows of the site metadata table.
site_info.head(n=5)

Unnamed: 0,agency_cd,site_no,station_nm,site_tp_cd,lat_va,long_va,dec_lat_va,dec_long_va,coord_meth_cd,coord_acy_cd,...,reliability_cd,gw_file_cd,nat_aqfr_cd,aqfr_cd,aqfr_type_cd,well_depth_va,hole_depth_va,depth_src_cd,project_no,geometry
0,USGS,413113087342201,"RAIN GAGE NEAR CHICAGO HEIGHTS, IL",AT,413115,873525,41.520868,-87.590321,M,S,...,,NNNNNNNN,,,,,,,00100,POINT (-87.59032 41.52087)
1,USGS,413115087352501,"RAIN GAGE AT DEER CREEK NEAR CHICAGO HEIGHTS, IL",AT,413115,873525,41.520833,-87.590278,N,S,...,,,,,,,,,CAWS00,POINT (-87.59028 41.52083)
2,USGS,413510087380201,"RAIN GAGE AT HARVEY, IL",AT,413510,873802,41.586111,-87.633889,N,S,...,,,,,,,,,CAWS00,POINT (-87.63389 41.58611)
3,USGS,413514087523501,"RAIN GAGE AT ORLAND PARK, IL",AT,413514,875235,41.587222,-87.876389,N,S,...,,,,,,,,,CAWS000,POINT (-87.87639 41.58722)
4,USGS,413516087442101,"RAIN GAGE AT OAK FOREST, IL",AT,413516,874421,41.587778,-87.739167,N,S,...,,,,,,,,,CAWS00,POINT (-87.73917 41.58778)


In [None]:
# Query window for the gauge records (UTC).
# NOTE(review): END_DATE is the midnight timestamp that *starts* 2021-08-31, so the
# slice below keeps nothing later than 00:00 on that day — confirm this is intended.
START_DATE = pd.Timestamp("20210301", tz="utc")
END_DATE = pd.Timestamp("20210831", tz="utc")

# Column of `inst` tested for rainfall (USGS parameter code 00045, precipitation).
PRECIP_PCODE = "00045"
# Root folder searched for the compressed files, laid out as <YYYYMMDD>/<file>.gz.
GRIB_DIR = Path("../data")
# How many sites (in `site_info` order) to process. The default of 1 reproduces the
# previous trial-run behaviour, where the loop stopped after the first site; set it
# to None to process every site.
MAX_SITES = 1

# Output folder is loop-invariant, so create it once up front.
dest_folder = Path("./timeseries")
dest_folder.mkdir(parents=True, exist_ok=True)

sites_to_query = site_info if MAX_SITES is None else site_info.iloc[:MAX_SITES]

for _, site in sites_to_query.iterrows():
    ## Get site coordinates and number
    site_no = site["site_no"]
    coord = NamedCoord(
        site["station_nm"],
        site["dec_lat_va"],
        # Map the longitude onto [0, 360). For western (negative) longitudes this is
        # identical to the former `360 + lon`, and it no longer overshoots 360 for
        # eastern ones. Presumably extract_series expects 0-360 longitudes — verify.
        site["dec_long_va"] % 360,
    )

    ## Filter hours with rain - skip zeros from query
    resampled = inst.xs(site_no).loc[START_DATE:END_DATE].resample("1h").sum()
    filtered = resampled[resampled[PRECIP_PCODE] > 0]
    hours_with_storms = [
        GlobHelper(t.strftime("%Y%m%d"), t.strftime("%H")) for t in filtered.index
    ]

    # Collect, hour by hour, every file whose name matches that date and hour;
    # files within each hour are sorted so the overall list is chronological.
    files = []
    for hd in hours_with_storms:
        pattern = f"{hd.date}/*{hd.date}-{hd.hour}*.gz"
        files.extend(sorted(GRIB_DIR.glob(pattern)))

    ## Execute GRIB2 query
    df = extract_series(files, coord.lat, coord.lon)
    df.to_parquet(dest_folder / f"{coord.name}.parquet")

In [88]:
# Read back the series written by the extraction loop. `coord` and `dest_folder`
# still hold the values from the last site that loop processed.
series_path = dest_folder / f"{coord.name}.parquet"
pd.read_parquet(series_path)

Unnamed: 0_level_0,value
timestamp,Unnamed: 1_level_1
2021-03-11 09:00:00,0.0
2021-03-11 09:02:00,0.0
2021-03-11 09:04:00,0.0
2021-03-11 09:06:00,0.0
2021-03-11 09:08:00,0.0
...,...
2021-03-27 13:24:00,0.0
2021-03-27 13:26:00,0.0
2021-03-27 13:28:00,0.0
2021-03-27 13:30:00,0.0
