In [1]:
import pickle
from pathlib import Path
from collections import namedtuple
from itertools import chain
import pandas as pd
from timeseries_point import extract_series

# Small immutable records used by the query loop further down:
#   GlobHelper - a (date, hour) pair of pre-formatted strings used to build file glob patterns
#   NamedCoord - a station name together with its latitude and longitude
GlobHelper = namedtuple("GlobHelper", "date hour")
NamedCoord = namedtuple("NamedCoord", "name lat lon")

In [2]:
usgs_data_path = Path("usgs/usgs_Cook County.pkl")

# NOTE: unpickling can execute arbitrary code - only load files from a trusted source.
with usgs_data_path.open("rb") as f:
    data = pickle.load(f)

# The loaded object's values are unpacked positionally, so this relies on it
# yielding exactly four items in this order.
daily, inst, site_info, pcodes = data.values()

print(f"{len(site_info)} lat/lon pairs will be queried")

33 lat/lon pairs will be queried


In [3]:
# Preview the first five rows of the site metadata table.
site_info.head(n=5)

Unnamed: 0,agency_cd,site_no,station_nm,site_tp_cd,lat_va,long_va,dec_lat_va,dec_long_va,coord_meth_cd,coord_acy_cd,...,reliability_cd,gw_file_cd,nat_aqfr_cd,aqfr_cd,aqfr_type_cd,well_depth_va,hole_depth_va,depth_src_cd,project_no,geometry
0,USGS,5530990,"SALT CREEK AT ROLLING MEADOWS, IL",ST,420337.41,880059.97,42.060392,-88.016658,N,5,...,,NNNNNNNN,,,,,,,,POINT (-88.01666 42.06039)
1,USGS,5536290,"LITTLE CALUMET RIVER AT SOUTH HOLLAND, IL",ST,413625.3,873551.3,41.607028,-87.597583,X,F,...,,NNNNNNNN,,,,,,,,POINT (-87.59758 41.60703)
2,USGS,413104087440001,"RAIN GAGE AT MATTESON, IL",AT,413104.0,874400.0,41.517778,-87.733333,N,S,...,,,,,,,,,CAWS0,POINT (-87.73333 41.51778)
3,USGS,413113087342201,"RAIN GAGE NEAR CHICAGO HEIGHTS, IL",AT,413115.0,873525.0,41.520868,-87.590321,M,S,...,,NNNNNNNN,,,,,,,00100,POINT (-87.59032 41.52087)
4,USGS,413115087352501,"RAIN GAGE AT DEER CREEK NEAR CHICAGO HEIGHTS, IL",AT,413115.0,873525.0,41.520833,-87.590278,N,S,...,,,,,,,,,CAWS00,POINT (-87.59028 41.52083)


In [4]:
# Query window (UTC). END_DATE is midnight, so the label slice below stops at
# 2021-08-31 00:00 UTC.
# NOTE(review): confirm whether the rest of Aug 31 should be included.
START_DATE = pd.Timestamp("20210301", tz="utc")
END_DATE = pd.Timestamp("20210831", tz="utc")

# The output folder is the same for every site, so create it once up front.
dest_folder = Path("./timeseries")
dest_folder.mkdir(parents=True, exist_ok=True)

for i, site in site_info.iterrows():
    ## Get site coordinates and number
    site_no = site["site_no"]
    coord = NamedCoord(
        site["station_nm"],
        site["dec_lat_va"],
        # Wrap longitude into [0, 360). Same result as `360 + lon` for western
        # (negative) longitudes, and also correct for lon >= 0.
        # Presumably the gridded data uses 0-360 degrees east - TODO confirm.
        site["dec_long_va"] % 360,
    )
    parquet_file = dest_folder / f"{coord.name}.parquet"

    # Already extracted on a previous run - nothing to do for this site.
    if parquet_file.exists():
        continue

    ## Filter hours with rain - skip zeros from query
    # Hourly totals of the site's instantaneous record within the query window;
    # only hours whose "00045" total is positive are kept.
    # (presumably "00045" is the USGS precipitation parameter code - verify)
    resampled = inst.xs(site_no).loc[START_DATE:END_DATE].resample("1h").sum()
    filtered = resampled[resampled["00045"] > 0]
    hours_with_storms = [GlobHelper(t.strftime("%Y%m%d"), t.strftime("%H")) for t in filtered.index]

    # Collect the matching archive files hour by hour, sorted within each hour.
    files = []
    for hd in hours_with_storms:
        pattern = f"{hd.date}/*{hd.date}-{hd.hour}*.gz"
        files.extend(sorted(Path("../data").glob(pattern)))

    ## Execute GRIB2 query
    df = extract_series(files, coord.lat, coord.lon)
    df.to_parquet(parquet_file)
    # NOTE(review): this stops after the first site that still needs extracting,
    # so each run of the cell produces at most one new parquet file. Looks like a
    # debugging leftover - remove it to process every site in one run.
    break

Can't create file '/tmp/tmpaqacmhl0.grib2.5b7b6.idx'
Traceback (most recent call last):
  File "/home/edwin/.virtualenvs/zenv/lib/python3.12/site-packages/cfgrib/messages.py", line 274, in itervalues
    yield self.filestream.message_from_file(file, errors=errors)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/edwin/.virtualenvs/zenv/lib/python3.12/site-packages/cfgrib/messages.py", line 341, in message_from_file
    return Message.from_file(file, offset, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/edwin/.virtualenvs/zenv/lib/python3.12/site-packages/cfgrib/messages.py", line 105, in from_file
    raise EOFError("End of file: %r" % file)
EOFError: End of file: <_io.BufferedReader name='/tmp/tmpaqacmhl0.grib2'>

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/edwin/.virtualenvs/zenv/lib/python3.12/site-packages/cfgrib/messages.py", line 539, in from_inde

EOFError: No valid message found: '/tmp/tmpaqacmhl0.grib2'

In [None]:
# Load the parquet written for the site the loop last worked on
# (`coord` and `dest_folder` are whatever the loop cell left behind).
result_path = dest_folder / f"{coord.name}.parquet"
pd.read_parquet(result_path)

Unnamed: 0_level_0,value
timestamp,Unnamed: 1_level_1
2021-03-11 09:00:00,0.0
2021-03-11 09:02:00,0.0
2021-03-11 09:04:00,0.0
2021-03-11 09:06:00,0.0
2021-03-11 09:08:00,0.0
...,...
2021-03-27 13:24:00,0.0
2021-03-27 13:26:00,0.0
2021-03-27 13:28:00,0.0
2021-03-27 13:30:00,0.0
