# Define all locations, HRRR variables, and time range, then fetch data

## imports

In [1]:
import logging
from pathlib import Path
from datetime import datetime

from pyhrrrzarr.schema import csv_to_locations
from pyhrrrzarr.hrrr.schema import HRRRVariable, VariableName, Level 
from pyhrrrzarr.hrrrzarr.requests import create_requests
from pyhrrrzarr.hrrrzarr.fetch import get_all_request_values
from pyhrrrzarr.hrrrzarr.postprocess import requests_to_df, add_wind_speed_and_direction



# Configure the root logger first so INFO-level messages are shown,
# then create the logger named after this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

## create all requests for data

In [2]:
# Station locations are read from the WiscoNet CSV; the relative path is
# resolved to an absolute one against the current working directory.
csv_path = Path("../data/WiscoNet pre-2024 stations.csv").resolve()
locations = csv_to_locations(csv_path)

# HRRR variables to request. Named `hrrr_variables` rather than `vars` so the
# built-in `vars()` is not shadowed for the rest of the kernel session.
hrrr_variables = [
    HRRRVariable(name=VariableName.TMP, level=Level._2M_ABOVE_GROUND),
    HRRRVariable(name=VariableName.DPT, level=Level._2M_ABOVE_GROUND),
    HRRRVariable(name=VariableName.APCP_1hr_acc_fcst, level=Level._SURFACE, type_model="fcst"),
    HRRRVariable(name=VariableName.RH, level=Level._2M_ABOVE_GROUND),
    HRRRVariable(name=VariableName.UGRD, level=Level._10M_ABOVE_GROUND),
    HRRRVariable(name=VariableName.VGRD, level=Level._10M_ABOVE_GROUND),
]

# Time range covered by the requests.
start = datetime(2024, 1, 1, 0, 0)
# NOTE(review): datetime.now() is naive local time and differs on every run,
# so the number of requests created is not reproducible. For a fixed range,
# pin the end instead, e.g. `end = datetime(2024, 9, 2, 0, 0)`.
end = datetime.now()

# Build the requests for the chosen locations, variables and time range.
requests = create_requests(
    locations=locations,
    variables=hrrr_variables,
    start=start,
    end=end,
)

INFO:root:Created 624960 requests


# Pull data from S3 and/or local cache

In [None]:
# around 4 hours for 620928 requests from s3, same number of requests in around 20 minutes from local cache
requests = await get_all_request_values(requests, batch_size=5500)


Fetching HRRR data:   0%|          | 0/624960 [00:00<?, ?it/s]

# Post-process data into a DataFrame and calculate wind speed and direction

In [None]:
# Keep only requests that have a value, convert them to a DataFrame,
# and add the wind speed and wind direction rows in one step.
df = add_wind_speed_and_direction(
    requests_to_df([req for req in requests if req.value is not None])
)
# Show the last rows as the cell output.
df.tail()

INFO:pyhrrrzarr.hrrrzarr.postprocess:Converted 2016 requests to DataFrame rows
INFO:pyhrrrzarr.hrrrzarr.postprocess:Added 336 wind speed and 336 wind direction rows


Unnamed: 0,location,lat,lon,run_hour,var_name,var_level,value,units
2683,OJNR,43.025,-89.54,2024-09-01 23:00:00,WIND_DIR,10m_above_ground,358.158735,degrees
2684,RNLD,45.655,-89.27,2024-09-01 23:00:00,WIND_DIR,10m_above_ground,322.530417,degrees
2685,SODO,44.779,-87.36,2024-09-01 23:00:00,WIND_DIR,10m_above_ground,348.278333,degrees
2686,SPNR,45.827,-91.86,2024-09-01 23:00:00,WIND_DIR,10m_above_ground,311.739762,degrees
2687,WCRS,44.185,-90.74,2024-09-01 23:00:00,WIND_DIR,10m_above_ground,339.326424,degrees


# persist postprocessed data to disk

In [None]:
# Output files are written relative to the current working directory.
parquet_path = "wisconet_HRRR.parquet"
csv_path_out = "wisconet_HRRR.csv"

# Write the post-processed frame in both formats (Parquet first, then CSV).
df.to_parquet(parquet_path)
df.to_csv(csv_path_out)