# Define all locations, HRRR variables, and the time range, then fetch data

## imports

In [3]:
import logging
from pathlib import Path
from datetime import datetime

from pyhrrrzarr.schema import csv_to_locations
from pyhrrrzarr.hrrr.schema import HRRRVariable, VariableName, Level 
from pyhrrrzarr.hrrrzarr.requests import create_requests
from pyhrrrzarr.hrrrzarr.fetch import get_all_request_values
from pyhrrrzarr.hrrrzarr.postprocess import requests_to_df, add_wind_speed_and_direction



# Configure the root logger at INFO level, and create a logger for this
# notebook's own messages.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

## create all requests for data

In [4]:
# Station locations are parsed from the WiscoNet station CSV.
csv_path = Path("../data/WiscoNet pre-2024 stations.csv").resolve()
locations = csv_to_locations(csv_path)

# HRRR fields to request for every location. The list is called `variables`
# (not `vars`) so the built-in `vars()` stays usable in the kernel session.
variables = [
    HRRRVariable(name=VariableName.TMP, level=Level._2M_ABOVE_GROUND),
    HRRRVariable(name=VariableName.DPT, level=Level._2M_ABOVE_GROUND),
    HRRRVariable(name=VariableName.APCP_1hr_acc_fcst, level=Level._SURFACE, type_model="fcst"),
    HRRRVariable(name=VariableName.RH, level=Level._2M_ABOVE_GROUND),
    HRRRVariable(name=VariableName.UGRD, level=Level._10M_ABOVE_GROUND),
    HRRRVariable(name=VariableName.VGRD, level=Level._10M_ABOVE_GROUND),
]

# Time range to request. NOTE: `end` is the moment this cell is executed, so
# the number of requests created changes from run to run. Replace it with a
# fixed value such as datetime(2024, 9, 2, 0, 0) when a reproducible range is
# required.
start = datetime(2024, 1, 1, 0, 0)
end = datetime.now()

# Build the list of requests from the locations, variables and time range.
requests = create_requests(
    locations=locations,
    variables=variables,
    start=start,
    end=end,
)

INFO:root:Created 624960 requests


In [5]:
# Inspect the station locations that were parsed from the stations CSV.
locations

[Location(lat=43.297, lon=-89.38, name='ALTN'),
 Location(lat=42.899, lon=-88.74, name='COSP'),
 Location(lat=43.345, lon=-89.71, name='DFRC'),
 Location(lat=44.718, lon=-90.6, name='GNWD'),
 Location(lat=44.119, lon=-89.53, name='HNCK'),
 Location(lat=45.841, lon=-89.67, name='KEMP'),
 Location(lat=44.402, lon=-87.66, name='KWNE'),
 Location(lat=42.83, lon=-90.79, name='LNCT'),
 Location(lat=44.76, lon=-90.1, name='MRFD'),
 Location(lat=43.025, lon=-89.54, name='OJNR'),
 Location(lat=45.655, lon=-89.27, name='RNLD'),
 Location(lat=44.779, lon=-87.36, name='SODO'),
 Location(lat=45.827, lon=-91.86, name='SPNR'),
 Location(lat=44.185, lon=-90.74, name='WCRS')]

# Pull data from S3 and/or local cache

In [3]:
# around 4 hours for 620928 requests from s3, same number of requests in around 20 minutes from local cache
requests = await get_all_request_values(requests, batch_size=5500)


Fetching HRRR data:   0%|          | 0/624960 [00:00<?, ?it/s]

decompressing:   0%|          | 0/624960 [00:00<?, ?it/s]

# Post-process data to a DataFrame, calculate wind speed and direction

In [4]:
# Drop requests that have no value, convert the remainder to a DataFrame, then
# append the wind speed / wind direction rows; finish by previewing the tail.
df = add_wind_speed_and_direction(
    requests_to_df([req for req in requests if req.value is not None])
)
df.tail()

INFO:pyhrrrzarr.hrrrzarr.postprocess:Converted 624960 requests to DataFrame rows
INFO:pyhrrrzarr.hrrrzarr.postprocess:Added 104160 wind speed and 104160 wind direction rows


Unnamed: 0,location,lat,lon,run_hour,var_name,var_level,value,units
833275,KWNE,44.402,-87.66,2024-11-05 23:00:00,WIND_DIR,10m_above_ground,175.864277,degrees
833276,RNLD,45.655,-89.27,2024-11-05 23:00:00,WIND_DIR,10m_above_ground,60.844721,degrees
833277,SODO,44.779,-87.36,2024-11-05 23:00:00,WIND_DIR,10m_above_ground,126.089714,degrees
833278,SPNR,45.827,-91.86,2024-11-05 23:00:00,WIND_DIR,10m_above_ground,324.536585,degrees
833279,WCRS,44.185,-90.74,2024-11-05 23:00:00,WIND_DIR,10m_above_ground,290.900604,degrees


# persist postprocessed data to disk

In [7]:
# Save the post-processed frame in two formats that share one base name:
# a .parquet file and a .csv file placed next to each other.
csv_path = Path("../data/wisconet_pre_2024_stations_HRRR_data.csv").resolve()
parquet_path = csv_path.with_suffix(".parquet")

df.to_parquet(path=parquet_path)
df.to_csv(path_or_buf=csv_path)

In [2]:
import logging
from datetime import datetime

from pyhrrrzarr.main import fetch_hrrr_data
from pyhrrrzarr.hrrr.schema import HRRRVariable, VariableName, Level
from pyhrrrzarr.schema import Location


# Time window to download.
start = datetime(2024, 10, 1, 0, 0)
end = datetime(2024, 10, 2, 0, 0)

# Points to download the variables for.
locations = [
    Location(lat=43.0731, lon=-89.4012, name="Madison"),
    Location(lat=47.6062, lon=-122.3321, name="Seattle"),
]

# HRRR variables to download at each of those points.
variables = [
    HRRRVariable(level=Level._2M_ABOVE_GROUND, name=VariableName.TMP),
    HRRRVariable(level=Level._2M_ABOVE_GROUND, name=VariableName.DPT),
    HRRRVariable(level=Level._SURFACE, name=VariableName.APCP_1hr_acc_fcst, type_model="fcst"),
    HRRRVariable(level=Level._2M_ABOVE_GROUND, name=VariableName.RH),
    HRRRVariable(level=Level._10M_ABOVE_GROUND, name=VariableName.UGRD),
    HRRRVariable(level=Level._10M_ABOVE_GROUND, name=VariableName.VGRD),
]

# Fetch everything into `df` and preview the first rows.
df = fetch_hrrr_data(
    variables=variables,
    locations=locations,
    start=start,
    end=end,
)
df.head()


Fetching HRRR data:   0%|          | 0/288 [00:00<?, ?it/s]

decompressing:   0%|          | 0/288 [00:00<?, ?it/s]

Unnamed: 0,location,lat,lon,run_hour,var_name,var_level,value,units
0,Seattle,47.6062,-122.3321,2024-10-01 00:00:00,TMP,2m_above_ground,290.045135,K
1,Madison,43.0731,-89.4012,2024-10-01 00:00:00,TMP,2m_above_ground,296.420135,K
2,Madison,43.0731,-89.4012,2024-10-01 01:00:00,APCP_1hr_acc_fcst,surface,0.0,kg/m^2
3,Seattle,47.6062,-122.3321,2024-10-01 01:00:00,APCP_1hr_acc_fcst,surface,0.0,kg/m^2
4,Madison,43.0731,-89.4012,2024-10-01 01:00:00,RH,2m_above_ground,67.199997,%
