In [1]:
import polars as pl
from herbie import Herbie
import pandas as pd

In [2]:
H = Herbie("2025-11-01")
H.idx

✅ Found ┊ model=hrrr ┊ [3mproduct=sfc[0m ┊ [38;2;41;130;13m2025-Nov-01 00:00 UTC[92m F00[0m ┊ [38;2;255;153;0m[3mGRIB2 @ aws[0m ┊ [38;2;255;153;0m[3mIDX @ aws[0m


'https://noaa-hrrr-bdp-pds.s3.amazonaws.com/hrrr.20251101/conus/hrrr.t00z.wrfsfcf00.grib2.idx'

In [23]:
%%time
df = (
    pl.read_csv(
        H.idx,
        has_header=False,
        separator=":",
        new_columns=[
            "grib_message",
            "start_byte",
            "reference_time",
            "variable",
            "level",
            "forecast_time",
        ],
    )
    .with_columns(
        pl.col("reference_time").str.pad_end(14, "0").str.to_datetime("d=%Y%m%d%H%M")
    )
    .insert_column(2, (pl.col("start_byte").shift(-1) - 1).alias("end_byte"))
    .insert_column(
        3,
        pl.concat_str(
            pl.col("start_byte"),
            pl.lit("-"),
            pl.col("end_byte").cast(pl.String).fill_null(""),
        ).alias("byte_range"),
    )
    .insert_column(0, pl.lit(H.idx).alias("url"))
)

# Drop column with all nulls
df = df[[s.name for s in df if not (s.null_count() == df.height)]]
df

CPU times: user 39.2 ms, sys: 19.6 ms, total: 58.7 ms
Wall time: 362 ms


url,grib_message,start_byte,end_byte,byte_range,reference_time,variable,level,forecast_time
str,i64,i64,i64,str,datetime[μs],str,str,str
"""https://noaa-hrrr-bdp-pds.s3.a…",1,0,334294,"""0-334294""",2025-11-01 00:00:00,"""REFC""","""entire atmosphere""","""anl"""
"""https://noaa-hrrr-bdp-pds.s3.a…",2,334295,517716,"""334295-517716""",2025-11-01 00:00:00,"""RETOP""","""cloud top""","""anl"""
"""https://noaa-hrrr-bdp-pds.s3.a…",3,517717,912580,"""517717-912580""",2025-11-01 00:00:00,"""var discipline=0 center=7 loca…","""entire atmosphere""","""anl"""
"""https://noaa-hrrr-bdp-pds.s3.a…",4,912581,1203223,"""912581-1203223""",2025-11-01 00:00:00,"""VIL""","""entire atmosphere""","""anl"""
"""https://noaa-hrrr-bdp-pds.s3.a…",5,1203224,2610380,"""1203224-2610380""",2025-11-01 00:00:00,"""VIS""","""surface""","""anl"""
…,…,…,…,…,…,…,…,…
"""https://noaa-hrrr-bdp-pds.s3.a…",166,126934423,126934655,"""126934423-126934655""",2025-11-01 00:00:00,"""ICEC""","""surface""","""anl"""
"""https://noaa-hrrr-bdp-pds.s3.a…",167,126934656,128349936,"""126934656-128349936""",2025-11-01 00:00:00,"""SBT123""","""top of atmosphere""","""anl"""
"""https://noaa-hrrr-bdp-pds.s3.a…",168,128349937,130686198,"""128349937-130686198""",2025-11-01 00:00:00,"""SBT124""","""top of atmosphere""","""anl"""
"""https://noaa-hrrr-bdp-pds.s3.a…",169,130686199,131986523,"""130686199-131986523""",2025-11-01 00:00:00,"""SBT113""","""top of atmosphere""","""anl"""


In [24]:
df1 = df.filter(pl.col("variable").is_in(["UGRD", "VGRD"]))
df1

url,grib_message,start_byte,end_byte,byte_range,reference_time,variable,level,forecast_time
str,i64,i64,i64,str,datetime[μs],str,str,str
"""https://noaa-hrrr-bdp-pds.s3.a…",10,4269730,5018584,"""4269730-5018584""",2025-11-01 00:00:00,"""UGRD""","""250 mb""","""anl"""
"""https://noaa-hrrr-bdp-pds.s3.a…",11,5018585,5754631,"""5018585-5754631""",2025-11-01 00:00:00,"""VGRD""","""250 mb""","""anl"""
"""https://noaa-hrrr-bdp-pds.s3.a…",12,5754632,6498788,"""5754632-6498788""",2025-11-01 00:00:00,"""UGRD""","""300 mb""","""anl"""
"""https://noaa-hrrr-bdp-pds.s3.a…",13,6498789,7229386,"""6498789-7229386""",2025-11-01 00:00:00,"""VGRD""","""300 mb""","""anl"""
"""https://noaa-hrrr-bdp-pds.s3.a…",17,9405025,10001845,"""9405025-10001845""",2025-11-01 00:00:00,"""UGRD""","""500 mb""","""anl"""
…,…,…,…,…,…,…,…,…
"""https://noaa-hrrr-bdp-pds.s3.a…",37,23509355,24115064,"""23509355-24115064""",2025-11-01 00:00:00,"""VGRD""","""1000 mb""","""anl"""
"""https://noaa-hrrr-bdp-pds.s3.a…",60,26622182,27732612,"""26622182-27732612""",2025-11-01 00:00:00,"""UGRD""","""80 m above ground""","""anl"""
"""https://noaa-hrrr-bdp-pds.s3.a…",61,27732613,28825678,"""27732613-28825678""",2025-11-01 00:00:00,"""VGRD""","""80 m above ground""","""anl"""
"""https://noaa-hrrr-bdp-pds.s3.a…",77,42384646,44766260,"""42384646-44766260""",2025-11-01 00:00:00,"""UGRD""","""10 m above ground""","""anl"""


In [25]:
df1.with_columns(
    download_group=(pl.col("grib_message").diff().fill_null(1) != 1).cum_sum(),
).group_by("url", "download_group", maintain_order=True).agg(
    pl.col("start_byte").min(),
    pl.col("end_byte").max(),
    pl.col("grib_message"),
    pl.col("variable"),
    pl.col("level"),
).insert_column(
    3,
    pl.concat_str(
        pl.col("start_byte"),
        pl.lit("-"),
        pl.col("end_byte").cast(pl.String).fill_null(""),
    ).alias("byte_range"),
)

url,download_group,start_byte,byte_range,end_byte,grib_message,variable,level
str,u32,i64,str,i64,list[i64],list[str],list[str]
"""https://noaa-hrrr-bdp-pds.s3.a…",0,4269730,"""4269730-7229386""",7229386,"[10, 11, … 13]","[""UGRD"", ""VGRD"", … ""VGRD""]","[""250 mb"", ""250 mb"", … ""300 mb""]"
"""https://noaa-hrrr-bdp-pds.s3.a…",1,9405025,"""9405025-10588422""",10588422,"[17, 18]","[""UGRD"", ""VGRD""]","[""500 mb"", ""500 mb""]"
"""https://noaa-hrrr-bdp-pds.s3.a…",2,13278275,"""13278275-14473594""",14473594,"[23, 24]","[""UGRD"", ""VGRD""]","[""700 mb"", ""700 mb""]"
"""https://noaa-hrrr-bdp-pds.s3.a…",3,16961021,"""16961021-18199651""",18199651,"[28, 29]","[""UGRD"", ""VGRD""]","[""850 mb"", ""850 mb""]"
"""https://noaa-hrrr-bdp-pds.s3.a…",4,19920258,"""19920258-21161109""",21161109,"[32, 33]","[""UGRD"", ""VGRD""]","[""925 mb"", ""925 mb""]"
"""https://noaa-hrrr-bdp-pds.s3.a…",5,22879925,"""22879925-24115064""",24115064,"[36, 37]","[""UGRD"", ""VGRD""]","[""1000 mb"", ""1000 mb""]"
"""https://noaa-hrrr-bdp-pds.s3.a…",6,26622182,"""26622182-28825678""",28825678,"[60, 61]","[""UGRD"", ""VGRD""]","[""80 m above ground"", ""80 m above ground""]"
"""https://noaa-hrrr-bdp-pds.s3.a…",7,42384646,"""42384646-47147875""",47147875,"[77, 78]","[""UGRD"", ""VGRD""]","[""10 m above ground"", ""10 m above ground""]"


In [26]:
# Next steps
# 1. Download each item (use multithreading) into temp files
# 2. Join temp files into a single grib2 file