Here I wrote the program to grab the data for my hometown station; I was a little bit surprised that it actually had a GHCN record with data going back that far.
Also, finding the GHCN ID for that station was way more difficult than I expected.

In [8]:
import pandas as pd, numpy as np
from pathlib import Path
import fsspec

# --- Remote inputs -----------------------------------------------------------
# Object locations inside the "noaa-ghcn-pds" bucket. S3_BY_STATION is a
# template: fill the {id} placeholder with a station identifier via .format().
S3_STATIONS_TXT = "s3://noaa-ghcn-pds/ghcnd-stations.txt"
S3_INVENTORY_TXT = "s3://noaa-ghcn-pds/ghcnd-inventory.txt"
S3_BY_STATION = "s3://noaa-ghcn-pds/csv/by_station/{id}.csv"

# Handed to pandas as `storage_options` whenever one of the URLs above is read.
STOR = dict(anon=True)

# --- Local outputs -----------------------------------------------------------
# The directory is created on the spot (parents included) so later writes
# cannot fail on a missing folder; re-running the cell is harmless.
OUTDIR = Path("..") / "data"
OUTDIR.mkdir(parents=True, exist_ok=True)

# Same file stem for both export formats; only the suffix differs.
OUT_PARQUET = OUTDIR.joinpath("ghcn_K3T5_daily.parquet")
OUT_CSV = OUT_PARQUET.with_suffix(".csv")


In [9]:
def load_station_daily(station_id: str, drop_flagged: bool = True) -> pd.DataFrame:
    """Download one station's GHCN-Daily CSV and reshape it to one row per day.

    The remote file is in "long" form (one row per station/date/element). It is
    pivoted so that every element (TMAX, TMIN, PRCP, ...) becomes its own
    column, and calendar helper columns are appended.

    Parameters
    ----------
    station_id : str
        GHCN-Daily station identifier; substituted into ``S3_BY_STATION``.
    drop_flagged : bool, default True
        Discard observations whose QFLAG is non-blank before pivoting. GHCN-Daily
        sets QFLAG only on values that failed a quality-assurance check, and a
        single such value would otherwise be able to become an all-time record
        downstream. Pass ``False`` to keep every observation.

    Returns
    -------
    pd.DataFrame
        Columns ``ID``, ``DATE`` (datetime64), one column per element present in
        the file, plus integer ``year``, ``month`` and ``day``. TMAX/TMIN/TAVG
        and PRCP are divided by 10 (GHCN-Daily stores them in tenths of °C and
        tenths of mm respectively); other elements are left in file units.
    """
    url = S3_BY_STATION.format(id=station_id)
    df = pd.read_csv(
        url,
        storage_options=STOR,
        header=None,
        names=["ID","DATE","ELEMENT","DATA_VALUE","MFLAG","QFLAG","SFLAG","OBS_TIME"],
        # QFLAG is read as text so blank (NaN) vs. flagged is unambiguous.
        dtype={"ID": str, "ELEMENT": str, "QFLAG": str},
        low_memory=False
    )

    if drop_flagged:
        # Blank QFLAG (parsed as NaN, or an empty/whitespace string) == passed QC.
        qflag = df["QFLAG"]
        passed_qc = qflag.isna() | qflag.astype(str).str.strip().eq("")
        df = df[passed_qc]

    # Ensure numeric; -9999 is treated as a missing-value sentinel.
    values = pd.to_numeric(df["DATA_VALUE"], errors="coerce").replace(-9999, np.nan)
    df = df.assign(DATA_VALUE=values)

    # Long -> wide: one row per (station, date), one column per element.
    # "first" keeps the first non-null value if a key appears more than once.
    wide = df.pivot_table(index=["ID","DATE"], columns="ELEMENT", values="DATA_VALUE", aggfunc="first")

    # Tenths -> whole units (°C for temperatures, mm for precipitation).
    # The pivoted values are already float, so a plain division is enough.
    for c in ("TMAX", "TMIN", "TAVG", "PRCP"):
        if c in wide:
            wide[c] = wide[c] / 10.0

    wide = wide.reset_index()
    wide["DATE"] = pd.to_datetime(wide["DATE"], format="%Y%m%d")
    wide["year"] = wide["DATE"].dt.year
    wide["month"] = wide["DATE"].dt.month
    wide["day"] = wide["DATE"].dt.day
    return wide

In [10]:
def station_climatology(
    station_id: str,
    daily: "pd.DataFrame | None" = None,
    normal_start: int = 1991,
    normal_end: int = 2020,
) -> pd.DataFrame:
    """Summarise a station's daily temperatures per calendar day.

    Parameters
    ----------
    station_id : str
        Station identifier. Used to fetch the data via ``load_station_daily``
        when ``daily`` is not supplied, and in error messages.
    daily : pd.DataFrame, optional
        Already-loaded daily table with at least the columns ``year``,
        ``month``, ``day``, ``TMIN`` and ``TMAX``. Supplying it avoids
        downloading the station file a second time.
    normal_start, normal_end : int
        First and last year (both inclusive) of the averaging period used for
        the "normal" columns. Defaults to 1991–2020.

    Returns
    -------
    pd.DataFrame
        Indexed by ``"MM-DD"`` strings, with columns ``record_min_temp``,
        ``average_min_temp``, ``average_max_temp``, ``record_max_temp``.
        Records use every year in the data; averages use only the normals
        period. Calendar days with no rows inside the normals period are
        omitted (inner join).

    Raises
    ------
    ValueError
        If ``normal_start`` is later than ``normal_end``.
    KeyError
        If the daily table lacks one of the required columns.
    """
    if normal_start > normal_end:
        raise ValueError(
            f"normal_start ({normal_start}) must not exceed normal_end ({normal_end})"
        )

    df = load_station_daily(station_id) if daily is None else daily

    # Fail early with a readable message (e.g. a station that never reported TMIN).
    required = ("year", "month", "day", "TMIN", "TMAX")
    missing = [c for c in required if c not in df.columns]
    if missing:
        raise KeyError(f"station {station_id}: daily data is missing column(s) {missing}")

    by_day = ["month", "day"]

    # All-time records
    records = df.groupby(by_day).agg(
        record_min_temp=("TMIN", "min"),
        record_max_temp=("TMAX", "max"),
    )

    # Normals: same grouping, restricted to the (inclusive) averaging period.
    in_period = df["year"].between(normal_start, normal_end)
    normals = df[in_period].groupby(by_day).agg(
        average_min_temp=("TMIN", "mean"),
        average_max_temp=("TMAX", "mean"),
    )

    out = records.join(normals, how="inner")
    # Column order runs coldest -> warmest.
    out = out[["record_min_temp", "average_min_temp", "average_max_temp", "record_max_temp"]]
    # Flatten the (month, day) MultiIndex into "MM-DD" labels.
    out.index = [f"{m:02d}-{d:02d}" for m, d in out.index]
    return out



In [11]:
# Station processed by this notebook.
station_id = "USC00414903"

# Fetch the per-day table and persist it in both export formats.
daily = load_station_daily(station_id)
daily.to_parquet(path=OUT_PARQUET, index=False)
daily.to_csv(path_or_buf=OUT_CSV, index=False)
print("Wrote daily data:", OUT_PARQUET.resolve())

# Per-calendar-day summary (station_climatology loads the station data itself);
# preview the first ten calendar days.
climo = station_climatology(station_id)
print(climo.iloc[:10])

# Optionally save climatology
climo_out = OUTDIR.joinpath(f"{station_id}_climatology.csv")
climo.to_csv(path_or_buf=climo_out)
print("Wrote climatology:", climo_out.resolve())


Wrote daily data: C:\Users\tomy3\data\ghcn_K3T5_daily.parquet
       record_min_temp  average_min_temp  average_max_temp  record_max_temp
01-01             -5.0          6.386957         18.769565             26.7
01-02            -10.0          5.365217         16.504348             28.9
01-03             -7.2          4.768182         16.340909             26.7
01-04             -5.6          4.869565         17.421739             29.4
01-05             -7.2          5.043478         16.678261             25.0
01-06             -5.6          4.634783         16.695652             26.7
01-07             -8.3          4.800000         15.065217             27.2
01-08             -8.3          4.400000         16.639130             25.6
01-09             -9.4          4.800000         18.991304             26.7
01-10             -9.4          5.652174         18.469565             27.8
Wrote climatology: C:\Users\tomy3\data\USC00414903_climatology.csv


In [12]:
import plotly.graph_objects as go
import datetime

# The climatology is keyed by "MM-DD" and includes "02-29", so it lives on a
# 366-day calendar. Every day-of-year below is computed against this leap year
# so the curves and today's markers share one x-axis.
LEAP_REF_YEAR = 2000

# Prepare climatology
climo_plot = climo.copy()
# Derive x from each row's own "MM-DD" label rather than its row position:
# this stays correct even if some calendar days are absent from the table.
climo_plot["dayofyear"] = [
    datetime.date(LEAP_REF_YEAR, int(label[:2]), int(label[3:])).timetuple().tm_yday
    for label in climo_plot.index
]

# Today's observed/forecast values (from MSN Weather feed)
# NOTE: these readings are typed in by hand while `today` follows the system
# clock — update them whenever the cell is re-run on a different day.
today = datetime.date.today()
# Map today's month/day onto the same leap-year calendar as the climatology.
# Using today's own tm_yday would be one day short after Feb 28 in non-leap years.
doy = datetime.date(LEAP_REF_YEAR, today.month, today.day).timetuple().tm_yday
today_low_f  = 74
today_high_f = 95
current_f    = 82

# Convert to °C
today_low  = (today_low_f - 32) * 5/9
today_high = (today_high_f - 32) * 5/9
current    = (current_f - 32) * 5/9

# Build interactive figure
fig = go.Figure()

# Record envelope: the second trace fills back to the first ('tonexty'),
# shading the band between record max and record min.
fig.add_traces([
    go.Scatter(
        x=climo_plot["dayofyear"], y=climo_plot["record_max_temp"],
        line=dict(color="lightgray"), name="Record Max", mode="lines"
    ),
    go.Scatter(
        x=climo_plot["dayofyear"], y=climo_plot["record_min_temp"],
        fill='tonexty', line=dict(color="lightgray"), name="Record Min",
        mode="lines"
    )
])

# Normals
fig.add_trace(go.Scatter(
    x=climo_plot["dayofyear"], y=climo_plot["average_max_temp"],
    line=dict(color="red"), name="Avg Max (1991–2020)", mode="lines"
))
fig.add_trace(go.Scatter(
    x=climo_plot["dayofyear"], y=climo_plot["average_min_temp"],
    line=dict(color="blue"), name="Avg Min (1991–2020)", mode="lines"
))

# Today's values
fig.add_trace(go.Scatter(
    x=[doy], y=[today_high], mode="markers", marker=dict(color="red", size=12, symbol="triangle-up"),
    name="Today's High"
))
fig.add_trace(go.Scatter(
    x=[doy], y=[today_low], mode="markers", marker=dict(color="blue", size=12, symbol="triangle-down"),
    name="Today's Low"
))
fig.add_trace(go.Scatter(
    x=[doy], y=[current], mode="markers", marker=dict(color="black", size=12, symbol="circle"),
    name="Current Temp"
))

# Layout
fig.update_layout(
    title=f"Daily Temperature Climatology – La Grange, TX (USC00414903)<br>{today}",
    xaxis_title="Day of Year",
    yaxis_title="Temperature (°C)",
    hovermode="x unified",
    template="plotly_white"
)

fig.show()