Using the GribStream API, we download the reforecast data for the past five years. We make one request per day because, for each day, we want the next-day forecast exactly as it was available at that point in time, with no future knowledge.

In [4]:
import datetime as dt
from io import StringIO
import datetime as dt


import pandas as pd
import requests

  # <-- no brackets




def _utc_iso_z(moment):
    """Format a UTC-aware datetime as ISO 8601 with a trailing ``Z``."""
    return moment.isoformat().replace("+00:00", "Z")


def fetch_nextday_tmax_lax(start_date, end_date, asof_hour_utc=12, *, api_token=None):
    """Download the day-ahead forecast maximum 2 m temperature at KLAX.

    One request is made per calendar day in ``[start_date, end_date]``
    (inclusive). For each target day the forecast window is the UTC day
    00:00-24:00 and the ``asOf`` cutoff is the *previous* UTC day at
    ``asof_hour_utc``, so only forecasts issued before the target day are used.

    NOTE(review): the window is the UTC calendar day, not the local day at
    LAX -- remember to shift the dates (or use the correct date) if a
    local-day maximum is what downstream code expects.

    Args:
        start_date: First target day (``datetime.date``).
        end_date: Last target day, inclusive (``datetime.date``).
        asof_hour_utc: Hour (0-23, UTC) on the previous day used as the
            ``asOf`` knowledge cutoff.
        api_token: Optional bearer token. When omitted, the
            ``GRIBSTREAM_API_TOKEN`` environment variable is used, then the
            legacy built-in token.

    Returns:
        ``pandas.DataFrame`` with columns ``date_utc``, ``asof_utc``,
        ``tmax_K``, ``tmax_C`` and ``tmax_F``; one row per day that returned
        data. The columns are present even when no rows were collected.
        ``tempK`` values are treated as Kelvin when deriving C and F.
    """
    import os  # function-scope import keeps this block self-contained

    columns = ["date_utc", "asof_utc", "tmax_K", "tmax_C", "tmax_F"]
    total_days = (end_date - start_date).days + 1
    if total_days <= 0:
        # Empty range: keep a stable schema so callers can still select columns.
        return pd.DataFrame(columns=columns)

    # SECURITY: the literal below is a credential committed to source. It is
    # kept only so existing callers keep working; rotate it and supply the
    # token via `api_token` or the GRIBSTREAM_API_TOKEN environment variable.
    token = (
        api_token
        or os.environ.get("GRIBSTREAM_API_TOKEN")
        or "c60a377573a67faffeb88889da834a48508c8110"
    )
    lat_lax = 33.942
    lon_lax = -118.408
    base_url = "https://gribstream.com/api/v2/nbm/history"
    headers = {
        "Content-Type": "application/json",
        "Accept": "text/csv",
        "Authorization": f"Bearer {token}",
    }

    rows = []
    skipped = []  # (day, reason) for every day that produced no row
    one_day = dt.timedelta(days=1)

    # A single Session reuses the underlying connection across all requests.
    with requests.Session() as session:
        for day_index in range(1, total_days + 1):
            curr = start_date + (day_index - 1) * one_day
            # overwrite the same line every iteration
            print(f"\rFetching day {day_index}/{total_days} ({curr}) ...", end="", flush=True)

            from_dt = dt.datetime(curr.year, curr.month, curr.day, 0, 0, tzinfo=dt.timezone.utc)
            until_dt = from_dt + one_day
            asof_dt = (from_dt - one_day).replace(
                hour=asof_hour_utc, minute=0, second=0, microsecond=0
            )
            asof_iso = _utc_iso_z(asof_dt)

            payload = {
                "fromTime": _utc_iso_z(from_dt),
                "untilTime": _utc_iso_z(until_dt),
                "asOf": asof_iso,
                "coordinates": [{"lat": lat_lax, "lon": lon_lax, "name": "KLAX"}],
                "variables": [
                    {"name": "TMP", "level": "2 m above ground", "info": "", "alias": "tempK"}
                ],
            }

            try:
                resp = session.post(base_url, json=payload, headers=headers, timeout=15)
                resp.raise_for_status()
                df = pd.read_csv(StringIO(resp.text))
            except Exception as exc:
                # Deliberately broad: one bad day must not abort a multi-year
                # download. The failure is recorded and reported below instead
                # of being dropped silently.
                skipped.append((curr, repr(exc)))
                continue

            if df.empty:
                skipped.append((curr, "empty response"))
                continue

            # max() commutes with the monotonic unit conversions, so convert
            # the scalar maximum rather than the whole column.
            tmax_k = df["tempK"].max()
            tmax_c = tmax_k - 273.15
            rows.append({
                "date_utc": curr.isoformat(),
                "asof_utc": asof_iso,
                "tmax_K": tmax_k,
                "tmax_C": tmax_c,
                "tmax_F": tmax_c * 9 / 5 + 32,
            })

    print()  # terminate the in-place progress line
    if skipped:
        preview = ", ".join(f"{day} ({reason})" for day, reason in skipped[:5])
        extra = f", ... (+{len(skipped) - 5} more)" if len(skipped) > 5 else ""
        print(f"Skipped {len(skipped)}/{total_days} day(s): {preview}{extra}")

    return pd.DataFrame(rows, columns=columns)


# Inclusive download window, expressed as calendar dates.
start = dt.date(year=2021, month=1, day=1)
end = dt.date(year=2025, month=8, day=1)

# One request per day in the window; each day's forecast is cut off at
# 20:00 UTC on the previous day. Despite the name, `monthly_tmax` holds one
# row per day that returned data.
monthly_tmax = fetch_nextday_tmax_lax(
    start_date=start,
    end_date=end,
    asof_hour_utc=20,
)
print(monthly_tmax.head())



     date_utc              asof_utc  tmax_K  tmax_C  tmax_F
0  2021-01-01  2020-12-31T20:00:00Z  291.28   18.13  64.634
1  2021-01-02  2021-01-01T20:00:00Z  290.31   17.16  62.888
2  2021-01-03  2021-01-02T20:00:00Z  288.16   15.01  59.018
3  2021-01-04  2021-01-03T20:00:00Z  288.55   15.40  59.720
4  2021-01-05  2021-01-04T20:00:00Z  290.26   17.11  62.798


In [5]:
# Persist the downloaded table; pandas' default settings also write the
# row index as the first (unnamed) CSV column.
output_path = "/Users/giulioelmi/Desktop/kelshi_trading/reforecast_data.csv"
monthly_tmax.to_csv(output_path)