# Weather generative model

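Data-driven weather sampler that fits per-circuit statistics (with a global fallback) from historical lap data and produces per-lap weather fields for simulations. A `year` argument is accepted by the generator but is not currently used to condition the samples.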

In [1]:
from pathlib import Path
import numpy as np
import pandas as pd


In [2]:
class WeatherModel:
    """Lightweight generative weather model using historical stats per circuit/year."""

    WEATHER_COLS = [
        "track_temperature",
        "air_temperature",
        "humidity",
        "pressure",
        "rainfall",
        "wind_speed",
        "wind_direction",
    ]

    def __init__(self, df: pd.DataFrame | None = None, rng: np.random.Generator | None = None):
        self.rng = rng or np.random.default_rng()
        if df is None:
            df = self._load_default_df()
        self.df = df.copy()
        self.stats_circuit, self.stats_global = self._fit_stats(self.df)

    def _load_default_df(self):
        candidates = [
            Path("fastf1_lap_dataset.csv"),
            Path("models/fastf1_lap_dataset.csv"),
        ]
        path = next((p for p in candidates if p.exists()), None)
        if path is None:
            raise FileNotFoundError("fastf1_lap_dataset.csv not found")
        return pd.read_csv(path)

    def _fit_stats(self, df: pd.DataFrame):
        stats_circuit = {}
        stats_global = {}

        for col in self.WEATHER_COLS:
            if col not in df.columns:
                continue
            series = pd.to_numeric(df[col], errors="coerce")
            df[col] = series
            stats_global[col] = {
                "mean": float(series.mean(skipna=True)),
                "std": float(series.std(skipna=True) if series.std(skipna=True) > 0 else 1.0),
            }
            by_circ = df.groupby("circuit_id")[col].agg(["mean", "std"]).reset_index()
            stats_circuit[col] = {
                row["circuit_id"]: {
                    "mean": float(row["mean"]),
                    "std": float(row["std"] if row["std"] > 0 else stats_global[col]["std"]),
                }
                for _, row in by_circ.iterrows()
            }
        return stats_circuit, stats_global

    def _sample_value(self, circuit_id, col, size=1, rng=None):
        rng = rng or self.rng
        stats_c = self.stats_circuit.get(col, {}).get(circuit_id)
        stats_g = self.stats_global.get(col, {"mean": 0.0, "std": 1.0})
        mean = stats_c["mean"] if stats_c is not None else stats_g["mean"]
        std = stats_c["std"] if stats_c is not None else stats_g["std"]
        if col == "rainfall":
            prob = max(0.0, min(1.0, mean))
            return rng.binomial(1, prob, size=size)
        if col == "wind_direction":
            # wrap-around handling
            samples = (rng.normal(mean, std if std > 0 else 30.0, size=size)) % 360.0
            return samples
        samples = rng.normal(mean, std if std > 0 else 1.0, size=size)
        return samples

    def generate_race_weather(self, circuit_id: str, year: int | None, total_laps: int, rng: np.random.Generator | None = None):
        rng = rng or self.rng
        weather = {}
        for col in self.WEATHER_COLS:
            if col not in self.stats_global:
                continue
            samples = self._sample_value(circuit_id, col, size=total_laps, rng=rng)
            weather[col] = samples
        return weather


# Instantiate default model for notebook use.
# No DataFrame is passed, so the constructor reads fastf1_lap_dataset.csv from
# the working directory (or models/) and raises FileNotFoundError if neither
# file exists; statistics are fitted immediately at construction.
weather_model = WeatherModel()


In [3]:
# Draw a handful of simulated races and tabulate mean/min/max of every
# generated weather field so the sampler's output can be eyeballed.
num_weather_samples = 5
sampled_weather = []

circuits = weather_model.df['circuit_id'].dropna().unique().tolist()
years_by_circuit = weather_model.df.groupby('circuit_id')['year'].unique().to_dict()

# (column suffix, NaN-aware reducer) pairs, in the order they appear in the table
summary_reducers = (("mean", np.nanmean), ("min", np.nanmin), ("max", np.nanmax))

for i in range(num_weather_samples):
    # Each race gets its own generator, seeded from the model's generator.
    seed = weather_model.rng.integers(0, 1_000_000_000)
    rng = np.random.default_rng(seed)

    circuit_id = rng.choice(circuits)
    candidate_years = years_by_circuit.get(circuit_id, [2025])
    year = int(rng.choice(candidate_years))

    weather = weather_model.generate_race_weather(circuit_id, year, total_laps=50, rng=rng)

    summary = {"sample": i, "circuit_id": circuit_id, "year": year}
    for col, values in weather.items():
        arr = np.asarray(values, dtype=float)
        for suffix, reducer in summary_reducers:
            summary[f"{col}_{suffix}"] = float(reducer(arr))
    sampled_weather.append(summary)

weather_samples_df = pd.DataFrame(sampled_weather)
print("Weather samples (summary stats per race):")
print(weather_samples_df)


Weather samples (summary stats per race):
   sample    circuit_id  year  track_temperature_mean  track_temperature_min  \
0       0  le_castellet  2021               46.193351              25.931425   
1       1    marina_bay  2025               34.336830              27.368268   
2       2        jeddah  2023               32.097045              23.498225   
3       3         sochi  2019               32.095699              14.928297   
4       4         imola  2024               26.660531              -3.144634   

   track_temperature_max  air_temperature_mean  air_temperature_min  \
0              61.731654             27.065229            22.802601   
1              40.343481             29.441996            26.955938   
2              37.643829             27.166148            22.819013   
3              51.700962             23.590344            16.646440   
4              46.806708             19.013911             2.602852   

   air_temperature_max  humidity_mean  ...  pressu