In [None]:
from pathlib import Path
import pandas as pd
import numpy as np


# Safety car model
Estimate per-lap safety car (SC) and virtual safety car (VSC) hazards by circuit and by year, each split by race phase (early / middle / late), then simulate SC/VSC flags lap by lap.


In [None]:

import numpy as np
import pandas as pd
from pathlib import Path


class SafetyCarModel:
    """SC/VSC hazard model using per-circuit and per-year rates with phase blending."""

    def __init__(self, df=None, include_year=True, min_laps_prior=500.0):
        self.include_year = include_year
        self.min_laps_prior = float(min_laps_prior)

        if df is None:
            df = self._load_default_df()
        self.df = df.copy()

        self.laps = self._prepare_laps(self.df)
        (
            self.sc_stats,
            self.sc_hazard_circuit_phase,
            self.sc_hazard_year_phase,
            self.global_sc_hazard,
        ) = self._build_hazards(target="safety_car_this_lap")
        (
            self.vsc_stats,
            self.vsc_hazard_circuit_phase,
            self.vsc_hazard_year_phase,
            self.global_vsc_hazard,
        ) = self._build_hazards(target="virtual_sc_this_lap")

    def _load_default_df(self):
        candidates = [
            Path("fastf1_lap_dataset.csv"),
            Path("models/fastf1_lap_dataset.csv"),
            Path("driver_lap_dataset.csv"),
            Path("models/driver_lap_dataset.csv"),
        ]
        csv_path = next((p for p in candidates if p.exists()), None)
        if csv_path is None:
            raise FileNotFoundError("No lap dataset found (fastf1_lap_dataset.csv / driver_lap_dataset.csv).")
        df = pd.read_csv(csv_path)
        for col in ["safety_car_this_lap", "virtual_sc_this_lap"]:
            if col not in df.columns:
                df[col] = False
        return df

    def _prepare_laps(self, df: pd.DataFrame) -> pd.DataFrame:
        laps = df.copy()
        laps["safety_car_this_lap"] = laps["safety_car_this_lap"].astype(bool)
        laps["virtual_sc_this_lap"] = laps["virtual_sc_this_lap"].astype(bool)
        laps["lap_number"] = pd.to_numeric(laps["lap_number"], errors="coerce")
        laps = laps.dropna(subset=["lap_number", "total_race_laps", "circuit_id"])
        laps["lap_number"] = laps["lap_number"].astype(int)
        laps["total_race_laps"] = pd.to_numeric(laps["total_race_laps"], errors="coerce").fillna(1)
        laps["race_progress"] = laps["lap_number"] / laps["total_race_laps"].replace(0, np.nan).fillna(1.0)
        laps["phase"] = laps["race_progress"].apply(self._phase_bin)
        return laps

    @staticmethod
    def _phase_bin(progress: float) -> str:
        if progress < 0.33:
            return "early"
        if progress < 0.66:
            return "middle"
        return "late"

    def _aggregate(self, laps: pd.DataFrame, group_cols, target: str, prior_events: float, prior_laps: float):
        events = laps.groupby(group_cols)[target].sum()
        exposure = laps.groupby(group_cols)["lap_number"].count()
        stats = pd.concat([events, exposure], axis=1).rename(columns={target: "events", "lap_number": "exposure_laps"})
        stats["hazard"] = (stats["events"] + prior_events) / (stats["exposure_laps"] + prior_laps)
        stats["hazard"] = stats["hazard"].clip(1e-4, 0.5)
        return stats, stats["hazard"].to_dict()

    def _build_hazards(self, target: str):
        total_events = float(self.laps[target].sum())
        total_laps = float(len(self.laps))
        global_hazard = (total_events / total_laps) if total_laps > 0 else 0.0
        prior_laps = self.min_laps_prior
        prior_events = global_hazard * prior_laps

        stats_circuit_phase, hazard_circuit_phase = self._aggregate(
            self.laps,
            ["circuit_id", "phase"],
            target,
            prior_events,
            prior_laps,
        )

        stats_year_phase = hazard_year_phase = None
        if self.include_year and "year" in self.laps.columns:
            stats_year_phase, hazard_year_phase = self._aggregate(
                self.laps,
                ["year", "phase"],
                target,
                prior_events,
                prior_laps,
            )

        return stats_circuit_phase, hazard_circuit_phase, hazard_year_phase, global_hazard

    def _hazard_blend(self, hazard_circuit_phase, hazard_year_phase, global_hazard, circuit_id, year, phase):
        h_circ = hazard_circuit_phase.get((circuit_id, phase), global_hazard)
        if year is not None and hazard_year_phase is not None:
            h_year = hazard_year_phase.get((year, phase), global_hazard)
            return float(0.5 * (float(h_circ) + float(h_year)))
        return float(h_circ)

    def sc_hazard(self, circuit_id: str, year: int | None, progress: float) -> float:
        phase = self._phase_bin(progress)
        return self._hazard_blend(
            self.sc_hazard_circuit_phase,
            self.sc_hazard_year_phase,
            self.global_sc_hazard,
            circuit_id,
            year,
            phase,
        )

    def vsc_hazard(self, circuit_id: str, year: int | None, progress: float) -> float:
        phase = self._phase_bin(progress)
        return self._hazard_blend(
            self.vsc_hazard_circuit_phase,
            self.vsc_hazard_year_phase,
            self.global_vsc_hazard,
            circuit_id,
            year,
            phase,
        )

    def apply_safety_for_lap(
        self,
        circuit_id: str,
        lap_number: int,
        total_laps: int,
        year: int | None = None,
        rng: np.random.Generator | None = None,
    ):
        """Simulate SC/VSC flags for a lap."""
        if rng is None:
            rng = np.random.default_rng()
        progress = lap_number / max(total_laps, 1)
        sc_p = self.sc_hazard(circuit_id, year, progress)
        vsc_p = self.vsc_hazard(circuit_id, year, progress)
        sc_flag = bool(rng.random() < sc_p)
        vsc_flag = bool(rng.random() < vsc_p) if not sc_flag else False
        return sc_flag, vsc_flag


In [None]:
# Fit the model with the year term enabled; no frame is passed, so the
# lap dataset is loaded from the default CSV locations.
sc_model = SafetyCarModel(include_year=True)
# Keep the per-(circuit_id, phase) tables at hand for the cells below.
sc_stats, vsc_stats = sc_model.sc_stats, sc_model.vsc_stats
sc_stats.head()


In [None]:
from IPython.display import display

# Dataset-wide fraction of rows flagged SC / VSC.
print(f"Global SC hazard per lap: {sc_model.global_sc_hazard:.5f}")
print(f"Global VSC hazard per lap: {sc_model.global_vsc_hazard:.5f}")

# The stats tables hold one row per (circuit_id, phase) group, so the ranking
# below is over circuit/phase combinations; nothing is averaged across phases.
print("\nTop circuit/phase groups by SC hazard:")
display(sc_stats.sort_values("hazard", ascending=False).head(10))
print("\nTop circuit/phase groups by VSC hazard:")
display(vsc_stats.sort_values("hazard", ascending=False).head(10))
