In [5]:
from pathlib import Path
import pandas as pd
import numpy as np


# DNF model
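Compute simple per-circuit and per-year DNF hazards (each smoothed toward the global per-lap rate) and simulate retirements during a race.
Uses the `dnf` flag in the lap dataset (one per driver/session) to derive per-lap hazard rates; when a year is supplied, the circuit and year hazards are averaged.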


In [6]:

import numpy as np
import pandas as pd
from pathlib import Path


class DNFModel:
    """Encapsulated DNF hazard model using per-circuit and per-year rates.

    If df is None, the model will auto-load a lap dataset from common paths.
    """

    def __init__(
        self,
        df=None,
        include_year=True,
        min_laps_prior=500.0,
    ):
        self.include_year = include_year
        self.min_laps_prior = float(min_laps_prior)

        if df is None:
            df = self._load_default_df()
        self.df = df.copy()

        self.driver_runs = self._build_driver_runs(self.df)
        (
            self.dnf_stats,
            self.dnf_hazard,
            self.dnf_stats_by_year,
            self.dnf_hazard_by_year,
            self.global_hazard,
        ) = self._build_hazards()

    def _load_default_df(self) -> pd.DataFrame:
        candidates = [
            Path('driver_lap_dataset.csv'),
            Path('models/driver_lap_dataset.csv'),
            Path('fastf1_lap_dataset.csv'),
            Path('models/fastf1_lap_dataset.csv'),
        ]
        csv_path = next((p for p in candidates if p.exists()), None)
        if csv_path is None:
            raise FileNotFoundError('No lap dataset found (driver_lap_dataset.csv / fastf1_lap_dataset.csv).')
        df = pd.read_csv(csv_path)
        if 'dnf' not in df.columns:
            df['dnf'] = False
        return df

    def _build_driver_runs(self, df: pd.DataFrame) -> pd.DataFrame:
        grp_cols = ["session_key", "driver_id", "circuit_id", "total_race_laps", "year", "race_name"]
        runs = (
            df.groupby(grp_cols)
            .agg(last_lap=("lap_number", "max"), dnf=("dnf", "max"))
            .reset_index()
        )
        runs["dnf"] = runs["dnf"].fillna(False).astype(bool)
        runs["exposure_laps"] = runs["last_lap"].astype(float)
        return runs

    def _aggregate(self, runs: pd.DataFrame, group_cols):
        events = runs.groupby(group_cols)["dnf"].sum()
        exposure = runs.groupby(group_cols)["exposure_laps"].sum()
        stats = pd.concat([events, exposure], axis=1).rename(columns={"dnf": "dnfs", "exposure_laps": "exposure_laps"})

        prior_laps = self.min_laps_prior
        prior_events = self.global_hazard * prior_laps

        stats["hazard"] = (stats["dnfs"] + prior_events) / (stats["exposure_laps"] + prior_laps)
        stats["hazard"] = stats["hazard"].clip(lower=1e-4, upper=0.5)

        hazard_map = stats["hazard"].to_dict()
        return stats, hazard_map

    def _build_hazards(self):
        total_events = float(self.driver_runs["dnf"].sum())
        total_laps = float(self.driver_runs["exposure_laps"].sum())
        global_hazard = (total_events / total_laps) if total_laps > 0 else 0.0
        self.global_hazard = global_hazard

        stats_circuit, hazard_circuit = self._aggregate(self.driver_runs, ["circuit_id"])

        stats_year = hazard_year = None
        if self.include_year and "year" in self.driver_runs.columns:
            stats_year, hazard_year = self._aggregate(self.driver_runs, ["year"])

        return stats_circuit, hazard_circuit, stats_year, hazard_year, global_hazard

    def hazard(self, circuit_id: str, year: int | None = None) -> float:
        h_circ = self.dnf_hazard.get(circuit_id)
        if h_circ is None:
            h_circ = self.global_hazard
        if year is not None and self.dnf_hazard_by_year is not None:
            h_year = self.dnf_hazard_by_year.get(year, self.global_hazard)
            return float(0.5 * (float(h_circ) + float(h_year)))
        return float(h_circ)

    def apply_dnfs_for_lap(
        self,
        circuit_id: str,
        drivers_by_pos: list[dict],
        lap_number: int,
        year: int | None = None,
        rng: np.random.Generator | None = None,
    ):
        """
        Simulate DNFs for a lap. Expects drivers_by_pos entries to have a 'dnf' flag (bool).
        Returns updated drivers_by_pos and a list of bools for DNFs this lap.
        """
        if rng is None:
            rng = np.random.default_rng()

        h = self.hazard(circuit_id, year)
        dnfs_this_lap = []
        for driver in drivers_by_pos:
            if driver.get("dnf", False):
                dnfs_this_lap.append(False)
                continue
            dnf_now = bool(rng.random() < h)
            dnfs_this_lap.append(dnf_now)
            if dnf_now:
                driver["dnf"] = True
        return drivers_by_pos, dnfs_this_lap


In [7]:
# Fit the hazard model; with no frame passed it loads the lap dataset itself.
dnf_model = DNFModel(include_year=True)

# Notebook-level aliases for the fitted tables and lookup dicts.
dnf_stats, dnf_hazard = dnf_model.dnf_stats, dnf_model.dnf_hazard
dnf_stats_by_year, dnf_hazard_by_year = (
    dnf_model.dnf_stats_by_year,
    dnf_model.dnf_hazard_by_year,
)

# Preview the per-circuit table.
dnf_stats.head()


Unnamed: 0_level_0,dnfs,exposure_laps,hazard
circuit_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
albert_park,12,11737.0,0.001012
americas,15,13308.0,0.001114
bahrain,7,16600.0,0.000432
baku,9,8369.0,0.001058
buddh,3,2569.0,0.001102


In [8]:
from IPython.display import display

# Headline figure: overall DNF probability per lap across the whole dataset.
print(f"Global hazard per lap: {dnf_model.global_hazard:.5f}")

# Ten circuits with the highest smoothed hazard.
print("\nTop circuits by DNF hazard:")
display(dnf_stats.sort_values(by="hazard", ascending=False).iloc[:10])

# The per-year table only exists when the model was built with include_year=True.
if dnf_stats_by_year is not None:
    print("\nTop years by DNF hazard:")
    display(dnf_stats_by_year.sort_values(by="hazard", ascending=False).iloc[:10])


Global hazard per lap: 0.00076

Top circuits by DNF hazard:


Unnamed: 0_level_0,dnfs,exposure_laps,hazard
circuit_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
mugello,4,778.0,0.003428
valencia,3,1210.0,0.001977
yeongam,5,2276.0,0.001939
jeddah,7,4396.0,0.001508
monaco,23,18194.0,0.001251
istanbul,3,2223.0,0.001242
americas,15,13308.0,0.001114
buddh,3,2569.0,0.001102
marina_bay,15,13507.0,0.001098
silverstone,15,13969.0,0.001063



Top years by DNF hazard:


Unnamed: 0_level_0,dnfs,exposure_laps,hazard
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2012,32,25343.0,0.001253
2019,26,23625.0,0.001094
2022,24,23529.0,0.001015
2013,22,22779.0,0.000961
2020,17,18400.0,0.00092
2018,20,22339.0,0.000892
2017,18,20307.0,0.000883
2021,20,23688.0,0.000843
2014,17,21053.0,0.000806
2016,19,24513.0,0.000775
