# Monte Carlo race simulation

Loads the trained XGBoost lap-time model plus overtaking, DNF, and safety-car models, then runs repeated race simulations with aggregated results.

In [32]:
from pathlib import Path
import numpy as np
import pandas as pd
import joblib


In [33]:
# Load bundled model and artifacts
bundle_path = Path("models/xgboost_laptime_bundle.joblib")
bundle = joblib.load(bundle_path)
model = bundle["model"]
driver_skill_map = bundle["driver_skill_map"]
circuit_cat = pd.CategoricalDtype(categories=bundle["circuit_categories"])
compound_cat = pd.CategoricalDtype(categories=bundle["compound_categories"])

# Override threshold from bundle when available
PACK_THRESHOLD = float(bundle.get("pack_threshold", 2.0))

# Load dataset for grid building and base lap times
csv_candidates = [Path("fastf1_lap_dataset.csv"), Path("models/fastf1_lap_dataset.csv")]
csv_path = next((p for p in csv_candidates if p.exists()), None)
if csv_path is None:
    raise FileNotFoundError("fastf1_lap_dataset.csv not found")
df = pd.read_csv(csv_path)

# Filter to clean laps for baseline pace
race_df = df[
    (~df["safety_car_this_lap"]) & (~df["virtual_sc_this_lap"]) & df["lap_time_s"].notna()
].copy()
circuit_median_map = race_df.groupby("circuit_id")["lap_time_s"].median().to_dict()

# Build a simple grid from the last session (order by first lap position)
last_session_key = df["session_key"].iloc[-1000]
grid_source = df[df["session_key"] == last_session_key]
first_lap_rows = grid_source[grid_source["lap_number"] == grid_source["lap_number"].min()]
grid_drivers = (
    first_lap_rows.sort_values("current_position")["driver_id"].drop_duplicates().tolist()
)
print(f"Grid built from session {last_session_key}: {grid_drivers}")




Grid built from session 2025_united_states_grand_prix_race: ['VER', 'LEC', 'NOR', 'HAM', 'PIA', 'RUS', 'ANT', 'BEA', 'SAI', 'TSU', 'HUL', 'ALO', 'LAW', 'GAS', 'OCO', 'BOR', 'COL', 'STR', 'HAD', 'ALB']


In [34]:
from pathlib import Path
# Initialize shared RNG and helper models
master_rng = np.random.default_rng(12345)

def _phase(progress):
    if progress < 0.33:
        return "early"
    if progress < 0.66:
        return "middle"
    return "late"

overtake_path = Path("models/overtaking_model.joblib")
if not overtake_path.exists():
    raise FileNotFoundError("Missing models/overtaking_model.joblib. Run models/overtaking_model.ipynb to train/export.")
overtake_bundle = joblib.load(overtake_path)
overtake_pipeline = overtake_bundle.get("pipeline")
overtake_include_year = bool(overtake_bundle.get("include_year", True))
overtake_gap_threshold = float(overtake_bundle.get("gap_threshold", 1.0))
overtake_base_rate = float(overtake_bundle.get("base_rate", 0.05))

def overtake_success_probability(attacker_state, defender_state, circuit_id, gap_start, year=None):
    if overtake_pipeline is None:
        return float(np.clip(overtake_base_rate, 0.01, 0.95))

    def _safe_num(value, default=0.0):
        if value is None or pd.isna(value):
            return default
        return float(value)

    skill_att = float(driver_skill_map.get(attacker_state.get("driver_id"), 0.0))
    skill_def = float(driver_skill_map.get(defender_state.get("driver_id"), 0.0))
    skill_diff = skill_att - skill_def

    att_laps = _safe_num(attacker_state.get("laps_on_current_tyre", 0.0), 0.0)
    def_laps = _safe_num(defender_state.get("laps_on_current_tyre", att_laps), att_laps)
    tyre_adv_laps = def_laps - att_laps

    gap_value = _safe_num(gap_start, overtake_gap_threshold)

    feature_row = {
        "circuit_id": str(circuit_id) if circuit_id is not None else "unknown",
        "gap_start": float(max(gap_value, 0.0)),
        "tyre_age_diff": float(tyre_adv_laps),
        "skill_diff": float(skill_diff),
    }
    if overtake_include_year:
        if year is None or pd.isna(year):
            year_value = "unknown"
        else:
            try:
                year_value = str(int(year))
            except (TypeError, ValueError):
                year_value = str(year)
        feature_row["year"] = year_value

    X = pd.DataFrame([feature_row])
    prob = float(overtake_pipeline.predict_proba(X)[0, 1])
    return float(np.clip(prob, 0.01, 0.95))

def apply_overtakes_for_lap(
    circuit_id,
    drivers_by_pos,
    lap_times,
    pred_deltas,
    base_lap,
    year=None,
    close_gap_threshold=None,
    fail_gap=0.3,
    rng=None,
):
    """Apply overtaking effects to one lap prediction for ordered drivers."""
    lap_times = np.asarray(lap_times, dtype=float).copy()
    pred_deltas = np.asarray(pred_deltas, dtype=float).copy()
    n = len(drivers_by_pos)

    overtake_attempts = np.zeros(n, dtype=bool)
    if close_gap_threshold is None:
        close_gap_threshold = overtake_gap_threshold

    rng = rng or master_rng

    for idx in range(1, n):
        follower = drivers_by_pos[idx]
        leader = drivers_by_pos[idx - 1]

        gap_start = float(follower["gap_to_ahead"])
        leader_time = lap_times[idx - 1]
        follower_time = lap_times[idx]
        gap_end_raw = gap_start + (follower_time - leader_time)

        going_to_pass_raw = gap_end_raw < 0.0
        close_enough = gap_start <= close_gap_threshold

        if not going_to_pass_raw and not close_enough:
            continue

        overtake_attempts[idx] = True

        margin = max(0.0, -gap_end_raw)
        p_success = overtake_success_probability(
            attacker_state=follower,
            defender_state=leader,
            circuit_id=circuit_id,
            gap_start=gap_start,
            year=year,
        )
        p_success = float(min(0.99, p_success + 0.15 * min(margin / 0.5, 1.0)))

        success = (rng.random() < p_success) and going_to_pass_raw
        if success:
            continue

        desired_follower_time = leader_time + fail_gap - gap_start
        if desired_follower_time > follower_time:
            lap_times[idx] = desired_follower_time

    pred_deltas = lap_times - float(base_lap)
    return lap_times, pred_deltas, overtake_attempts

dnf_path = Path("models/dnf_model.joblib")
if not dnf_path.exists():
    raise FileNotFoundError("Missing models/dnf_model.joblib. Run models/dnf_model.ipynb to train/export.")
dnf_bundle = joblib.load(dnf_path)
dnf_pipeline = dnf_bundle["pipeline"]
dnf_include_year = bool(dnf_bundle.get("include_year", True))
dnf_avg_total_laps = float(dnf_bundle.get("avg_total_laps", 50.0))

def dnf_hazard(circuit_id, lap_number, year=None, total_race_laps=None):
    total_laps = float(total_race_laps) if total_race_laps else float(dnf_avg_total_laps or 1.0)
    lap_num = float(lap_number or 0)
    progress = lap_num / total_laps if total_laps > 0 else 0.0

    data = {
        "circuit_id": str(circuit_id) if circuit_id is not None else "unknown",
        "lap_number": lap_num,
        "total_race_laps": total_laps,
        "progress": progress,
    }
    if dnf_include_year:
        data["year"] = str(year) if year is not None else "unknown"
    X = pd.DataFrame([data])
    prob = float(dnf_pipeline.predict_proba(X)[0, 1])
    return float(np.clip(prob, 1e-6, 0.5))

def apply_dnfs_for_lap(circuit_id, drivers_by_pos, lap_number, year=None, total_race_laps=None, rng=None):
    rng = rng or master_rng
    h = dnf_hazard(circuit_id, lap_number, year=year, total_race_laps=total_race_laps)
    dnfs_this_lap = []
    for driver in drivers_by_pos:
        if driver.get("dnf", False):
            dnfs_this_lap.append(False)
            continue
        dnf_now = bool(rng.random() < h)
        dnfs_this_lap.append(dnf_now)
        if dnf_now:
            driver["dnf"] = True
    return drivers_by_pos, dnfs_this_lap

safety_path = Path("models/safety_car_model.joblib")
if not safety_path.exists():
    raise FileNotFoundError("Missing models/safety_car_model.joblib. Run models/safety_car_model.ipynb to train/export.")
safety_bundle = joblib.load(safety_path)
safety_pipeline = safety_bundle["pipeline"]
safety_include_year = bool(safety_bundle.get("include_year", True))
safety_max_len_bucket = int(safety_bundle.get("max_len_bucket", 12))

def sc_transition_probs(state, stint_len, circuit_id, year, progress, lap_number):
    stint_bucket = 0 if state == "green" else min(int(stint_len), safety_max_len_bucket)
    phase = _phase(progress)
    row = {
        "state": state,
        "stint_bucket": float(stint_bucket),
        "race_progress": float(progress),
        "lap_number": float(lap_number),
        "circuit_id": str(circuit_id) if circuit_id is not None else "unknown",
        "phase": phase,
    }
    if safety_include_year:
        if year is None or pd.isna(year):
            year_value = "unknown"
        else:
            try:
                year_value = str(int(year))
            except (TypeError, ValueError):
                year_value = str(year)
        row["year"] = year_value
    else:
        row["year"] = "unknown"
    X = pd.DataFrame([row])
    probs = safety_pipeline.predict_proba(X)[0]
    class_map = dict(zip(safety_pipeline.classes_, probs))
    return {
        "green": float(class_map.get("green", 0.0)),
        "vsc": float(class_map.get("vsc", 0.0)),
        "sc": float(class_map.get("sc", 0.0)),
    }

def sc_next_state(state, stint_len, circuit_id, year, progress, lap_number, rng=None):
    rng = rng or master_rng
    probs = sc_transition_probs(state, stint_len, circuit_id, year, progress, lap_number)
    r = rng.random()
    if r < probs["green"]:
        next_state = "green"
    elif r < probs["green"] + probs["vsc"]:
        next_state = "vsc"
    else:
        next_state = "sc"

    if next_state == state and state in ("vsc", "sc"):
        next_len = int(stint_len) + 1
    elif next_state in ("vsc", "sc"):
        next_len = 1
    else:
        next_len = 0
    return next_state, next_len





In [35]:
def simulate_race(
    circuit_id,
    grid_drivers,
    total_laps=50,
    year=2025,
    global_strategy=None,
    driver_strategies=None,
    safety_car_laps=None,
    rain_laps=None,
    pit_loss=20.0,
    rng=None,
):
    """Simulate a race and return (race_log_df, safety_car_lap_set)."""

    rng = rng or np.random.default_rng()

    base_seed = int(rng.integers(0, 1_000_000_000))
    sc_rng = np.random.default_rng(base_seed + 2)
    overtake_rng = np.random.default_rng(base_seed + 3)
    dnf_rng = np.random.default_rng(base_seed + 4)

    if global_strategy is None:
        raise ValueError("global_strategy must be provided, e.g. [(20, 'MEDIUM'), (40, 'SOFT')]")
    if driver_strategies is None:
        driver_strategies = {}

    if safety_car_laps is None:
        auto_sc_laps = set()
        state, stint_len = 'green', 0
        for lap in range(1, total_laps + 1):
            if state == 'sc':
                auto_sc_laps.add(lap)
            progress = lap / total_laps
            state, stint_len = sc_next_state(state, stint_len, circuit_id, year, progress, lap, rng=sc_rng)
        safety_car_laps = auto_sc_laps
    else:
        safety_car_laps = set(safety_car_laps)

    if rain_laps is None:
        rain_laps = set()
    else:
        rain_laps = set(rain_laps)

    base_lap = circuit_median_map.get(circuit_id)
    if base_lap is None:
        raise ValueError(f"No circuit_median_lap available for circuit_id={circuit_id!r}")

    grid_pos_map = {drv: idx + 1 for idx, drv in enumerate(grid_drivers)}

    drivers_state = []
    for idx, drv in enumerate(grid_drivers):
        strat = driver_strategies.get(drv, global_strategy)
        stops_map = {int(lap): compound for lap, compound in strat}
        if 0 not in stops_map:
            raise ValueError(f"Strategy for {drv} must include lap 0 entry for starting tyre")
        starting_tyre = stops_map.pop(0)
        drivers_state.append(
            {
                "driver_id": drv,
                "grid_position": idx + 1,
                "position": idx + 1,
                "cumul_time": float(idx * 0.3),
                "laps_on_current_tyre": 1,
                "tyre_compound": starting_tyre,
                "gap_to_ahead": 0.0,
                "stops": stops_map,
                "history": [],
                "dnf": False,
            }
        )



    race_log = []

    for lap in range(1, total_laps + 1):
        prev_positions = {s["driver_id"]: s["position"] for s in drivers_state}
        drivers_by_pos = sorted(
            [s for s in drivers_state if not s.get("dnf", False)],
            key=lambda s: s["position"],
        )

        for idx, s in enumerate(drivers_by_pos):
            if idx == 0:
                s["gap_to_ahead"] = 0.0
            else:
                ahead = drivers_by_pos[idx - 1]
                s["gap_to_ahead"] = s["cumul_time"] - ahead["cumul_time"]

        behind_gaps = []
        for idx, s in enumerate(drivers_by_pos):
            if idx < len(drivers_by_pos) - 1:
                behind_gaps.append(float(drivers_by_pos[idx + 1]["gap_to_ahead"]))
            else:
                behind_gaps.append(np.inf)

        rows = []
        laps_on_tyre_for_update = []
        for idx, s in enumerate(drivers_by_pos):
            tyre_age_feature = s["laps_on_current_tyre"]
            laps_on_current_tyre_next = tyre_age_feature + 1
            race_progress = lap / total_laps
            rain_flag = 0
            gap_ahead = float(s["gap_to_ahead"])
            gap_behind = float(behind_gaps[idx]) if idx < len(behind_gaps) else np.inf
            pack_density = int((gap_ahead <= PACK_THRESHOLD) + (gap_behind <= PACK_THRESHOLD))

            rows.append(
                {
                    "circuit_id": circuit_id,
                    "laps_on_current_tyre": tyre_age_feature,
                    "tyre_compound": s["tyre_compound"],
                    "race_progress": race_progress,
                    "rainfall": float(rain_flag),
                    "gap_to_ahead_s": gap_ahead,
                    "pack_density": pack_density,
                    "year": year,
                    "driver_skill": driver_skill_map.get(s["driver_id"], 0.0),
                }
            )
            laps_on_tyre_for_update.append(laps_on_current_tyre_next)

        scenario_df = pd.DataFrame(rows)
        scenario_df["circuit_id"] = scenario_df["circuit_id"].astype(circuit_cat)
        scenario_df["tyre_compound"] = scenario_df["tyre_compound"].astype(compound_cat)

        pred_deltas = model.predict(scenario_df)
        pred_deltas = np.asarray(pred_deltas, dtype=float)
        lap_times = base_lap + pred_deltas

        safety_car_active = lap in safety_car_laps
        if safety_car_active:
            lap_times = np.asarray(lap_times, dtype=float)
            leader_time = lap_times[0] * 1.35
            sc_lap_times = [leader_time]
            for idx in range(1, len(drivers_by_pos)):
                candidate = float(lap_times[idx])
                start_gap = float(drivers_by_pos[idx]["gap_to_ahead"])
                gap_end = start_gap + (candidate - sc_lap_times[idx - 1])
                if gap_end < 0.0:
                    candidate = candidate + abs(gap_end) + 0.3
                sc_lap_times.append(candidate)
            lap_times = np.array(sc_lap_times)
            pred_deltas = lap_times - float(base_lap)
            overtake_attempts = np.zeros(len(drivers_by_pos), dtype=bool)
        else:
            lap_times, pred_deltas, overtake_attempts = apply_overtakes_for_lap(
                circuit_id=circuit_id,
                drivers_by_pos=drivers_by_pos,
                lap_times=lap_times,
                pred_deltas=pred_deltas,
                base_lap=base_lap,
                year=year,
                close_gap_threshold=1.0,
                fail_gap=0.3,
                rng=overtake_rng,
            )

        drivers_by_pos, dnfs_this_lap = apply_dnfs_for_lap(
            circuit_id=circuit_id,
            drivers_by_pos=drivers_by_pos,
            lap_number=lap,
            total_race_laps=total_laps,
            year=year,
            rng=overtake_rng,
        )

        attempts_this_lap = {
            drivers_by_pos[i]["driver_id"]: bool(overtake_attempts[i])
            for i in range(len(drivers_by_pos))
        }
        dnfs_map_this_lap = {
            drivers_by_pos[i]["driver_id"]: bool(dnfs_this_lap[i])
            for i in range(len(drivers_by_pos))
        }

        for idx, s in enumerate(drivers_by_pos):
            lap_time = float(lap_times[idx])
            delta = float(pred_deltas[idx])
            laps_on_current_tyre_next = int(laps_on_tyre_for_update[idx])

            compound_this_lap = s["tyre_compound"]
            pit_compound = s["stops"].get(lap)
            pitted = False
            if pit_compound is not None:
                lap_time += pit_loss
                pitted = True

            dnf_now = dnfs_map_this_lap.get(s["driver_id"], False)
            s["dnf"] = bool(s.get("dnf", False) or dnf_now)

            if not s["dnf"]:
                s["laps_on_current_tyre"] = laps_on_current_tyre_next
                s["cumul_time"] += lap_time

            s["history"].append(
                {
                    "lap": lap,
                    "lap_time": lap_time if not dnf_now else None,
                    "delta": delta if not dnf_now else None,
                    "tyre_compound": compound_this_lap,
                    "pitted": pitted,
                    "overtake_attempt": attempts_this_lap.get(s["driver_id"], False),
                    "dnf": dnf_now,
                }
            )

            if s["dnf"]:
                continue
            if pit_compound is not None:
                s["tyre_compound"] = pit_compound
                s["laps_on_current_tyre"] = 1

        drivers_state = sorted(
            drivers_state,
            key=lambda s: (s.get("dnf", False), s["cumul_time"], s["grid_position"]),
        )
        for pos, s in enumerate(drivers_state, start=1):
            s["position"] = pos

        leader_time = drivers_state[0]["cumul_time"]
        for s in drivers_state:
            last_lap = s["history"][-1]
            gap_to_leader = s["cumul_time"] - leader_time
            pitted = last_lap["pitted"]
            attempted = last_lap["overtake_attempt"]
            dnf_now = last_lap.get("dnf", False)
            lap_time = last_lap["lap_time"]
            delta = last_lap["delta"]

            race_log.append(
                {
                    "lap": lap,
                    "position": s["position"],
                    "driver_id": s["driver_id"],
                    "lap_time": lap_time,
                    "delta": delta,
                    "tyre_compound": last_lap["tyre_compound"],
                    "pitted": pitted,
                    "gap_to_leader": gap_to_leader,
                    "cumul_time": s["cumul_time"],
                    "overtake_attempt": attempted,
                    "dnf": dnf_now or s.get("dnf", False),
                    "pos_change_lap": prev_positions[s["driver_id"]] - s["position"],
                    "pos_change_total": grid_pos_map[s["driver_id"]] - s["position"],
                    "safety_car": safety_car_active,
                }
            )

    return pd.DataFrame(race_log), safety_car_laps








# Single race monte carlo simulation

In [40]:
# Monte Carlo execution
num_runs = 20
race_length = 50
global_strategy = [(0, "SOFT"), (20, "MEDIUM"), (40, "SOFT")]
results = []
summary_rows = []
master_rng = np.random.default_rng(12315)
driver_strategies = {
     "VER": [
         (0, "SOFT"),
         (15, "SOFT"),
         (25, "HARD"),
         (26, "HARD"),
         (27, "HARD"),
         (28, "HARD"),
         (29, "HARD"),
         (30, "HARD"),
         (31, "HARD"),
         (32, "HARD"),
         (33, "HARD"),
         (34, "HARD"),
         (35, "HARD"),
         (45, "HARD"),
     ]
}
circuits = df["circuit_id"].dropna().unique().tolist()
years_by_circuit = df.groupby("circuit_id")["year"].unique().to_dict()
for run in range(num_runs):
    run_rng = np.random.default_rng(master_rng.integers(0, 1_000_000_000))
    circuit_id = run_rng.choice(circuits)
    year = int(run_rng.choice(years_by_circuit.get(circuit_id, [2025])))
    race_log, sc_laps = simulate_race(
        circuit_id=circuit_id,
        grid_drivers=grid_drivers,
        total_laps=race_length,
        year=year,
        global_strategy=global_strategy,
        driver_strategies=driver_strategies,
        safety_car_laps=None,
        rain_laps=None,
        pit_loss=20.0,
        rng=run_rng,
    )
    race_log["run"] = run
    race_log["circuit_id"] = circuit_id
    race_log["year"] = year
    results.append(race_log)
    last_lap = race_log["lap"].max()
    final_class = race_log[race_log["lap"] == last_lap].sort_values("position")
    for _, row in final_class.iterrows():
        summary_rows.append(
            {
                "run": run,
                "circuit_id": circuit_id,
                "year": year,
                "driver_id": row["driver_id"],
                "finish_pos": row["position"],
                "dnf": bool(row["dnf"]),
                "sc_laps": len(sc_laps),
            }
        )
all_logs = pd.concat(results, ignore_index=True)
summary_df = pd.DataFrame(summary_rows)
# Aggregate overview
overview = (
    summary_df.groupby("driver_id")
    .agg(
        runs=("run", "nunique"),
        wins=("finish_pos", lambda s: (s == 1).sum()),
        podiums=("finish_pos", lambda s: (s <= 3).sum()),
        avg_finish=("finish_pos", "mean"),
        dnfs=("dnf", "sum"),
    )
    .sort_values(["wins", "podiums"], ascending=[False, False])
)
print("Overview per driver:\n", overview)
print("\nSample final classification from last run:\n", summary_df[summary_df["run"] == (num_runs - 1)].sort_values("finish_pos"))


Overview per driver:
            runs  wins  podiums  avg_finish  dnfs
driver_id                                       
SAI          20     4        6        7.10     0
ALO          20     2        6        8.15     0
NOR          20     2        6        8.55     1
HAM          20     2        5        9.65     2
OCO          20     2        3       10.85     1
TSU          20     2        3       10.50     0
ANT          20     1        7        7.10     1
PIA          20     1        4        7.90     1
LEC          20     1        3        9.45     1
STR          20     1        3        8.95     0
BOR          20     1        2       13.60     1
RUS          20     1        1        9.50     1
BEA          20     0        2       10.65     1
COL          20     0        2       12.50     1
GAS          20     0        2        9.70     0
HUL          20     0        2       11.40     1
LAW          20     0        2       12.40     1
ALB          20     0        1       11.55     

# Compare two strategies with monte carlo


In [44]:
# Strategy comparison (side-by-side Monte Carlo) with per-driver overrides
from IPython.display import display, clear_output
from tqdm.auto import tqdm

strategy_a_global = [(0, 'SOFT'), (15, 'MEDIUM'), (35, 'SOFT')]
strategy_b_global = [(0, 'SOFT'), (15, 'MEDIUM'), (35, 'SOFT')]

# Optional per-driver overrides; leave empty to fall back to global strategy

_strategy_a_driver = {
     "VER": [
         (0, "SOFT"),
         (15, "SOFT"),
         (25, "HARD"),
         (26, "HARD"),
         (27, "HARD"),
         (28, "HARD"),
         (29, "HARD"),
         (30, "HARD"),
         (31, "HARD"),
         (32, "HARD"),
         (33, "HARD"),
         (34, "HARD"),
         (35, "HARD"),
         (45, "HARD"),
     ]
}
strategy_a_driver = {}  
strategy_b_driver = {
    "VER": [(0, 'HARD'), (30, 'MEDIUM')],
}

num_runs_compare = 2000
race_length = 45
summary_comp = []

for run in tqdm(range(num_runs_compare), desc="Strategy comparison"):
    run_rng = np.random.default_rng(master_rng.integers(0, 1_000_000_000))
    circuit_id = run_rng.choice(circuits)
    year = int(run_rng.choice(years_by_circuit.get(circuit_id, [2025])))

    configs = [
        ('A', strategy_a_global, strategy_a_driver),
        ('B', strategy_b_global, strategy_b_driver),
    ]

    base_seed = run_rng.integers(0, 1_000_000_000)
    for label, glob_strat, driver_strats in configs:
        rng_run = np.random.default_rng(base_seed)
        race_log, sc_laps = simulate_race(
            circuit_id=circuit_id,
            grid_drivers=grid_drivers,
            total_laps=race_length,
            year=year,
            global_strategy=glob_strat,
            driver_strategies=driver_strats,
            safety_car_laps=None,
            rain_laps=None,
            pit_loss=20.0,
            rng=rng_run,
        )
        last_lap = race_log['lap'].max()
        final_class = race_log[race_log['lap'] == last_lap].sort_values('position')
        for _, row in final_class.iterrows():
            summary_comp.append({
                'run': run,
                'strategy': label,
                'circuit_id': circuit_id,
                'year': year,
                'driver_id': row['driver_id'],
                'finish_pos': row['position'],
                'dnf': bool(row['dnf']),
                'sc_laps': len(sc_laps),
            })

    if (run + 1) % 10 == 0 or run == num_runs_compare - 1:
        summary_comp_df = pd.DataFrame(summary_comp)
        wins = summary_comp_df[summary_comp_df['finish_pos'] == 1].groupby('strategy')['driver_id'].count()
        avg_finish = summary_comp_df.groupby(['driver_id', 'strategy'])['finish_pos'].mean().unstack()
        avg_finish['delta_B_minus_A'] = avg_finish.get('B', np.nan) - avg_finish.get('A', np.nan)
        clear_output(wait=True)
        print(f"Progress: {run + 1}/{num_runs_compare}")
        print("Wins per strategy:", wins)
        print("Average finish per driver (A vs B, lower is better):")
        display(avg_finish.sort_values('delta_B_minus_A'))

summary_comp_df = pd.DataFrame(summary_comp)

wins = summary_comp_df[summary_comp_df['finish_pos'] == 1].groupby('strategy')['driver_id'].count()
avg_finish = summary_comp_df.groupby(['driver_id', 'strategy'])['finish_pos'].mean().unstack()
avg_finish['delta_B_minus_A'] = avg_finish.get('B', np.nan) - avg_finish.get('A', np.nan)

print('Wins per strategy:', wins)
print('Average finish per driver (A vs B, lower is better): ', avg_finish.sort_values('delta_B_minus_A'))


Progress: 2000/2000
Wins per strategy: strategy
A    2000
B    2000
Name: driver_id, dtype: int64
Average finish per driver (A vs B, lower is better):


strategy,A,B,delta_B_minus_A
driver_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
VER,5.624,3.9285,-1.6955
STR,10.512,10.1765,-0.3355
BEA,11.1975,10.936,-0.2615
ANT,9.6985,9.559,-0.1395
HAD,11.9315,11.8125,-0.119
GAS,11.4005,11.33,-0.0705
COL,14.065,14.019,-0.046
ALO,11.377,11.3495,-0.0275
OCO,12.482,12.4575,-0.0245
SAI,10.2015,10.1875,-0.014


Strategy comparison: 100%|██████████| 2000/2000 [37:05<00:00,  1.11s/it]

Wins per strategy: strategy
A    2000
B    2000
Name: driver_id, dtype: int64
Average finish per driver (A vs B, lower is better):  strategy         A        B  delta_B_minus_A
driver_id                                   
VER         5.6240   3.9285          -1.6955
STR        10.5120  10.1765          -0.3355
BEA        11.1975  10.9360          -0.2615
ANT         9.6985   9.5590          -0.1395
HAD        11.9315  11.8125          -0.1190
GAS        11.4005  11.3300          -0.0705
COL        14.0650  14.0190          -0.0460
ALO        11.3770  11.3495          -0.0275
OCO        12.4820  12.4575          -0.0245
SAI        10.2015  10.1875          -0.0140
TSU        11.2820  11.2745          -0.0075
RUS        10.9255  10.9390           0.0135
HUL        11.6015  11.6510           0.0495
ALB        11.6955  11.7805           0.0850
LAW        11.4315  11.5970           0.1655
BOR        10.4505  10.7535           0.3030
LEC         8.2975   8.7550           0.4575
NOR         9


