In [1]:
# import libraries
import pandas as pd
import numpy as np
from itertools import product
from pathlib import Path

from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestRegressor

IN_CSV = "Final_Project.csv"   # CSV file the dataset is loaded from
TOTAL_LAPS = 60                # race length assumed by every generated scenario
np.random.seed(42)             # seed NumPy's global RNG so sampled stints repeat between runs

def build_ohe():
    """
    Return a dense-output OneHotEncoder that ignores unseen categories.

    Newer scikit-learn releases take the dense/sparse switch as
    ``sparse_output``; older releases only accept ``sparse``. The newer
    spelling is tried first and the legacy one is used when the constructor
    rejects it with a TypeError.
    """
    try:
        return OneHotEncoder(handle_unknown="ignore", sparse_output=False)
    except TypeError:
        # older scikit-learn: the keyword was still called `sparse`
        return OneHotEncoder(handle_unknown="ignore", sparse=False)

# one-letter display code for each compound name
compound_map = dict(Soft="S", Medium="M", Hard="H")

# realistic stint ranges per compound, as (low, high) lap bounds
STINT_RANGES = dict(
    Soft=(10, 15),
    Medium=(18, 25),
    Hard=(28, 35),
)

def clamp_possibly_negative(x):
    """Floor *x* at zero and return it as a built-in int.

    Guards against offsets pushing a stint length below zero.
    """
    floored = x if x > 0 else 0
    return int(floored)

def generate_scenarios(track, grid, bias_two_stop=False, pit_offset=0):
    """
    Create plausible 1-stop and 2-stop strategies with realistic stint lengths.

    Stint lengths are drawn with ``np.random.randint`` from STINT_RANGES, so
    the upper bound of each range is exclusive and results depend on the
    state of NumPy's global RNG. The final stint always takes whatever laps
    remain out of TOTAL_LAPS.

    Parameters
    ----------
    track : value stored in the "TrackType" column of every row.
    grid : value stored in the "GridPosition" column of every row.
    bias_two_stop : when True, no 1-stop plans are generated.
    pit_offset : laps added to the sampled first stint (may be negative) to
        model early/late pit sensitivity; the result is clamped at zero.

    Returns
    -------
    pandas.DataFrame with one row per feasible plan. Plans in which any
    stint ends up with zero laps are dropped; if none survive, the frame is
    empty and has no columns.
    """
    base = {"TrackType": track, "GridPosition": grid}
    strategies = []

    # 1-stop plans (skip when biasing toward 2-stop)
    if not bias_two_stop:
        for c1, c2 in [("Medium", "Hard"), ("Hard", "Medium")]:
            stint1 = np.random.randint(*STINT_RANGES[c1])
            stint1 = clamp_possibly_negative(stint1 + pit_offset)
            stint2 = clamp_possibly_negative(TOTAL_LAPS - stint1)

            # drop clearly infeasible plans (same rule as the 2-stop branch):
            # a zero-length stint means the offset consumed or overran a stint
            if stint1 <= 0 or stint2 <= 0:
                continue

            strategies.append({
                **base,
                "StrategyType": "1-stop",
                "OpeningCompound": c1,
                "MiddleCompound": None,
                "LastCompound": c2,
                "NumStops": 1,
                "FirstPitLap": stint1,
                "SecondPitLap": None,
                "Stint1Length": stint1,
                "Stint2Length": stint2,
                "Stint3Length": 0,
                "AvgStintLength": TOTAL_LAPS / 2
            })

    # 2-stop plans
    for c1, c2, c3 in product(["Soft", "Medium", "Hard"], repeat=3):
        # avoid back-to-back identical stint compounds; this also guarantees
        # that at least two different compounds are used
        if c1 == c2 or c2 == c3:
            continue

        stint1 = np.random.randint(*STINT_RANGES[c1])
        stint2 = np.random.randint(*STINT_RANGES[c2])
        stint1 = clamp_possibly_negative(stint1 + pit_offset)
        stint3 = clamp_possibly_negative(TOTAL_LAPS - stint1 - stint2)

        # drop clearly infeasible plans
        if stint1 <= 0 or stint2 <= 0 or stint3 <= 0:
            continue

        strategies.append({
            **base,
            "StrategyType": "2-stop",
            "OpeningCompound": c1,
            "MiddleCompound": c2,
            "LastCompound": c3,
            "NumStops": 2,
            "FirstPitLap": stint1,
            "SecondPitLap": stint1 + stint2,
            "Stint1Length": stint1,
            "Stint2Length": stint2,
            "Stint3Length": stint3,
            "AvgStintLength": TOTAL_LAPS / 3
        })

    return pd.DataFrame(strategies)

# model inputs and the column to predict
target = "RacePoints"
features = [
    "TrackType", "GridPosition", "StrategyType",
    "OpeningCompound", "LastCompound", "NumStops",
    "FirstPitLap", "AvgStintLength",
]

# read the dataset; rows without a target value cannot be used for training
df = pd.read_csv(Path(IN_CSV))
df = df.dropna(subset=[target])

# keep only the requested features that the CSV actually provides
X = df[[name for name in features if name in df.columns]].copy()
y = df[target].astype(float)

# split the available columns by the preprocessing they will receive
num_feats = [
    name
    for name in ("GridPosition", "NumStops", "FirstPitLap", "AvgStintLength")
    if name in X.columns
]
cat_feats = [
    name
    for name in ("TrackType", "StrategyType", "OpeningCompound", "LastCompound")
    if name in X.columns
]

# hold out 20% of the rows; the split is fixed by random_state
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2, random_state=42)

# numeric columns: fill gaps with the median, then standardize
num_pipe = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="median")),
    ("scaler", StandardScaler()),
])

# categorical columns: fill gaps with the most frequent value, then one-hot encode
cat_pipe = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="most_frequent")),
    ("ohe", build_ohe()),
])

# route each column group through its pipeline; any other column is dropped
pre = ColumnTransformer(
    transformers=[
        ("num", num_pipe, num_feats),
        ("cat", cat_pipe, cat_feats),
    ],
    remainder="drop",
)

# points predictor: preprocessing followed by a random forest, fit on the training split
model = Pipeline(steps=[
    ("pre", pre),
    ("rf", RandomForestRegressor(n_estimators=200, random_state=42, n_jobs=-1)),
])
model.fit(X_tr, y_tr)

# prescribe strategies by track type
def _format_plan(plan, suffix):
    """
    Render one driver's plan from a combined A/B row as display strings.

    plan   : one row of the A x B combination table.
    suffix : "_A" or "_B", selecting which driver's columns are read.

    Returns (compounds, stints, pit_laps), each a "/"-joined string.
    """
    compounds = "/".join(
        compound_map[c]
        for c in (
            plan[f"OpeningCompound{suffix}"],
            plan[f"MiddleCompound{suffix}"],
            plan[f"LastCompound{suffix}"],
        )
        if pd.notna(c)  # 1-stop plans carry no middle compound
    )

    # 1-stop plans show two stints and one pit lap; otherwise three and two
    n_stints = 2 if plan[f"NumStops{suffix}"] == 1 else 3
    stints = "/".join(
        str(plan[f"Stint{i}Length{suffix}"]) for i in range(1, n_stints + 1)
    )
    pit_cols = ("FirstPitLap", "SecondPitLap")[: n_stints - 1]
    pit_laps = "/".join(str(int(plan[col + suffix])) for col in pit_cols)
    return compounds, stints, pit_laps


results = []
max_points = int(df["RacePoints"].max()) if "RacePoints" in df.columns else 25

for track in df["TrackType"].dropna().unique():
    # example grid assumptions per track (can be replaced with forecasted grids)
    grid_A, grid_B = 1, 2

    # scenario generation
    A = generate_scenarios(track, grid_A, bias_two_stop=False, pit_offset=0)
    B = generate_scenarios(track, grid_B, bias_two_stop=(grid_B >= 6), pit_offset=1)

    # no candidate plans for one of the cars: skip the track instead of
    # letting the model fail on an empty frame
    if A.empty or B.empty:
        continue

    # ensure required model features exist on scenarios
    for col in features:
        if col not in A.columns:
            A[col] = np.nan
        if col not in B.columns:
            B[col] = np.nan

    # predict points
    A["Points"] = model.predict(A[features]).round().astype(int)
    B["Points"] = model.predict(B[features]).round().astype(int)

    # keep the 20 best-scoring plans per car
    topA = A.sort_values("Points", ascending=False).head(20).reset_index(drop=True)
    topB = B.sort_values("Points", ascending=False).head(20).reset_index(drop=True)

    # cap to reflect that only one car can take maximum at once (simple realism guard)
    if topA["Points"].iloc[0] >= max_points:
        topB["Points"] = np.minimum(topB["Points"], max_points - 1)

    # combine top plans (cartesian product via a constant join key)
    combo = pd.merge(
        topA.assign(key=1), topB.assign(key=1), on="key", suffixes=("_A", "_B")
    )
    combo["TotalPoints"] = combo["Points_A"] + combo["Points_B"]

    best = combo.sort_values("TotalPoints", ascending=False).iloc[0]

    # format compounds, stints, pit laps for readability
    compA, stintA, pitA = _format_plan(best, "_A")
    compB, stintB, pitB = _format_plan(best, "_B")

    results.append({
        "TrackType": track,
        "DriverA_Grid": best["GridPosition_A"],
        "DriverA_StrategyType": best["StrategyType_A"],
        "DriverA_Compounds": compA,
        "DriverA_Stints": stintA,
        "DriverA_PitLaps": pitA,
        "DriverA_Points": int(best["Points_A"]),
        "DriverB_Grid": best["GridPosition_B"],
        "DriverB_StrategyType": best["StrategyType_B"],
        "DriverB_Compounds": compB,
        "DriverB_Stints": stintB,
        "DriverB_PitLaps": pitB,
        "DriverB_Points": int(best["Points_B"]),
        "TeamTotalPoints": int(best["TotalPoints"])
    })

# display results
results_df = pd.DataFrame(results)

if results_df.empty:
    # an empty frame has no columns, so sorting it by name would raise KeyError
    print("No feasible strategies generated. Check feature availability and scenario ranges.")
else:
    # tracks in alphabetical order, highest team total first within a track
    results_df = results_df.sort_values(["TrackType","TeamTotalPoints"], ascending=[True, False])
    print("Found optimum strategies to maximize team points haul by track type:")
    display(results_df)


Found optimum strategies to maximize team points haul by track type:


Unnamed: 0,TrackType,DriverA_Grid,DriverA_StrategyType,DriverA_Compounds,DriverA_Stints,DriverA_PitLaps,DriverA_Points,DriverB_Grid,DriverB_StrategyType,DriverB_Compounds,DriverB_Stints,DriverB_PitLaps,DriverB_Points,TeamTotalPoints
1,High-Degradation,1,2-stop,M/H/M,21/29/10,21/50,18,2,2-stop,M/H/M,20/28/12,20/48,20,38
4,Other,1,2-stop,M/S/H,21/13/26,21/34,17,2,2-stop,M/H/S,21/28/11,21/49,17,34
3,Power-Sensitive,1,2-stop,M/S/H,21/14/25,21/35,22,2,2-stop,M/H/S,21/33/6,21/54,22,44
0,Street/Semi-Street,1,2-stop,M/S/H,20/14/26,20/34,22,2,2-stop,M/S/H,22/13/25,22/35,22,44
2,Technical/Traction-Limited,1,2-stop,M/S/H,20/14/26,20/34,17,2,2-stop,M/S/H,19/14/27,19/33,17,34
