In [None]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import warnings
warnings.filterwarnings('ignore', category=FutureWarning)
import pickle
from sklearn.model_selection import GroupShuffleSplit
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import GroupShuffleSplit
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

In [2]:
# Load the filtered dataset from the Excel workbook (path is relative to the working directory).
df_merged = pd.read_excel(io="csv_output/Filtered_final.xlsx")

In [None]:

df = df_merged.copy()

# ─────────────────────────────────────────────────────────────
# 1 Stable identifiers
# ─────────────────────────────────────────────────────────────
# race_id is "<year>_<round>_<Grand_Prix>", built from the string form of each column.
year_str = df['year'].astype(str)
round_str = df['round'].astype(str)
grand_prix_str = df['Grand_Prix'].astype(str)
df['race_id'] = year_str + '_' + round_str + '_' + grand_prix_str

# One group = one driver's stint within one race.
group_cols = ['race_id', 'RacingNumber', 'Stint']

# Ordering column inside a stint: prefer LapInStint, otherwise fall back to lap_number.
if 'LapInStint' in df.columns:
    time_col = 'LapInStint'
else:
    time_col = 'lap_number'

# Sort by group, then by time, so positional slices inside a group are chronological.
sort_keys = [*group_cols, time_col]
df = df.sort_values(by=sort_keys).reset_index(drop=True)

# ─────────────────────────────────────────────────────────────
# 2) Time-ordered 70/15/15 split inside each (race, driver, stint)
#    - For very short stints, we don't create a test slice
# ─────────────────────────────────────────────────────────────
MIN_TEST = 3       # minimum laps in the test slice when a group is split
MIN_VAL = 2        # minimum laps in the val slice when a group is split
MIN_SPLIT_N = 10   # groups shorter than this stay entirely in train

def assign_time_split(g: pd.DataFrame) -> pd.DataFrame:
    """Label the rows of one (race, driver, stint) group as train / val / test.

    Rows are labelled by position, so the group must already be sorted by time.
    The nominal cut points are 70% and 85% of the group length; they are then
    shifted so that test keeps at least MIN_TEST rows, val at least MIN_VAL
    rows and train at least one row.

    Parameters
    ----------
    g : pd.DataFrame
        Rows of a single group, in chronological order.

    Returns
    -------
    pd.DataFrame
        A copy of ``g`` with an added ``split`` column. The input frame is not
        modified: groupby hands out group frames that must not be mutated.
    """
    n = len(g)
    labels = np.full(n, 'train', dtype=object)

    # Groups below MIN_SPLIT_N stay fully in train (avoids unstable tiny val/test slices).
    if n >= MIN_SPLIT_N:
        # nominal cut points
        i1 = int(np.floor(0.70 * n))
        i2 = int(np.floor(0.85 * n))

        # enforce minimum sizes: test first, then val
        i2 = min(i2, n - MIN_TEST)
        i1 = min(i1, i2 - MIN_VAL)

        # keep train and val non-empty
        i1 = max(1, i1)
        i2 = max(i1 + 1, i2)

        labels[i1:i2] = 'val'
        labels[i2:] = 'test'

    # assign() returns a new frame, leaving the caller's group untouched.
    return g.assign(split=labels)

# Label every (race, driver, stint) group.
# The groups are iterated explicitly rather than passed through
# DataFrameGroupBy.apply: applying over the grouping columns is deprecated
# (pandas >= 2.2) and newer pandas no longer hands those columns to the
# function, which would silently remove race_id / RacingNumber / Stint from
# the result. Rows whose group key is NaN are skipped, as groupby does by default.
labelled_groups = [assign_time_split(g) for _, g in df.groupby(group_cols, sort=False)]

# sort_index() restores the original row order (df carries a fresh RangeIndex
# from the reset_index after sorting).
df_with_split = pd.concat(labelled_groups).sort_index()

# Materialise the three partitions as independent frames without the helper column.
df_train = df_with_split.loc[df_with_split['split'] == 'train'].drop(columns=['split']).copy()
df_val   = df_with_split.loc[df_with_split['split'] == 'val'].drop(columns=['split']).copy()
df_test  = df_with_split.loc[df_with_split['split'] == 'test'].drop(columns=['split']).copy()

# ─────────────────────────────────────────────────────────────
# 3 Reporting
# ─────────────────────────────────────────────────────────────
total = len(df_with_split)
print("Overall split (laps):")
# Labels are padded so the counts line up in the printed report.
for label, part in (("Train:", df_train), ("Val:  ", df_val), ("Test: ", df_test)):
    print(f"  {label} {len(part)} ({len(part)/total*100:.1f}%)")

# Count the groups that were too short to be split and therefore stayed in train.
group_sizes = df_with_split.groupby(group_cols).size()
n_short_groups = (group_sizes < MIN_SPLIT_N).sum()
print(f"\nDriver-stint groups total: {len(group_sizes)}")
print(f"Groups with n < {MIN_SPLIT_N} put fully in train: {n_short_groups}")

# ─────────────────────────────────────────────────────────────
# 4 HARD sanity checks (these catch subtle bugs)
# ─────────────────────────────────────────────────────────────

# A Contiguity check: splits must be contiguous blocks (train -> val -> test)
def is_noncontiguous(s):
    """Return True when some label in ``s`` occurs in more than one run.

    A row starts a new run when it differs from the previous row (the first
    row always does). If every label forms a single block, the number of runs
    equals the number of distinct labels; more runs means a label reappears
    after being interrupted.
    """
    run_starts = s.ne(s.shift())
    return run_starts.sum() > s.nunique()

split_labels_by_group = df_with_split.groupby(group_cols)['split']
bad_contig = split_labels_by_group.apply(is_noncontiguous)
print("\nAny non-contiguous split blocks within a driver-stint group?:", bad_contig.any())

# B Time monotonicity check inside each group
def _is_not_increasing(s):
    """True when the values of ``s`` are not monotonically increasing."""
    return not s.is_monotonic_increasing

bad_time = df_with_split.groupby(group_cols)[time_col].apply(_is_not_increasing)
print("Any non-monotonic time ordering inside a driver-stint group?:", bad_time.any())

# C “Future in train” check: max train time should be <= min val time, etc.
def boundary_ok(g):
    """Check that the splits of one group are ordered train <= val <= test in time.

    Compares the latest time of an earlier split with the earliest time of the
    next non-empty split, using the module-level ``time_col`` column. Pairs
    where either side is empty are skipped, so a train-only group passes.
    """
    def times_of(label):
        return g.loc[g['split'] == label, time_col]

    train_t = times_of('train')
    val_t = times_of('val')
    test_t = times_of('test')
    has_train = len(train_t) > 0
    has_val = len(val_t) > 0
    has_test = len(test_t) > 0

    ok = True
    if has_val and has_train:
        ok &= (train_t.max() <= val_t.min())
    if has_test and has_val:
        ok &= (val_t.max() <= test_t.min())
    # Without a val slice, train must still end before test begins.
    if has_test and has_train and not has_val:
        ok &= (train_t.max() <= test_t.min())
    return ok

# boundary_ok only reads 'split' and time_col, so only those columns are passed.
# This avoids applying over the grouping columns (deprecated in pandas >= 2.2).
boundary_pass = df_with_split.groupby(group_cols)[['split', time_col]].apply(boundary_ok)
# Coerce to bool before negating: `~` on an object-dtype Series of Python bools
# would yield integers (-1 / -2) instead of a logical NOT.
bad_boundary = ~boundary_pass.astype(bool)
print("Any boundary violations (train after val/test)?:", bad_boundary.any())

# ─────────────────────────────────────────────────────────────
# 5 Save
# ─────────────────────────────────────────────────────────────
# Write each partition to its own workbook, without the index column.
for split_frame, out_path in (
    (df_train, 'csv_output/Train_set.xlsx'),
    (df_val, 'csv_output/Validation_set.xlsx'),
    (df_test, 'csv_output/Test_set.xlsx'),
):
    split_frame.to_excel(out_path, index=False)


Overall split (laps):
  Train: 28425 (70.4%)
  Val:   5388 (13.3%)
  Test:  6583 (16.3%)

Driver-stint groups total: 2869
Groups with n < 10 put fully in train: 1019

Any non-contiguous split blocks within a driver-stint group?: False
Any non-monotonic time ordering inside a driver-stint group?: False
Any boundary violations (train after val/test)?: False


In [9]:

print("\nCreating synthetic features...")
# is_new_tyre is 1 where TyreAgeAtStart equals 0, else 0; added in place to each partition.
for df_split in (df_train, df_val, df_test):
    starts_at_zero_age = df_split['TyreAgeAtStart'].eq(0)
    df_split['is_new_tyre'] = starts_at_zero_age.astype(int)
print("   is_new_tyre created (1=fresh, 0=used)")


Creating synthetic features...
   is_new_tyre created (1=fresh, 0=used)


In [20]:


# ─────────────────────────────────────────────────────────────
# CONFIG & FEATURE SETS
# ─────────────────────────────────────────────────────────────
TARGET = "LapTime_next"

# Building blocks for the numeric feature lists. NUM_FEATS_FULL keeps the order
# tyre -> race-context -> weather.
_TYRE_FEATS = ["is_new_tyre", "TyreLife", "TyreAgeAtStart"]
_RACE_CONTEXT_FEATS = ["is_leader", "in_drs_range", "in_clean_air", "in_dirty_air", "pushing"]
_WEATHER_FEATS = [
    "AirTemp", "Humidity", "Pressure", "TrackTemp", "WindSpeed", "wind_sin", "wind_cos",
]

NUM_FEATS_FULL = _TYRE_FEATS + _RACE_CONTEXT_FEATS + _WEATHER_FEATS

CAT_FEATS_FULL = ["RacingNumber", "Team", "Compound"]

# Feature sets to compare: each entry lists its numeric ("num") and categorical ("cat") columns.
FEATURE_SETS = {
    "Tyre+Stint": {
        "num": list(_TYRE_FEATS),
        "cat": ["Compound"],
    },
    "Tyre+Stint+Weather": {
        "num": _TYRE_FEATS + _WEATHER_FEATS,
        "cat": ["Compound"],
    },
    # Disabled variant, kept for reference:
    # "Full (No Driver/Team)": {"num": NUM_FEATS_FULL, "cat": ["Compound"]},
    "Full (Driver+Team+Compound)": {
        "num": NUM_FEATS_FULL,
        "cat": CAT_FEATS_FULL,  # RacingNumber, Team, Compound
    },
}

# Validation: every raw input column a feature set needs must exist in df_train.
# wind_sin / wind_cos are not required up front because make_Xy() derives them
# from WindDirection (via add_wind_trig) whenever a feature set uses them;
# WindDirection is required instead in that case.
_DERIVED_WIND_FEATS = ("wind_sin", "wind_cos")

for name, cfg in FEATURE_SETS.items():
    uses_wind = any(feat in cfg["num"] for feat in _DERIVED_WIND_FEATS)
    all_cols = [c for c in cfg["num"] if c not in _DERIVED_WIND_FEATS]
    all_cols += cfg["cat"] + ["year", "round", TARGET]
    if uses_wind:
        all_cols += ["WindDirection"]
    for c in all_cols:
        if c not in df_train.columns:
            raise ValueError(f"Missing column in df_train: {c} (needed for {name})")

In [21]:
# FEATURE PREP

def add_wind_trig(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` with ``wind_sin`` and ``wind_cos`` columns.

    ``WindDirection`` is cast to float and converted from degrees to radians
    before taking the sine and cosine. The input frame is left unchanged.
    """
    angle_rad = np.deg2rad(df["WindDirection"].astype(float))
    return df.assign(wind_sin=np.sin(angle_rad), wind_cos=np.cos(angle_rad))

def make_Xy(df: pd.DataFrame, num_feats, cat_feats, fit_cols=None, medians=None):
    """Build the design matrix X and target y for one feature set.

    Parameters
    ----------
    df : pd.DataFrame
        Source rows; must contain the requested features and the TARGET column
        (plus WindDirection when wind_sin / wind_cos are requested).
    num_feats, cat_feats : list
        Numeric and categorical feature names. Categorical features are one-hot encoded.
    fit_cols : index-like, optional
        Columns of the training matrix. When given, X is aligned to exactly
        these columns (missing ones are filled with 0, extra ones dropped).
        When None, the call is treated as the fitting call.
    medians : pd.Series, optional
        Per-column fill values for missing data. When None they are computed from X.

    Returns
    -------
    (X, y, medians)
        X and y restricted to rows with a non-missing target, and the fill
        values that were used.
    """
    # Only compute wind trig if wind features are used
    need_wind = ("wind_sin" in num_feats) or ("wind_cos" in num_feats)
    d = add_wind_trig(df) if need_wind else df

    # keep only needed columns (copies, so the caller's frame is never modified)
    X_raw = d[num_feats + cat_feats].copy()
    y = d[TARGET].astype(float).copy()

    # One-hot encode. The reference level (first category) is dropped only when
    # fitting. At evaluation time all levels are encoded and the reindex below
    # removes the training reference level. Dropping the first level of the
    # evaluation data instead would drop whichever category happens to sort
    # first in that slice, silently encoding it as the reference category.
    is_fitting = fit_cols is None
    X = pd.get_dummies(X_raw, columns=cat_feats, drop_first=is_fitting)

    # align columns to training set if provided
    if not is_fitting:
        X = X.reindex(columns=fit_cols, fill_value=0)

    # impute using provided medians if available, otherwise compute from X (train only)
    if medians is None:
        medians = X.median(numeric_only=True)
    X = X.fillna(medians)

    # drop NaN target rows
    m = y.notna()
    return X.loc[m], y.loc[m], medians


In [22]:
# ─────────────────────────────────────────────────────────────
# TRAIN ONE MODEL PER RACE FOR A GIVEN FEATURE SET
# ─────────────────────────────────────────────────────────────
def train_race_models(df_train, num_feats, cat_feats, model_type="linear", alpha=1.0, min_samples=None):
    """
    Fit one regression model per (year, round) race for the given features.

    Parameters
    ----------
    df_train : pd.DataFrame
        Training rows; must contain "year" and "round" plus whatever make_Xy needs.
    num_feats : list
        Numerical feature names.
    cat_feats : list
        Categorical feature names.
    model_type : str
        "ridge" selects Ridge; any other value selects LinearRegression.
    alpha : float
        Regularization strength, passed to Ridge only.
    min_samples : int
        Minimum rows needed to fit a race model. If None, it is
        max(100, 5 * (len(num_feats) + len(cat_feats))).

    Returns
    -------
    dict
        Maps (year, round) as ints to {"model", "cols", "med"}: the fitted
        estimator, the training columns, and the fill values from make_Xy.
    """
    if min_samples is None:
        min_samples = max(100, 5 * (len(num_feats) + len(cat_feats)))

    use_ridge = model_type == "ridge"
    fitted_by_race = {}

    for race_key, race_rows in df_train.groupby(["year", "round"]):
        # Races with too few rows are skipped, both before and after NaN targets are dropped.
        if len(race_rows) < min_samples:
            continue

        X_fit, y_fit, fill_values = make_Xy(race_rows, num_feats, cat_feats)
        if len(X_fit) < min_samples:
            continue

        estimator = Ridge(alpha=alpha) if use_ridge else LinearRegression()
        estimator.fit(X_fit, y_fit)

        season, round_no = race_key
        fitted_by_race[(int(season), int(round_no))] = {
            "model": estimator,
            "cols": X_fit.columns,
            "med": fill_values,
        }

    return fitted_by_race


In [24]:

# ─────────────────────────────────────────────────────────────
# PREDICT & EVALUATE
# ─────────────────────────────────────────────────────────────
def eval_feature_set(df_test, models, num_feats, cat_feats, name="model"):
    """Evaluate per-race models on an evaluation frame and pool the errors.

    Parameters
    ----------
    df_test : pd.DataFrame
        Evaluation rows (test or validation); grouped by ("year", "round").
    models : dict
        Output of train_race_models: (year, round) -> {"model", "cols", "med"}.
    num_feats, cat_feats : list
        The feature lists the models were trained with.
    name : str
        Label stored under "model" in the result.

    Returns
    -------
    dict or None
        Pooled MAE / RMSE / R2 over all evaluated races plus coverage counts,
        or None when no race could be evaluated.
    """
    preds, trues = [], []
    covered_races = 0
    total_races = df_test.groupby(["year", "round"]).ngroups

    for (y_, r_), d_race in df_test.groupby(["year", "round"]):
        fit = models.get((int(y_), int(r_)))
        if fit is None:
            # no model was trained for this race
            continue

        X, y, _ = make_Xy(d_race, num_feats, cat_feats,
                          fit_cols=fit["cols"], medians=fit["med"])
        if len(X) == 0:
            # Every row had a missing target: nothing to score, and predict()
            # would raise on an empty matrix. Not counted as covered.
            continue

        covered_races += 1
        preds.append(fit["model"].predict(X))
        trues.append(y.values)

    if not preds:
        return None

    yhat = np.concatenate(preds)
    ytrue = np.concatenate(trues)

    return {
        "model": name,
        "MAE": mean_absolute_error(ytrue, yhat),
        "RMSE": np.sqrt(mean_squared_error(ytrue, yhat)),
        "R2": r2_score(ytrue, yhat),
        "n_samples": len(yhat),
        "n_races_trained": len(models),
        "n_races_covered": covered_races,
        "test_race_coverage": f"{covered_races}/{total_races}"
    }


In [27]:
# ─────────────────────────────────────────────────────────────
# RUN COMPARISON: LINEAR BASELINES + RIDGE ON FULL MODEL ONLY
# ─────────────────────────────────────────────────────────────
# Feature set used for the ridge sweep. The same name is used in every printed
# label and every key stored in all_models_all, so that lookups built as
# f"{feature_set} + {model_type}" (see driver_effects_season_average) find the models.
FULL_FEATURE_SET = "Full (Driver+Team+Compound)"

print("="*100)
print("RACE-SPECIFIC MODELS: LINEAR BASELINES + OPTIMIZED RIDGE (Full Model Only)")
print("Step 1: Train LINEAR models on ALL feature sets")
print("Step 2: Optimize RIDGE alphas on FULL model only using VAL")
print("Step 3: Report best result")
print("="*100)

all_models_all = {}  # model key -> {(year, round): fitted model info}
test_results = []

# ─────────────────────────────────────────────────────────────
# PHASE 1: Train LINEAR models on all feature sets
# ─────────────────────────────────────────────────────────────
print("\nPhase 1: Training LINEAR baselines...\n")

for feature_name, cfg in FEATURE_SETS.items():
    print(f"[{feature_name}]", end=" ")

    # Train linear
    models = train_race_models(df_train, cfg["num"], cfg["cat"], model_type="linear")

    model_key = f"{feature_name} + Linear"
    all_models_all[model_key] = models

    print(f"✓ {len(models)} races", end="")

    # Eval on test
    test_res = eval_feature_set(df_test, models, cfg["num"], cfg["cat"], model_key)
    if test_res:
        print(f" | Test MAE: {test_res['MAE']:.4f}s")
        test_results.append(test_res)
    else:
        # terminate the progress line even when nothing could be evaluated
        print(" | no test races covered")

# ─────────────────────────────────────────────────────────────
# PHASE 2: Optimize Ridge ONLY on the full feature set
# ─────────────────────────────────────────────────────────────
print(f"\n{'─'*100}")
print(f"Phase 2: Optimizing RIDGE alphas on {FULL_FEATURE_SET} model only...\n")

full_cfg = FEATURE_SETS[FULL_FEATURE_SET]
ridge_alphas = [0.01, 0.1, 0.5, 1.0, 2.0, 5.0, 10.0]
val_results_ridge = {}

for alpha in ridge_alphas:
    print(f"[Ridge α={alpha}]", end=" ")

    # Train ridge
    models = train_race_models(df_train, full_cfg["num"], full_cfg["cat"],
                               model_type="ridge", alpha=alpha)

    model_key = f"{FULL_FEATURE_SET} + Ridge (α={alpha})"
    all_models_all[model_key] = models

    print(f"✓ {len(models)} races", end="")

    # Eval on val
    val_res = eval_feature_set(df_val, models, full_cfg["num"], full_cfg["cat"], model_key)
    if val_res:
        val_mae = val_res['MAE']
        val_results_ridge[alpha] = val_mae
        print(f" | Val MAE: {val_mae:.4f}s")
    else:
        print(" | no validation races covered")

# Select best alpha
if not val_results_ridge:
    raise RuntimeError("Ridge sweep produced no validation results; cannot select alpha.")
best_alpha = min(val_results_ridge, key=val_results_ridge.get)
best_val_mae = val_results_ridge[best_alpha]

print(f"\n✓ Best α on VAL: {best_alpha} (MAE: {best_val_mae:.4f}s)")

# Evaluate the best ridge model on test
print(f"\n{'─'*100}")
print(f"Phase 3: Testing best Ridge model...\n")

best_model_key = f"{FULL_FEATURE_SET} + Ridge (α={best_alpha})"
# Already fitted during the sweep on the same data with the same alpha, so it
# is reused rather than retrained.
best_models = all_models_all[best_model_key]
test_res_best = eval_feature_set(df_test, best_models, full_cfg["num"], full_cfg["cat"], best_model_key)

if test_res_best:
    print(f"\n{best_model_key}")
    print(f"  [Val]  MAE: {best_val_mae:.4f}s")
    print(f"  [Test] MAE: {test_res_best['MAE']:.4f}s, RMSE: {test_res_best['RMSE']:.4f}s, R²: {test_res_best['R2']:.4f}")
    test_results.append(test_res_best)

# ─────────────────────────────────────────────────────────────
# SUMMARY
# ─────────────────────────────────────────────────────────────
print("\n" + "="*100)
print("FINAL RESULTS (TEST SET)")
print("="*100 + "\n")

# NOTE(review): the table is ranked by test MAE, so the "BEST MODEL" line is a
# description of the test results, not a model selection (alpha was selected on val).
if test_results:
    results_df = pd.DataFrame(test_results).sort_values("MAE")
    print(results_df[["model", "MAE", "RMSE", "R2", "n_samples", "test_race_coverage"]].to_string(index=False))

    top_row = results_df.iloc[0]
    print(f"\n✓ BEST MODEL: {top_row['model']}")
    print(f"  Test MAE: {top_row['MAE']:.4f}s")
    print(f"  Test RMSE: {top_row['RMSE']:.4f}s")
    print(f"  Test R²: {top_row['R2']:.4f}")

# Store best model info for feature importance analysis
best_model_key_final = best_model_key


RACE-SPECIFIC MODELS: LINEAR BASELINES + OPTIMIZED RIDGE (Full Model Only)
Step 1: Train LINEAR models on ALL feature sets
Step 2: Optimize RIDGE alphas on FULL model only using VAL
Step 3: Report best result

Phase 1: Training LINEAR baselines...

[Tyre+Stint] ✓ 64 races | Test MAE: 0.8825s
[Tyre+Stint+Weather] ✓ 64 races | Test MAE: 0.7795s
[Full (Driver+Team+Compound)] ✓ 64 races | Test MAE: 0.5508s

────────────────────────────────────────────────────────────────────────────────────────────────────
Phase 2: Optimizing RIDGE alphas on Full (No Driver/Team) model only...

[Ridge α=0.01] ✓ 64 races | Val MAE: 0.4198s
[Ridge α=0.1] ✓ 64 races | Val MAE: 0.4183s
[Ridge α=0.5] ✓ 64 races | Val MAE: 0.4150s
[Ridge α=1.0] ✓ 64 races | Val MAE: 0.4137s
[Ridge α=2.0] ✓ 64 races | Val MAE: 0.4141s
[Ridge α=5.0] ✓ 64 races | Val MAE: 0.4201s
[Ridge α=10.0] ✓ 64 races | Val MAE: 0.4323s

✓ Best α on VAL: 1.0 (MAE: 0.4137s)

───────────────────────────────────────────────────────────────────────

In [36]:
# DRIVER EFFECTS: SEASON AVERAGE
# ─────────────────────────────────────────────────────────────
def driver_effects_season_average(year, feature_set="Full (Driver+Team+Compound)", model_type="Linear", top=10):
    """
    Average the per-race driver coefficients over all races of one season.

    Looks up the race models stored in ``all_models_all`` under
    ``f"{feature_set} + {model_type}"``, takes every ``RacingNumber_*``
    coefficient of the races in ``year`` and averages each driver over the
    races in which that driver has a coefficient. Prints the fastest and
    slowest drivers plus summary statistics.

    Note: coefficients are relative to each race's reference driver (the
    dummy level dropped when the race model was fitted), which is not
    guaranteed to be the same driver in every race.

    Parameters
    ----------
    year : int
        Race year (e.g., 2023)
    feature_set : str
        Feature set name (e.g., "Full (Driver+Team+Compound)")
    model_type : str
        Model type (e.g., "Linear" or "Ridge (α=1.0)")
    top : int
        Number of fastest/slowest drivers to show

    Returns
    -------
    pd.Series or None
        Season-average coefficient per ``RacingNumber_*`` column, sorted
        ascending (most negative first). None when the model key is unknown
        or the year has no races.

    Example
    -------
    driver_effects_season_average(2023,
                                 feature_set="Full (Driver+Team+Compound)",
                                 model_type="Linear",
                                 top=10)
    """
    model_key = f"{feature_set} + {model_type}"

    if model_key not in all_models_all:
        available = [k for k in all_models_all.keys() if feature_set in k]
        print(f"Model key '{model_key}' not found.")
        print(f"Available for {feature_set}: {available}")
        return None

    # Collect the driver coefficients of every race in the requested season.
    per_race_coefs = []
    for (model_year, _round_num), model_info in all_models_all[model_key].items():
        if int(model_year) != int(year):
            continue
        coefs = pd.Series(model_info["model"].coef_, index=model_info["cols"])
        per_race_coefs.append(coefs[coefs.index.str.startswith("RacingNumber_")])

    races_found = len(per_race_coefs)
    if races_found == 0:
        print(f"No races found for year {year} in {model_key}")
        return None

    # One column per race; the row mean skips races where a driver has no coefficient.
    avg_driver_coefs = pd.concat(per_race_coefs, axis=1).mean(axis=1).sort_values()

    if avg_driver_coefs.empty:
        print(f"No RacingNumber_* coefficients found for year {year} in {model_key}")
        return avg_driver_coefs

    def describe(coef):
        # The wording follows the sign, so a positive value is never reported as "faster".
        direction = "faster" if coef < 0 else "slower"
        return f"{abs(coef):.4f}s {direction} than ref"

    def print_table(title, ranked):
        print(title)
        print(f"{'Rank':<6} {'Driver':<30} {'Avg Coef':<15} {'Interpretation'}")
        print("─" * 100)
        for i, (driver_feat, coef) in enumerate(ranked.items(), 1):
            print(f"{i:<6} {driver_feat:<30} {coef:>14.6f}  {describe(coef)}")

    print(f"\n{'='*100}")
    print(f"SEASON AVERAGE DRIVER EFFECTS - {year}")
    print(f"Model: {model_key}")
    print(f"Races analyzed: {races_found}")
    print(f"{'='*100}\n")
    print("Interpretation: negative = faster than reference driver, positive = slower\n")

    print_table(f"TOP {top} FASTEST DRIVERS:\n", avg_driver_coefs.head(top))
    # Reverse the tail so that rank 1 is the slowest driver.
    print_table(f"\n\nTOP {top} SLOWEST DRIVERS:\n", avg_driver_coefs.tail(top).iloc[::-1])

    print(f"\n{'─'*100}")
    print(f"Statistics:")
    print(f"  Mean driver effect:     {avg_driver_coefs.mean():>10.6f}s")
    print(f"  Median driver effect:   {avg_driver_coefs.median():>10.6f}s")
    print(f"  Std dev:                {avg_driver_coefs.std():>10.6f}s")
    print(f"  Range (fastest-slowest): {(avg_driver_coefs.max() - avg_driver_coefs.min()):>10.6f}s")

    return avg_driver_coefs


# Example usage:
# NOTE(review): driver_effects_for_race is not defined in this cell; presumably it is
# defined in another cell. Confirm it exists earlier in the notebook, otherwise this
# call fails on Restart & Run All.
driver_effects_for_race(2024, 16, feature_set="Full (Driver+Team+Compound)", model_type="Linear", top=8)
# Season-level alternative (disabled):
#driver_effects_season_average(2025, feature_set="Full (Driver+Team+Compound)", model_type="Linear", top=20)


Driver effects: (2024, R16) - Full (Driver+Team+Compound) + Linear
→ negative = faster than reference driver

Fastest (most negative):
RacingNumber_16   -0.458380
RacingNumber_4    -0.402971
RacingNumber_44   -0.288267
RacingNumber_81   -0.233022
RacingNumber_55   -0.177226
RacingNumber_27   -0.035346
RacingNumber_63   -0.026490
RacingNumber_14   -0.018450

Slowest (most positive):
RacingNumber_20    0.146861
RacingNumber_43    0.180506
RacingNumber_18    0.190522
RacingNumber_11    0.223951
RacingNumber_24    0.278876
RacingNumber_77    0.377668
RacingNumber_10    0.461743
RacingNumber_31    0.554814


RacingNumber_16   -0.458380
RacingNumber_4    -0.402971
RacingNumber_44   -0.288267
RacingNumber_81   -0.233022
RacingNumber_55   -0.177226
RacingNumber_27   -0.035346
RacingNumber_63   -0.026490
RacingNumber_14   -0.018450
RacingNumber_23    0.047617
RacingNumber_3     0.133858
RacingNumber_20    0.146861
RacingNumber_43    0.180506
RacingNumber_18    0.190522
RacingNumber_11    0.223951
RacingNumber_24    0.278876
RacingNumber_77    0.377668
RacingNumber_10    0.461743
RacingNumber_31    0.554814
dtype: float64

Why Option C is the most interesting

What Option C really tests

A circuit-based split asks:

“Given everything I’ve learned about cars, drivers, tyres, weather, and track geometry, can I predict lap times on a circuit I have never seen before?”

That is real generalization — not “later laps of the same race”, and not “another race I half-recognize”.

If you can show that a model:

- trained on Bahrain, Melbourne, Suzuka, Austin, etc.
- and tested on Monaco, Silverstone, Monza

…still predicts well, that’s a strong claim — much stronger than anything a race-level split can support.
