In [8]:
"""
DECISION TREE - Circuit-Disjoint Split (Complete Standalone)

Compares Decision Tree against the LINEAR BASELINE:
   Tyre + Weather + State (Ridge/Linear)
  
Uses GroupKFold CV with race-balanced weights.
NO TEST LEAKAGE - test set touched only once after tuning.

This ensures a fair comparison:
  - Same target: LapTime_next_vs_stint_baseline
  - Same circuit-disjoint split
  - Same feature block (Tyre+Weather+State)
  - Different model class only (DT vs Linear)
"""
import os
import json
import warnings
import numpy as np
import pandas as pd
warnings.filterwarnings("ignore")

from sklearn.tree import DecisionTreeRegressor
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import GroupKFold, ParameterGrid
from sklearn.ensemble import RandomForestRegressor, HistGradientBoostingRegressor
from catboost import CatBoostRegressor


np.random.seed(42)

In [9]:

# CONFIGURATION

OUTDIR = "csv_output/nonlinear"
os.makedirs(OUTDIR, exist_ok=True)

TRAIN_PATH = "csv_output/Train_set.xlsx"
VAL_PATH = "csv_output/Validation_set.xlsx"
TEST_PATH = "csv_output/Test_set.xlsx"

TARGET = "LapTime_next_vs_stint_baseline"
N_CV_SPLITS = 5

NUM_TYRE = ["is_new_tyre", "TyreLife", "TyreAgeAtStart"]
NUM_WEATHER = ["AirTemp", "Humidity", "Pressure", "TrackTemp", "wind_sin", "wind_cos"]
NUM_STATE = [
    "is_leader", "in_drs_range", "in_clean_air", "in_dirty_air",
    "pushing", "laptime_rolling_std_3", "delta_laptime",
    "cumulative_degradation", "LapTime"
]
GEOM_COLS_ALL = [
    'num_drs_zones', 'length_m', 'num_turns',
    'slow_share', 'slow_cluster_max',
    'straight_ratio', 'straight_len_max_m', 'n_major_straights',
    'heavy_braking_zones', 'heavy_braking_mean_dv_kmh', 'hb_at_end_of_max',
    'avg_corner_angle', 'avg_corner_distance', 'drs_total_len_m'
]
CAT_FEATURES = ["Compound", "RacingNumber", "Team"]

In [10]:

# Helpers 

def keep_existing(cols, df):
    return [c for c in cols if c in df.columns]

def make_preprocessor(num_feats, cat_feats):
    transformers = [("num", SimpleImputer(strategy="median"), num_feats)]
    if len(cat_feats) > 0:
        try:
            ohe = OneHotEncoder(sparse_output=False, handle_unknown="ignore")
        except TypeError:
            ohe = OneHotEncoder(sparse=False, handle_unknown="ignore")
        transformers.append(("cat", ohe, cat_feats))
    return ColumnTransformer(transformers, remainder="drop")

def compute_race_balanced_weights(race_series):
    vc = race_series.value_counts()
    return race_series.map(lambda r: 1.0 / vc.loc[r])

def tune_with_groupkfold_cv(pipeline, param_grid, X_trainval, y_trainval, race_trainval, n_splits=5):
    best_score = np.inf
    best_params = None
    gkf = GroupKFold(n_splits=min(n_splits, race_trainval.nunique()))
    n_configs = len(list(ParameterGrid(param_grid)))
    
    for i, params in enumerate(ParameterGrid(param_grid), 1):
        fold_scores = []
        for tr_idx, va_idx in gkf.split(X_trainval, y_trainval, groups=race_trainval):
            race_tr = race_trainval.iloc[tr_idx]
            w_tr = compute_race_balanced_weights(race_tr)
            pipeline.set_params(**params)
            pipeline.fit(X_trainval.iloc[tr_idx], y_trainval.iloc[tr_idx], model__sample_weight=w_tr.values)
            yp = pipeline.predict(X_trainval.iloc[va_idx])
            race_va = race_trainval.iloc[va_idx]
            per_race = []
            for rid in race_va.unique():
                mask_race = (race_va == rid).values
                if mask_race.sum() > 0:
                    per_race.append(mean_absolute_error(y_trainval.iloc[va_idx].values[mask_race], yp[mask_race]))
            fold_scores.append(float(np.median(per_race)))
        
        score = float(np.median(fold_scores))
        if score < best_score:
            best_score = score
            best_params = params
        if i % 50 == 0 or i == n_configs:
            print(f"  [{i}/{n_configs}] Best: {best_score:.4f}s")
    
    return best_params, best_score

def refit_and_test_once(pipeline, best_params, X_trainval, y_trainval, race_trainval,
                        df_test_sorted, num_feats, cat_feats, target):
    w_trainval = compute_race_balanced_weights(race_trainval)
    pipeline.set_params(**best_params)
    pipeline.fit(X_trainval, y_trainval, model__sample_weight=w_trainval.values)
    
    X_test = df_test_sorted[num_feats + cat_feats].copy()
    y_test = df_test_sorted[target].astype(float).copy()
    test_pred_all = pipeline.predict(X_test)
    
    mask = y_test.notna()
    test_pred = test_pred_all[mask]
    y_test_valid = y_test[mask]
    
    test_mae = float(mean_absolute_error(y_test_valid, test_pred))
    test_rmse = float(np.sqrt(mean_squared_error(y_test_valid, test_pred)))
    test_r2 = float(r2_score(y_test_valid, test_pred))
    
    per_race_mae = []
    race_ids = []
    for (year, round_no, circuit), g in df_test_sorted.groupby(['year', 'round', 'name'], sort=False):
        group_indices = g.index.values
        group_mask = np.zeros(len(df_test_sorted), dtype=bool)
        group_mask[group_indices] = True
        group_mask = group_mask & mask.values
        if group_mask.sum() > 0:
            race_mae = mean_absolute_error(df_test_sorted[target].values[group_mask], test_pred_all[group_mask])
            per_race_mae.append(race_mae)
            race_ids.append(f"{year}_{round_no}_{circuit}")
    
    median_mae_per_race = float(np.median(per_race_mae))
    
    return pipeline, {
        "MAE": test_mae,
        "RMSE": test_rmse,
        "R2": test_r2,
        "median_MAE_per_race": median_mae_per_race,
        "per_race_mae": per_race_mae,
        "race_ids": race_ids,
    }

In [11]:
# Main

print("="*100)
print("DECISION TREE - Baseline vs Baseline+Geometry")
print("="*100)

# Load data
print("\nLoading data...")
df_train = pd.read_excel(TRAIN_PATH)
df_val = pd.read_excel(VAL_PATH)
df_test = pd.read_excel(TEST_PATH)
print(f"Train: {len(df_train):,} | Val: {len(df_val):,} | Test: {len(df_test):,}")

assert TARGET in df_train.columns, f"Target '{TARGET}' not found!"

# SANITY CHECK: Verify circuit-disjoint splits (no circuit overlap)
train_c = set(df_train["name"].unique())
val_c   = set(df_val["name"].unique())
test_c  = set(df_test["name"].unique())
assert train_c.isdisjoint(val_c) and train_c.isdisjoint(test_c) and val_c.isdisjoint(test_c), "ERROR: Circuit overlap in splits!"
print("✓ Verified: no circuit overlap across Train/Val/Test")

# Ensure is_new_tyre
for df in [df_train, df_val, df_test]:
    if 'is_new_tyre' not in df.columns and 'TyreAgeAtStart' in df.columns:
        df['is_new_tyre'] = (df['TyreAgeAtStart'] == 0).astype(int)

# Create race_id (for per-race reporting) and circuit_id (for unseen-circuit CV grouping)
for df in [df_train, df_val, df_test]:
    df['race_id'] = (
        df['year'].astype(str) + '_' + 
        df['round'].astype(str) + '_' + 
        df['name'].astype(str)
    )
    df['circuit_id'] = df['name'].astype(str)  # For unseen-circuit CV grouping

# Prepare features
num_tyre = keep_existing(NUM_TYRE, df_train)
num_weather = keep_existing(NUM_WEATHER, df_train)
num_state = keep_existing(NUM_STATE, df_train)
num_geometry = [c for c in GEOM_COLS_ALL if c in df_train.columns and c != "hb_at_end_of_max"]
CAT = keep_existing(CAT_FEATURES, df_train)

feat_baseline = num_tyre + num_weather + num_state
feat_with_geom = num_tyre + num_weather + num_state + num_geometry

print(f"\nFeature sets:")
print(f"  Baseline (Tyre+Weather+State): {len(feat_baseline)} features")
print(f"  +Geometry: {len(feat_with_geom)} features (+{len(feat_with_geom)-len(feat_baseline)})")

# Combine train+val
df_trainval = pd.concat([df_train, df_val], axis=0, ignore_index=True)
print(f"\nTrain+Val: {len(df_trainval):,} samples, {df_trainval['circuit_id'].nunique()} unique circuits")

# Sort test
sort_cols = ['year', 'round', 'name']
if 'lap_number' in df_test.columns:
    sort_cols.append('lap_number')
elif 'LapNumber' in df_test.columns:
    sort_cols.append('LapNumber')
df_test_sorted = df_test.sort_values(sort_cols).reset_index(drop=True)

# Hyperparameter grid
param_dt = {
    "model__max_depth": [6, 10],
    "model__min_samples_leaf": [5, 10],
    "model__max_features": ["sqrt", 0.5],
    "model__ccp_alpha": [0.0, 1e-4],
}

# For more extensive search (commented out as in original):
# param_dt = {
#     "model__max_depth": [4, 6, 8, 12],
#     "model__min_samples_leaf": [5, 10, 20],
#     "model__min_samples_split": [2, 10],
#     "model__max_features": [None, "sqrt", 0.5],
#     "model__max_leaf_nodes": [None, 64, 128, 256],
#     "model__ccp_alpha": [0.0, 1e-4, 5e-4],
# }


n_configs = len(list(ParameterGrid(param_dt)))
print(f"Hyperparameter grid: {n_configs} configs\n")

# =========================================================
# RUN BOTH FEATURE SETS
# =========================================================
results = []

for set_name, num_feats in [("Tyre+Weather+State", feat_baseline), 
                             ("Tyre+Weather+State+Geometry", feat_with_geom)]:
    print("="*100)
    print(f"{set_name}")
    print("="*100)
    
    X_trainval = df_trainval[num_feats + CAT].copy()
    y_trainval = df_trainval[TARGET].astype(float).copy()
    circuit_trainval = df_trainval['circuit_id'].copy()  # Unseen-circuit grouping
    race_trainval = df_trainval['race_id'].copy()        # For per-race reporting
    
    mask_trainval = y_trainval.notna()
    X_trainval = X_trainval.loc[mask_trainval]
    y_trainval = y_trainval.loc[mask_trainval]
    circuit_trainval = circuit_trainval.loc[mask_trainval]
    race_trainval = race_trainval.loc[mask_trainval]
    
    print(f"Samples: {len(X_trainval):,}")
    print(f"Unique circuits (CV groups): {circuit_trainval.nunique()}")
    
    preprocessor = make_preprocessor(num_feats, CAT)
    pipeline = Pipeline([
        ("preprocess", preprocessor),
        ("model", DecisionTreeRegressor(random_state=42))
    ])
    
    print(f"Tuning ({len(X_trainval):,} train samples)...")
    best_params, best_cv_score = tune_with_groupkfold_cv(
        pipeline=pipeline,
        param_grid=param_dt,
        X_trainval=X_trainval,
        y_trainval=y_trainval,
        race_trainval=circuit_trainval,  # Group by CIRCUIT for unseen-circuit CV
        n_splits=N_CV_SPLITS
    )
    
    pipeline_fitted, test_results = refit_and_test_once(
        pipeline=pipeline,
        best_params=best_params,
        X_trainval=X_trainval,
        y_trainval=y_trainval,
        race_trainval=circuit_trainval,  # Weight by CIRCUIT
        df_test_sorted=df_test_sorted,
        num_feats=num_feats,
        cat_feats=CAT,
        target=TARGET
    )
    
    tree_depth = pipeline_fitted.named_steps['model'].get_depth()
    tree_leaves = pipeline_fitted.named_steps['model'].get_n_leaves()
    
    print(f"\nResults:")
    print(f"  CV MAE:               {best_cv_score:.4f}s")
    print(f"  Test MAE:             {test_results['MAE']:.4f}s")
    print(f"  Test RMSE:            {test_results['RMSE']:.4f}s")
    print(f"  Test R²:              {test_results['R2']:.4f}")
    print(f"  Test Median MAE/race: {test_results['median_MAE_per_race']:.4f}s")
    print(f"  Tree depth:           {tree_depth}")
    print(f"  Tree leaves:          {tree_leaves}")
    
    results.append({
        "block": set_name,
        "n_features": len(num_feats) + len(CAT),
        "CV_MAE_per_race": best_cv_score,
        "test_MAE": test_results['MAE'],
        "test_RMSE": test_results['RMSE'],
        "test_R2": test_results['R2'],
        "test_median_MAE_per_race": test_results['median_MAE_per_race'],
        "tree_depth": tree_depth,
        "tree_leaves": tree_leaves,
        "best_params": json.dumps(best_params),
    })
    
    # Save per-race MAE
    per_race_df = pd.DataFrame({
        'Race': test_results['race_ids'],
        'MAE': test_results['per_race_mae']
    }).sort_values('MAE', ascending=False)
    per_race_path = os.path.join(OUTDIR, f"dt_{set_name.replace('+', '_')}_per_race_mae.csv")
    per_race_df.to_csv(per_race_path, index=False)
    
    # Save model config as JSON
    model_info = {
        "model": "DecisionTree",
        "feature_set": set_name,
        "n_features": len(num_feats) + len(CAT),
        "best_params": best_params,
        "cv_mae_median_per_circuit": float(best_cv_score),
        "test_mae": float(test_results['MAE']),
        "test_rmse": float(test_results['RMSE']),
        "test_r2": float(test_results['R2']),
        "test_median_mae_per_race": float(test_results['median_MAE_per_race']),
        "tree_depth": int(tree_depth),
        "tree_leaves": int(tree_leaves),
    }
    model_json_path = os.path.join(OUTDIR, f"dt_{set_name.replace('+', '_')}_config.json")
    with open(model_json_path, 'w') as f:
        json.dump(model_info, f, indent=2)
    print(f"Saved: {model_json_path}")

DECISION TREE - Baseline vs Baseline+Geometry

Loading data...
Train: 26,221 | Val: 7,199 | Test: 6,976
✓ Verified: no circuit overlap across Train/Val/Test

Feature sets:
  Baseline (Tyre+Weather+State): 18 features
  +Geometry: 29 features (+11)

Train+Val: 33,420 samples, 19 unique circuits
Hyperparameter grid: 16 configs

Tyre+Weather+State
Samples: 33,420
Unique circuits (CV groups): 19
Tuning (33,420 train samples)...
  [16/16] Best: 0.3852s

Results:
  CV MAE:               0.3852s
  Test MAE:             0.3601s
  Test RMSE:            0.5647s
  Test R²:              0.1055
  Test Median MAE/race: 0.3409s
  Tree depth:           10
  Tree leaves:          166
Saved: csv_output/nonlinear/dt_Tyre_Weather_State_config.json
Tyre+Weather+State+Geometry
Samples: 33,420
Unique circuits (CV groups): 19
Tuning (33,420 train samples)...
  [16/16] Best: 0.3839s

Results:
  CV MAE:               0.3839s
  Test MAE:             0.3876s
  Test RMSE:            0.6119s
  Test R²:             

In [19]:

# =========================================================
# SUMMARY

print("\n" + "="*100)
print("COMPARISON")
print("="*100)

summary = pd.DataFrame(results)
summary_path = os.path.join(OUTDIR, "dt_comparison.csv")
summary.to_csv(summary_path, index=False)

print("\n" + summary[['block', 'test_MAE', 'test_RMSE', 'test_R2', 'tree_depth', 'tree_leaves']].to_string(index=False))

# Difference
baseline = summary.iloc[0]
with_geom = summary.iloc[1]

diff_mae = with_geom['test_MAE'] - baseline['test_MAE']
pct_change = (diff_mae / baseline['test_MAE']) * 100

print(f"\nBaseline MAE:      {baseline['test_MAE']:.4f}s")
print(f"+Geometry MAE:     {with_geom['test_MAE']:.4f}s")
print(f"Difference:        {diff_mae:+.4f}s ({pct_change:+.1f}%)")

if abs(diff_mae) < 0.0001:
    print("→ Same (geometry neutral)")
elif diff_mae < 0:
    print(f"→ Geometry HELPS by {abs(pct_change):.1f}%")
else:
    print(f"→ Geometry HURTS by {pct_change:.1f}%")

print(f"\n✓ Saved to {OUTDIR}/")



COMPARISON

                      block  test_MAE  test_RMSE   test_R2  tree_depth  tree_leaves
         Tyre+Weather+State  0.345124   0.516537  0.251697           6           27
Tyre+Weather+State+Geometry  0.483366   0.791382 -0.756493           9           50

Baseline MAE:      0.3451s
+Geometry MAE:     0.4834s
Difference:        +0.1382s (+40.1%)
→ Geometry HURTS by 40.1%

✓ Saved to csv_output/nonlinear/


In [20]:

# =========================================================
# RANDOM FOREST
# =========================================================
print("\n" + "="*100)
print("RANDOM FOREST - Baseline vs Baseline+Geometry")
print("="*100)

# Random Forest hyperparameter grid
# param_rf = {
#     "model__n_estimators": [300, 600],
#     "model__max_depth": [8, 12, None],
#     "model__min_samples_leaf": [1, 5, 20],
#     "model__max_features": ["sqrt", 0.5],
# }

#More extensive grid (commented):
param_rf = {
    "model__n_estimators": [100, 300, 600],
    "model__max_depth": [8, 12, 16, None],
    "model__min_samples_leaf": [1, 5, 10, 20],
    "model__min_samples_split": [2, 5, 10],
    "model__max_features": ["sqrt", 0.5, 0.7],
}

n_configs_rf = len(list(ParameterGrid(param_rf)))
print(f"\nHyperparameter grid - Random Forest: {n_configs_rf} configs\n")

rf_results = []

for set_name, num_feats in [("Tyre+Weather+State", feat_baseline), 
                             ("Tyre+Weather+State+Geometry", feat_with_geom)]:
    print(f"\n{set_name}")
    
    X_trainval = df_trainval[num_feats + CAT].copy()
    y_trainval = df_trainval[TARGET].astype(float).copy()
    circuit_trainval = df_trainval['circuit_id'].copy()  # Unseen-circuit grouping
    
    mask_trainval = y_trainval.notna()
    X_trainval = X_trainval.loc[mask_trainval]
    y_trainval = y_trainval.loc[mask_trainval]
    circuit_trainval = circuit_trainval.loc[mask_trainval]
    
    preprocessor = make_preprocessor(num_feats, CAT)
    pipeline = Pipeline([
        ("preprocess", preprocessor),
        ("model", RandomForestRegressor(random_state=42, n_jobs=-1))
    ])
    
    print(f"Tuning ({len(X_trainval):,} train samples)...")
    best_params, best_cv_score = tune_with_groupkfold_cv(
        pipeline=pipeline,
        param_grid=param_rf,
        X_trainval=X_trainval,
        y_trainval=y_trainval,
        race_trainval=circuit_trainval,  # Group by CIRCUIT for unseen-circuit CV
        n_splits=N_CV_SPLITS
    )
    
    pipeline_fitted, test_results = refit_and_test_once(
        pipeline=pipeline,
        best_params=best_params,
        X_trainval=X_trainval,
        y_trainval=y_trainval,
        race_trainval=circuit_trainval,  # Weight by CIRCUIT
        df_test_sorted=df_test_sorted,
        num_feats=num_feats,
        cat_feats=CAT,
        target=TARGET
    )
    
    n_estimators = pipeline_fitted.named_steps['model'].n_estimators
    
    print(f"\nResults:")
    print(f"  CV MAE (median per-circuit): {best_cv_score:.4f}s")
    print(f"  Test MAE:                    {test_results['MAE']:.4f}s")
    print(f"  Test RMSE:                   {test_results['RMSE']:.4f}s")
    print(f"  Test R²:                     {test_results['R2']:.4f}")
    print(f"  Test Median MAE/race:        {test_results['median_MAE_per_race']:.4f}s")
    print(f"  N estimators:                {n_estimators}")
    
    rf_results.append({
        "block": set_name,
        "n_features": len(num_feats) + len(CAT),
        "CV_MAE_per_circuit": best_cv_score,
        "test_MAE": test_results['MAE'],
        "test_RMSE": test_results['RMSE'],
        "test_R2": test_results['R2'],
        "test_median_MAE_per_race": test_results['median_MAE_per_race'],
        "n_estimators": n_estimators,
        "best_params": json.dumps(best_params),
    })
    
    # Save per-race MAE
    per_race_df = pd.DataFrame({
        'Race': test_results['race_ids'],
        'MAE': test_results['per_race_mae']
    }).sort_values('MAE', ascending=False)
    per_race_path = os.path.join(OUTDIR, f"rf_{set_name.replace('+', '_')}_per_race_mae.csv")
    per_race_df.to_csv(per_race_path, index=False)
    
    # Save model config as JSON
    model_info = {
        "model": "RandomForest",
        "feature_set": set_name,
        "n_features": len(num_feats) + len(CAT),
        "best_params": best_params,
        "cv_mae_median_per_circuit": float(best_cv_score),
        "test_mae": float(test_results['MAE']),
        "test_rmse": float(test_results['RMSE']),
        "test_r2": float(test_results['R2']),
        "test_median_mae_per_race": float(test_results['median_MAE_per_race']),
        "n_estimators": int(n_estimators),
    }
    model_json_path = os.path.join(OUTDIR, f"rf_{set_name.replace('+', '_')}_config.json")
    with open(model_json_path, 'w') as f:
        json.dump(model_info, f, indent=2)
    print(f"Saved: {model_json_path}")

# =========================================================
# RANDOM FOREST SUMMARY
# =========================================================
print("\n" + "="*100)
print("RANDOM FOREST - COMPARISON")
print("="*100)

rf_summary = pd.DataFrame(rf_results)
rf_summary_path = os.path.join(OUTDIR, "rf_comparison.csv")
rf_summary.to_csv(rf_summary_path, index=False)

print("\n" + rf_summary[['block', 'test_MAE', 'test_RMSE', 'test_R2', 'n_estimators']].to_string(index=False))

baseline_rf = rf_summary.iloc[0]
with_geom_rf = rf_summary.iloc[1]
diff_mae_rf = with_geom_rf['test_MAE'] - baseline_rf['test_MAE']
pct_change_rf = (diff_mae_rf / baseline_rf['test_MAE']) * 100

print(f"\nBaseline MAE:      {baseline_rf['test_MAE']:.4f}s")
print(f"+Geometry MAE:     {with_geom_rf['test_MAE']:.4f}s")
print(f"Difference:        {diff_mae_rf:+.4f}s ({pct_change_rf:+.1f}%)")

if abs(diff_mae_rf) < 0.0001:
    print("→ Geometry: NEUTRAL")
elif diff_mae_rf < 0:
    print(f"→ Geometry: HELPS by {abs(pct_change_rf):.1f}%")
else:
    print(f"→ Geometry: HURTS by {pct_change_rf:.1f}%")

print(f"\n✓ All results saved to {OUTDIR}/")



RANDOM FOREST - Baseline vs Baseline+Geometry

Hyperparameter grid - Random Forest: 432 configs


Tyre+Weather+State
Tuning (33,420 train samples)...
  [50/432] Best: 0.3639s
  [100/432] Best: 0.3520s
  [150/432] Best: 0.3520s
  [200/432] Best: 0.3520s
  [250/432] Best: 0.3518s
  [300/432] Best: 0.3518s
  [350/432] Best: 0.3518s
  [400/432] Best: 0.3518s
  [432/432] Best: 0.3518s

Results:
  CV MAE (median per-circuit): 0.3518s
  Test MAE:                    0.3348s
  Test RMSE:                   0.4859s
  Test R²:                     0.3378
  Test Median MAE/race:        0.3012s
  N estimators:                600
Saved: csv_output/nonlinear/rf_Tyre_Weather_State_config.json

Tyre+Weather+State+Geometry
Tuning (33,420 train samples)...
  [50/432] Best: 0.3579s
  [100/432] Best: 0.3479s
  [150/432] Best: 0.3475s
  [200/432] Best: 0.3475s
  [250/432] Best: 0.3475s
  [300/432] Best: 0.3475s
  [350/432] Best: 0.3474s
  [400/432] Best: 0.3474s
  [432/432] Best: 0.3474s

Results:
  CV MAE (

In [25]:

# HIST GRADIENT BOOSTING - Circuit-Disjoint Split


print("\n\n" + "="*100)
print("HIST GRADIENT BOOSTING - Circuit-Disjoint Evaluation (GroupKFold CV)")
print("="*100)


# Helper function (HGB-specific, no sample_weight)

def tune_hgb_with_groupkfold_cv(pipeline, param_grid, X_trainval, y_trainval, circuit_trainval, 
                                 n_splits=5):
    """
    GroupKFold CV for HistGradientBoosting (NO sample_weight - not supported).
    Returns best params based on median MAE per circuit across folds.
    Groups by CIRCUIT for unseen-circuit evaluation.
    """
    best_score = np.inf
    best_params = None
    
    gkf = GroupKFold(n_splits=min(n_splits, circuit_trainval.nunique()))
    n_configs = len(list(ParameterGrid(param_grid)))
    
    print(f"  Running GroupKFold CV ({n_splits} splits, {n_configs} configs, grouped by CIRCUIT)...")
    
    for i, params in enumerate(ParameterGrid(param_grid), 1):
        fold_scores = []
        
        for tr_idx, va_idx in gkf.split(X_trainval, y_trainval, groups=circuit_trainval):
            # Fit WITHOUT sample weights (HGB doesn't support them reliably)
            pipeline.set_params(**params)
            pipeline.fit(X_trainval.iloc[tr_idx], y_trainval.iloc[tr_idx])
            
            # Predict on validation fold
            yp = pipeline.predict(X_trainval.iloc[va_idx])
            
            # Compute per-circuit MAE for this fold
            circuit_va = circuit_trainval.iloc[va_idx]
            per_circuit = []
            for cid in circuit_va.unique():
                mask_circuit = (circuit_va == cid).values
                if mask_circuit.sum() > 0:
                    per_circuit.append(mean_absolute_error(
                        y_trainval.iloc[va_idx].values[mask_circuit], 
                        yp[mask_circuit]
                    ))
            
            fold_scores.append(float(np.median(per_circuit)))
        
        # Median across folds
        score = float(np.median(fold_scores))
        
        if score < best_score:
            best_score = score
            best_params = params
        
        if i % 50 == 0 or i == n_configs:
            print(f"    [{i}/{n_configs}] Best CV: {best_score:.4f}s")
    
    print(f"  ✓ Best CV score: {best_score:.4f}s")
    return best_params, best_score

# =========================================================
# HYPERPARAMETER GRID FOR HGB
# =========================================================
# param_hgb = {
#     "model__max_depth": [3, 5, None],
#     "model__learning_rate": [0.05, 0.1],
#     "model__max_iter": [300, 600],
#     "model__min_samples_leaf": [20, 50],
# }

#More extensive grid (commented):
param_hgb = {
    "model__max_depth": [3, 5, 7, None],
    "model__learning_rate": [0.01, 0.05, 0.1, 0.2],
    "model__max_iter": [100, 300, 600, 1000],
    "model__min_samples_leaf": [20, 50, 100],
    "model__l2_regularization": [0.0, 0.1, 1.0],
}


n_configs_hgb = len(list(ParameterGrid(param_hgb)))
print(f"\n\nHyperparameter grid - HistGradientBoosting: {n_configs_hgb} configs\n")


# RUN HGB EXPERIMENTS (using same feature sets as DT/RF)

hgb_results = []

for set_name, num_feats in [("Tyre+Weather+State", feat_baseline), 
                             ("Tyre+Weather+State+Geometry", feat_with_geom)]:
    print(f"\n{set_name}")
    
    X_trainval = df_trainval[num_feats + CAT].copy()
    y_trainval = df_trainval[TARGET].astype(float).copy()
    circuit_trainval = df_trainval['circuit_id'].copy()
    
    mask_trainval = y_trainval.notna()
    X_trainval = X_trainval.loc[mask_trainval]
    y_trainval = y_trainval.loc[mask_trainval]
    circuit_trainval = circuit_trainval.loc[mask_trainval]
    
    preprocessor = make_preprocessor(num_feats, CAT)
    pipeline = Pipeline([
        ("preprocess", preprocessor),
        ("model", HistGradientBoostingRegressor(random_state=42))
    ])
    
    print(f"Tuning ({len(X_trainval):,} train samples)...")
    best_params, best_cv_score = tune_hgb_with_groupkfold_cv(
        pipeline=pipeline,
        param_grid=param_hgb,
        X_trainval=X_trainval,
        y_trainval=y_trainval,
        circuit_trainval=circuit_trainval,
        n_splits=N_CV_SPLITS
    )
    
    # Refit and test
    pipeline.set_params(**best_params)
    pipeline.fit(X_trainval, y_trainval)
    
    X_test = df_test_sorted[num_feats + CAT].copy()
    y_test = df_test_sorted[TARGET].astype(float).copy()
    test_pred_all = pipeline.predict(X_test)
    
    mask = y_test.notna()
    test_pred = test_pred_all[mask]
    y_test_valid = y_test[mask]
    
    test_mae = float(mean_absolute_error(y_test_valid, test_pred))
    test_rmse = float(np.sqrt(mean_squared_error(y_test_valid, test_pred)))
    test_r2 = float(r2_score(y_test_valid, test_pred))
    
    # Per-circuit MAE
    per_circuit_mae = []
    circuit_names = []
    for circuit in df_test_sorted['name'].unique():
        circuit_mask = (df_test_sorted['name'] == circuit).values
        circuit_mask = circuit_mask & mask.values
        if circuit_mask.sum() > 0:
            circuit_mae = mean_absolute_error(
                df_test_sorted[TARGET].values[circuit_mask], 
                test_pred_all[circuit_mask]
            )
            per_circuit_mae.append(circuit_mae)
            circuit_names.append(str(circuit))
    
    median_mae_per_circuit = float(np.median(per_circuit_mae))
    n_iter = pipeline.named_steps['model'].n_iter_
    
    print(f"\nResults:")
    print(f"  CV MAE (median per-circuit): {best_cv_score:.4f}s")
    print(f"  Test MAE:                    {test_mae:.4f}s")
    print(f"  Test RMSE:                   {test_rmse:.4f}s")
    print(f"  Test R²:                     {test_r2:.4f}")
    print(f"  Test Median MAE/circuit:     {median_mae_per_circuit:.4f}s")
    print(f"  N iterations:                {n_iter}")
    
    hgb_results.append({
        "block": set_name,
        "n_features": len(num_feats) + len(CAT),
        "CV_MAE_per_circuit": best_cv_score,
        "test_MAE": test_mae,
        "test_RMSE": test_rmse,
        "test_R2": test_r2,
        "test_median_MAE_per_circuit": median_mae_per_circuit,
        "n_iter": n_iter,
        "best_params": json.dumps(best_params),
    })
    
    # Save per-circuit MAE
    per_circuit_df = pd.DataFrame({
        'Circuit': circuit_names,
        'MAE': per_circuit_mae
    }).sort_values('MAE', ascending=False)
    per_circuit_path = os.path.join(OUTDIR, f"hgb_{set_name.replace('+', '_')}_per_circuit_mae.csv")
    per_circuit_df.to_csv(per_circuit_path, index=False)
    
    # Save model config as JSON
    model_info = {
        "model": "HistGradientBoosting",
        "feature_set": set_name,
        "n_features": len(num_feats) + len(CAT),
        "best_params": best_params,
        "cv_mae_median_per_circuit": float(best_cv_score),
        "test_mae": float(test_mae),
        "test_rmse": float(test_rmse),
        "test_r2": float(test_r2),
        "test_median_mae_per_circuit": float(median_mae_per_circuit),
        "n_iter": int(n_iter),
    }
    model_json_path = os.path.join(OUTDIR, f"hgb_{set_name.replace('+', '_')}_config.json")
    with open(model_json_path, 'w') as f:
        json.dump(model_info, f, indent=2)
    print(f"Saved: {model_json_path}")



HIST GRADIENT BOOSTING - Circuit-Disjoint Evaluation (GroupKFold CV)


Hyperparameter grid - HistGradientBoosting: 576 configs


Tyre+Weather+State
Tuning (33,420 train samples)...
  Running GroupKFold CV (5 splits, 576 configs, grouped by CIRCUIT)...
    [50/576] Best CV: 0.3621s
    [100/576] Best CV: 0.3618s
    [150/576] Best CV: 0.3618s
    [200/576] Best CV: 0.3618s
    [250/576] Best CV: 0.3616s
    [300/576] Best CV: 0.3616s
    [350/576] Best CV: 0.3616s
    [400/576] Best CV: 0.3616s
    [450/576] Best CV: 0.3616s
    [500/576] Best CV: 0.3616s
    [550/576] Best CV: 0.3616s
    [576/576] Best CV: 0.3616s
  ✓ Best CV score: 0.3616s

Results:
  CV MAE (median per-circuit): 0.3616s
  Test MAE:                    0.3348s
  Test RMSE:                   0.4989s
  Test R²:                     0.3020
  Test Median MAE/circuit:     0.3182s
  N iterations:                100
Saved: csv_output/nonlinear/hgb_Tyre_Weather_State_config.json

Tyre+Weather+State+Geometry
Tuning (33,420 tr

In [5]:

# Hist Gradient Boosting summary

print("\n" + "="*100)
print("HIST GRADIENT BOOSTING - COMPARISON")
print("="*100)

hgb_summary = pd.DataFrame(hgb_results)
hgb_summary_path = os.path.join(OUTDIR, "hgb_comparison.csv")
hgb_summary.to_csv(hgb_summary_path, index=False)

print("\n" + hgb_summary[['block', 'test_MAE', 'test_RMSE', 'test_R2', 'n_iter']].to_string(index=False))

baseline_hgb = hgb_summary.iloc[0]
with_geom_hgb = hgb_summary.iloc[1]
diff_mae_hgb = with_geom_hgb['test_MAE'] - baseline_hgb['test_MAE']
pct_change_hgb = (diff_mae_hgb / baseline_hgb['test_MAE']) * 100

print(f"\nBaseline MAE:      {baseline_hgb['test_MAE']:.4f}s")
print(f"+Geometry MAE:     {with_geom_hgb['test_MAE']:.4f}s")
print(f"Difference:        {diff_mae_hgb:+.4f}s ({pct_change_hgb:+.1f}%)")

if abs(diff_mae_hgb) < 0.0001:
    print("→ Geometry: NEUTRAL")
elif diff_mae_hgb < 0:
    print(f"→ Geometry: HELPS by {abs(pct_change_hgb):.1f}%")
else:
    print(f"→ Geometry: HURTS by {pct_change_hgb:.1f}%")

print(f"\n✓ All HGB results saved to {OUTDIR}/")



HIST GRADIENT BOOSTING - COMPARISON


NameError: name 'hgb_results' is not defined

In [13]:
# =========================================================
# CATBOOST REGRESSOR
# =========================================================

print("\n\n" + "="*100)
print("CATBOOST - Circuit-Disjoint Evaluation (GroupKFold CV)")
print("="*100)

# =========================================================
# HELPER FUNCTION FOR CATBOOST (no sample_weight in sklearn pipeline)
# =========================================================
def tune_catboost_with_groupkfold_cv(param_grid, X_trainval, y_trainval, circuit_trainval, 
                                      n_splits=5, categorical_features=None):
    """
    GroupKFold CV for CatBoost (natively handles categorical features).
    Returns best params based on median MAE per circuit across folds.
    Groups by CIRCUIT for unseen-circuit evaluation.
    """
    best_score = np.inf
    best_params = None
    
    gkf = GroupKFold(n_splits=min(n_splits, circuit_trainval.nunique()))
    n_configs = len(list(ParameterGrid(param_grid)))
    
    print(f"  Running GroupKFold CV ({n_splits} splits, {n_configs} configs, grouped by CIRCUIT)...")
    
    for i, params in enumerate(ParameterGrid(param_grid), 1):
        fold_scores = []
        
        for tr_idx, va_idx in gkf.split(X_trainval, y_trainval, groups=circuit_trainval):
            # Prepare data for this fold
            X_tr = X_trainval.iloc[tr_idx].copy()
            y_tr = y_trainval.iloc[tr_idx].copy()
            X_va = X_trainval.iloc[va_idx].copy()
            y_va = y_trainval.iloc[va_idx].copy()
            
            # Train CatBoost with early stopping
            model = CatBoostRegressor(
                random_state=42,
                verbose=0,
                iterations=3000,
                early_stopping_rounds=100,
                **params
            )
            
            # Fit with validation set and categorical features
            if categorical_features:
                model.fit(
                    X_tr, y_tr,
                    cat_features=categorical_features,
                    eval_set=(X_va, y_va),
                    use_best_model=True,
                    verbose=False
                )
            else:
                model.fit(
                    X_tr, y_tr,
                    eval_set=(X_va, y_va),
                    use_best_model=True,
                    verbose=False
                )
            
            # Predict on validation fold
            yp = model.predict(X_va)
            
            # Compute per-circuit MAE for this fold
            circuit_va = circuit_trainval.iloc[va_idx]
            per_circuit = []
            for cid in circuit_va.unique():
                mask_circuit = (circuit_va == cid).values
                if mask_circuit.sum() > 0:
                    per_circuit.append(mean_absolute_error(
                        y_va.values[mask_circuit], 
                        yp[mask_circuit]
                    ))
            
            fold_scores.append(float(np.median(per_circuit)))
        
        # Median across folds
        score = float(np.median(fold_scores))
        
        if score < best_score:
            best_score = score
            best_params = params
        
        if i % 50 == 0 or i == n_configs:
            print(f"    [{i}/{n_configs}] Best CV: {best_score:.4f}s")
    
    print(f"  ✓ Best CV score: {best_score:.4f}s")
    return best_params, best_score


# =========================================================
# HYPERPARAMETER GRID FOR CATBOOST
# =========================================================
# param_catboost = {
#     "depth": [4, 6, 8],
#     "learning_rate": [0.05, 0.1],
#     "l2_leaf_reg": [1, 3, 5],
# }

# More extensive grid (commented, with early stopping iterations can be higher):
param_catboost = {
    "depth": [4, 6, 8, 10],
    "learning_rate": [0.01, 0.03, 0.05, 0.1],
    "l2_leaf_reg": [1, 3, 5, 10],
    "random_strength": [0.5, 1.0],
}

n_configs_catboost = len(list(ParameterGrid(param_catboost)))
print(f"\nHyperparameter grid - CatBoost: {n_configs_catboost} configs (with early stopping)\n")

# =========================================================
# SETUP (if running CatBoost separately)
# =========================================================
# Define feature sets and data if not already in memory
try:
    feat_baseline
except NameError:
    print("Setting up features and data...")
    
    # Load data
    df_train = pd.read_excel(TRAIN_PATH)
    df_val = pd.read_excel(VAL_PATH)
    df_test = pd.read_excel(TEST_PATH)
    
    # Ensure is_new_tyre
    for df in [df_train, df_val, df_test]:
        if 'is_new_tyre' not in df.columns and 'TyreAgeAtStart' in df.columns:
            df['is_new_tyre'] = (df['TyreAgeAtStart'] == 0).astype(int)
    
    # Create circuit_id for grouping
    for df in [df_train, df_val, df_test]:
        df['race_id'] = (
            df['year'].astype(str) + '_' + 
            df['round'].astype(str) + '_' + 
            df['name'].astype(str)
        )
        df['circuit_id'] = df['name'].astype(str)
    
    # Prepare features
    num_tyre = keep_existing(NUM_TYRE, df_train)
    num_weather = keep_existing(NUM_WEATHER, df_train)
    num_state = keep_existing(NUM_STATE, df_train)
    num_geometry = [c for c in GEOM_COLS_ALL if c in df_train.columns and c != "hb_at_end_of_max"]
    CAT = keep_existing(CAT_FEATURES, df_train)
    
    feat_baseline = num_tyre + num_weather + num_state
    feat_with_geom = num_tyre + num_weather + num_state + num_geometry
    
    # Combine train+val
    df_trainval = pd.concat([df_train, df_val], axis=0, ignore_index=True)
    
    # Sort test
    sort_cols = ['year', 'round', 'name']
    if 'lap_number' in df_test.columns:
        sort_cols.append('lap_number')
    elif 'LapNumber' in df_test.columns:
        sort_cols.append('LapNumber')
    df_test_sorted = df_test.sort_values(sort_cols).reset_index(drop=True)
    
    print("✓ Features and data setup complete")



# =========================================================
# RUN CATBOOST EXPERIMENTS (using same feature sets as DT/RF/HGB)
# =========================================================
catboost_results = []

for set_name, num_feats in [("Tyre+Weather+State", feat_baseline), 
                             ("Tyre+Weather+State+Geometry", feat_with_geom)]:
    print(f"\n{set_name}")
    
    # For CatBoost, we work with raw data (not preprocessed)
    # Separate numerical and categorical
    X_trainval_raw = df_trainval[num_feats + CAT].copy()
    y_trainval_raw = df_trainval[TARGET].astype(float).copy()
    circuit_trainval_raw = df_trainval['circuit_id'].copy()
    
    mask_trainval = y_trainval_raw.notna()
    X_trainval_raw = X_trainval_raw.loc[mask_trainval]
    y_trainval_raw = y_trainval_raw.loc[mask_trainval]
    circuit_trainval_raw = circuit_trainval_raw.loc[mask_trainval]
    
    # Ensure categorical columns are strings (CatBoost requirement)
    for c in CAT:
        if c in X_trainval_raw.columns:
            X_trainval_raw[c] = X_trainval_raw[c].astype(str)
    
    # Get categorical feature indices
    cat_indices = [X_trainval_raw.columns.get_loc(c) for c in CAT if c in X_trainval_raw.columns]
    
    print(f"Tuning ({len(X_trainval_raw):,} train samples, {len(cat_indices)} categorical features)...")
    best_params, best_cv_score = tune_catboost_with_groupkfold_cv(
        param_grid=param_catboost,
        X_trainval=X_trainval_raw,
        y_trainval=y_trainval_raw,
        circuit_trainval=circuit_trainval_raw,
        n_splits=N_CV_SPLITS,
        categorical_features=cat_indices if cat_indices else None
    )
    
    # Refit and test
    final_model = CatBoostRegressor(
        random_state=42,
        verbose=0,
        iterations=3000,
        early_stopping_rounds=100,
        **best_params
    )
    
    # Prepare validation data for early stopping in final refit
    X_val_raw = df_val[num_feats + CAT].copy()
    y_val_raw = df_val[TARGET].astype(float).copy()
    
    # Ensure categorical columns are strings in validation set
    for c in CAT:
        if c in X_val_raw.columns:
            X_val_raw[c] = X_val_raw[c].astype(str)
    
    # Apply mask for valid targets in validation set
    mask_val = y_val_raw.notna()
    X_val_raw = X_val_raw.loc[mask_val]
    y_val_raw = y_val_raw.loc[mask_val]
    
    # Compute circuit-balanced sample weights
    vc = circuit_trainval_raw.value_counts()
    w_trainval = circuit_trainval_raw.map(lambda c: 1.0 / vc.loc[c]).values
    
    # Prepare test data with same categorical handling
    X_test_raw = df_test_sorted[num_feats + CAT].copy()
    y_test_raw = df_test_sorted[TARGET].astype(float).copy()
    
    # Ensure categorical columns are strings in test set too
    for c in CAT:
        if c in X_test_raw.columns:
            X_test_raw[c] = X_test_raw[c].astype(str)
    
    # Fit with validation set, early stopping, and circuit-balanced weights
    if cat_indices:
        final_model.fit(
            X_trainval_raw, y_trainval_raw,
            cat_features=cat_indices,
            sample_weight=w_trainval,
            eval_set=(X_val_raw, y_val_raw),
            use_best_model=True,
            verbose=False
        )
    else:
        final_model.fit(
            X_trainval_raw, y_trainval_raw,
            sample_weight=w_trainval,
            eval_set=(X_val_raw, y_val_raw),
            use_best_model=True,
            verbose=False
        )
    
    test_pred_all = final_model.predict(X_test_raw)
    
    mask = y_test_raw.notna()
    test_pred = test_pred_all[mask]
    y_test_valid = y_test_raw[mask]
    
    test_mae = float(mean_absolute_error(y_test_valid, test_pred))
    test_rmse = float(np.sqrt(mean_squared_error(y_test_valid, test_pred)))
    test_r2 = float(r2_score(y_test_valid, test_pred))
    
    # Per-circuit MAE (using circuit_id for consistency with CV grouping)
    test_circuit_col = 'circuit_id' if 'circuit_id' in df_test_sorted.columns else 'name'
    per_circuit_mae = []
    circuit_names = []
    for circuit in df_test_sorted[test_circuit_col].unique():
        circuit_mask = (df_test_sorted[test_circuit_col] == circuit).values
        circuit_mask = circuit_mask & mask.values
        if circuit_mask.sum() > 0:
            circuit_mae = mean_absolute_error(
                df_test_sorted[TARGET].values[circuit_mask], 
                test_pred_all[circuit_mask]
            )
            per_circuit_mae.append(circuit_mae)
            circuit_names.append(str(circuit))
    
    median_mae_per_circuit = float(np.median(per_circuit_mae))
    n_trees = final_model.tree_count_
    
    print(f"\nResults:")
    print(f"  CV MAE (median per-circuit): {best_cv_score:.4f}s")
    print(f"  Test MAE:                    {test_mae:.4f}s")
    print(f"  Test RMSE:                   {test_rmse:.4f}s")
    print(f"  Test R²:                     {test_r2:.4f}")
    print(f"  Test Median MAE/circuit:     {median_mae_per_circuit:.4f}s")
    print(f"  Trees used:                  {n_trees}")
    
    catboost_results.append({
        "block": set_name,
        "n_features": len(num_feats) + len(CAT),
        "CV_MAE_per_circuit": best_cv_score,
        "test_MAE": test_mae,
        "test_RMSE": test_rmse,
        "test_R2": test_r2,
        "test_median_MAE_per_circuit": median_mae_per_circuit,
        "n_trees": n_trees,
        "best_params": json.dumps(best_params),
    })
    
    # Save per-circuit MAE
    per_circuit_df = pd.DataFrame({
        'Circuit': circuit_names,
        'MAE': per_circuit_mae
    }).sort_values('MAE', ascending=False)
    per_circuit_path = os.path.join(OUTDIR, f"catboost_{set_name.replace('+', '_')}_per_circuit_mae.csv")
    per_circuit_df.to_csv(per_circuit_path, index=False)
    
    # Save model config as JSON
    model_info = {
        "model": "CatBoost",
        "feature_set": set_name,
        "n_features": len(num_feats) + len(CAT),
        "best_params": best_params,
        "cv_mae_median_per_circuit": float(best_cv_score),
        "test_mae": float(test_mae),
        "test_rmse": float(test_rmse),
        "test_r2": float(test_r2),
        "test_median_mae_per_circuit": float(median_mae_per_circuit),
        "n_trees": int(n_trees),
    }
    model_json_path = os.path.join(OUTDIR, f"catboost_{set_name.replace('+', '_')}_config.json")
    with open(model_json_path, 'w') as f:
        json.dump(model_info, f, indent=2)
    print(f"Saved: {model_json_path}")




CATBOOST - Circuit-Disjoint Evaluation (GroupKFold CV)

Hyperparameter grid - CatBoost: 128 configs (with early stopping)


Tyre+Weather+State
Tuning (33,420 train samples, 3 categorical features)...
  Running GroupKFold CV (5 splits, 128 configs, grouped by CIRCUIT)...
    [50/128] Best CV: 0.3527s
    [100/128] Best CV: 0.3527s
    [128/128] Best CV: 0.3527s
  ✓ Best CV score: 0.3527s

Results:
  CV MAE (median per-circuit): 0.3527s
  Test MAE:                    1.0259s
  Test RMSE:                   1.6625s
  Test R²:                     -6.7519
  Test Median MAE/circuit:     0.3772s
  Trees used:                  2998
Saved: csv_output/nonlinear/catboost_Tyre_Weather_State_config.json

Tyre+Weather+State+Geometry
Tuning (33,420 train samples, 3 categorical features)...
  Running GroupKFold CV (5 splits, 128 configs, grouped by CIRCUIT)...
    [50/128] Best CV: 0.3498s
    [100/128] Best CV: 0.3498s
    [128/128] Best CV: 0.3498s
  ✓ Best CV score: 0.3498s

Results:
  CV MAE (med

In [None]:

# =========================================================
# CATBOOST SUMMARY
# =========================================================
print("\n" + "="*100)
print("CATBOOST - COMPARISON")
print("="*100)

catboost_summary = pd.DataFrame(catboost_results)
catboost_summary_path = os.path.join(OUTDIR, "catboost_comparison.csv")
catboost_summary.to_csv(catboost_summary_path, index=False)

print("\n" + catboost_summary[['block', 'test_MAE', 'test_RMSE', 'test_R2', 'n_trees']].to_string(index=False))

baseline_catboost = catboost_summary.iloc[0]
with_geom_catboost = catboost_summary.iloc[1]
diff_mae_catboost = with_geom_catboost['test_MAE'] - baseline_catboost['test_MAE']
pct_change_catboost = (diff_mae_catboost / baseline_catboost['test_MAE']) * 100

print(f"\nBaseline MAE:      {baseline_catboost['test_MAE']:.4f}s")
print(f"+Geometry MAE:     {with_geom_catboost['test_MAE']:.4f}s")
print(f"Difference:        {diff_mae_catboost:+.4f}s ({pct_change_catboost:+.1f}%)")

if abs(diff_mae_catboost) < 0.0001:
    print("→ Geometry: NEUTRAL")
elif diff_mae_catboost < 0:
    print(f"→ Geometry: HELPS by {abs(pct_change_catboost):.1f}%")
else:
    print(f"→ Geometry: HURTS by {pct_change_catboost:.1f}%")

print(f"\n✓ All CatBoost results saved to {OUTDIR}/")

