In [None]:
"""
MLB Contract Regression Sweep, set to a 2 hour time limit
Randomly samples a fixed XGBoost hyperparameter grid (a shuffled, hand-rolled grid search)
Each model variant keeps testing parameter combinations until its share of the time budget runs out

Most of the functions are replicated from the baseline model
"""

In [None]:
# package imports
from __future__ import annotations

import time
import warnings
warnings.filterwarnings("ignore")

from typing import Dict, List, Tuple
from itertools import product

import numpy as np
import pandas as pd

from sklearn.compose import ColumnTransformer, TransformedTargetRegressor
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import TimeSeriesSplit
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

from xgboost import XGBRegressor

In [None]:
# Configuration

# Input CSVs: the contracts table plus the season-level stat panels that are
# joined onto it
DATA_PATH = r"contracts_with_isi_v2_SWEEP_WIDE_WITH_KEYS_PLUS_CPI.csv"
BAT_RATES_PATH = r"batting_rates_by_season.csv"
PIT_RATES_PATH = r"pitching_rates_by_season.csv"
DEF_STATS_PATH = r"defensive_stats.csv"
STATCAST_PIT_PATH = r"statcast_pitching_2015_2025.csv"

# Output path for the summary results CSV; the per-variant detail files are
# named by appending a suffix to this path
OUT_RESULTS = r"regression_xgb_2hour_results.csv"

# Use this path if a pickled model is needed; the results CSVs already record
# the model parameters
OUT_BEST_MODEL = r"best_xgb_model.pkl"

# Total wall-clock budget for the whole sweep; it is divided evenly across
# the model variants
TIME_LIMIT_HOURS = 2.0
RANDOM_STATE = 42

# Time split: contracts starting in TRAIN_YEARS are used for fitting and
# cross-validation, contracts starting in TEST_YEARS are held out
TRAIN_YEARS = [2020, 2021, 2022, 2023]
TEST_YEARS = [2024, 2025]

# Contract filters
# Defined here in the config rather than further down, as in the original
# baseline model.
# MAX_YEARS: contracts with a term (years_int) longer than this are dropped.
# REMOVE_TOP_PCTL: quantile of the training-set target used as a cutoff;
# contracts whose target value is above it are removed from train and test.
MAX_YEARS = 5
REMOVE_TOP_PCTL = 0.95

# Target
REG_TARGET = "guarantee_real_per_year_2025"


In [None]:
# Hyperparameter search space
# 6 * 5 * 4 * 4 * 4 * 4 * 5 * 4 * 4 = 614,400 possible combinations; only as
# many as fit in the time budget are actually evaluated

PARAM_GRID = {
    'n_estimators': [500, 1000, 1500, 2000, 2500, 3000],  # number of boosting rounds
    'learning_rate': [0.005, 0.01, 0.02, 0.03, 0.05],     # shrinkage applied per round
    'max_depth': [3, 4, 5, 6],                            # maximum tree depth
    'min_child_weight': [3, 5, 7, 10],                  # minimum hessian sum required in a child
    'subsample': [0.7, 0.8, 0.85, 0.9],                 # row sampling ratio per tree
    'colsample_bytree': [0.7, 0.8, 0.85, 0.9],         # column sampling ratio per tree
    'reg_lambda': [0.5, 1.0, 1.5, 2.0, 3.0],            # L2 regularization on leaf weights
    'reg_alpha': [0.0, 0.1, 0.5, 1.0],                  # L1 regularization on leaf weights
    'gamma': [0, 0.1, 0.3, 0.5],                        # minimum loss reduction needed to split
}


# Size of the full grid: the product of the number of candidate values
# for each parameter
total_combos = np.prod(
    [len(v) for v in PARAM_GRID.values()]
)

print(f"Total possible combos: {total_combos:,}")


In [None]:
# Helper functions

def unique_list(seq):
    """Return the items of ``seq`` as a list without repeats, keeping first-seen order."""
    already_seen = set()
    ordered = []
    for item in seq:
        if item not in already_seen:
            already_seen.add(item)
            ordered.append(item)
    return ordered

def dedupe_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` keeping only the first column for each repeated column name."""
    is_repeat = df.columns.duplicated()
    first_occurrences = df.loc[:, ~is_repeat]
    return first_occurrences.copy()

def is_pitcher(pos) -> int:
    """Return 1 when a position label denotes a pitcher, otherwise 0.

    Missing values count as non-pitchers. The label is stripped and
    upper-cased, then treated as a pitcher when it starts with one of the
    pitcher prefixes or contains "RHP" / "LHP" anywhere.
    """
    if pd.isna(pos):
        return 0

    label = str(pos).strip().upper()
    if label.startswith(("P", "SP", "RP", "RHP", "LHP")):
        return 1
    # handles pitcher positional variations such as a handedness tag later in the label
    if "RHP" in label or "LHP" in label:
        return 1
    return 0

def time_split(
    df: pd.DataFrame,
    year_col: str = "year",
    train_years=None,
    test_years=None,
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Split ``df`` into train and test frames by calendar year.

    Parameters
    ----------
    df : frame holding a year column.
    year_col : name of the year column; values are coerced to numbers and
        anything unparseable lands in neither split.
    train_years, test_years : iterables of years. When omitted, the
        notebook-level ``TRAIN_YEARS`` / ``TEST_YEARS`` are used, which is
        the original behaviour.

    Returns
    -------
    (train, test) : independent copies of the matching rows, in their
        original row order.

    Raises
    ------
    ValueError : if a year appears in both the train and the test set, which
        would leak test rows into training.
    """
    if train_years is None:
        train_years = TRAIN_YEARS
    if test_years is None:
        test_years = TEST_YEARS

    train_years = list(train_years)
    test_years = list(test_years)
    shared_years = set(train_years) & set(test_years)
    if shared_years:
        raise ValueError(f"Years present in both train and test: {sorted(shared_years)}")

    # Compare as plain numbers: casting to a nullable integer dtype would
    # raise on any fractional value, and isin() treats 2020.0 and 2020 alike.
    years = pd.to_numeric(df[year_col], errors="coerce")
    train = df[years.isin(train_years)].copy()
    test = df[years.isin(test_years)].copy()
    return train, test

def regression_metrics(y_true: np.ndarray, y_pred: np.ndarray) -> Dict[str, float]:
    """Compute RMSE, MAE and R2 over the pairs where both values are finite.

    Both inputs are converted to float arrays; any position where either the
    truth or the prediction is NaN/inf is ignored.

    Raises
    ------
    ValueError : if no finite pair is left to score.
    """
    truth = np.asarray(y_true, dtype=float)
    preds = np.asarray(y_pred, dtype=float)

    usable = np.isfinite(truth) & np.isfinite(preds)
    if not usable.any():
        raise ValueError("No y_true/y_pred pairs found post-filtering.")

    truth, preds = truth[usable], preds[usable]

    return {
        "RMSE": float(np.sqrt(mean_squared_error(truth, preds))),
        "MAE": float(mean_absolute_error(truth, preds)),
        "R2": float(r2_score(truth, preds)),
    }

def safe_cols(df: pd.DataFrame, col_list: List[str]) -> List[str]:
    """Return the entries of ``col_list`` that are columns of ``df``, in the order given."""
    present = []
    for name in col_list:
        if name in df.columns:
            present.append(name)
    return present

In [None]:
# Data Loader

# Contracts table, restricted to deals whose term is at most MAX_YEARS
# (rows whose years_int cannot be parsed are dropped as well)
df = pd.read_csv(DATA_PATH, low_memory=False)
print(f"Loaded contracts: {len(df)} rows")

df = df.loc[
    lambda frame: pd.to_numeric(frame["years_int"], errors="coerce") <= MAX_YEARS
].copy()
print(f"After filtering years_int <= {MAX_YEARS}: {len(df)} rows")

# Contract start year (nullable integer) and the pitcher indicator
df = df.assign(
    year=pd.to_numeric(df["term_start_year"], errors="coerce").astype("Int64"),
    is_pitcher_flag=df["position"].apply(is_pitcher),
)

# Season-level panels that are aggregated onto the contracts later on
bat_rates, pit_rates, def_stats, sc_pit = (
    pd.read_csv(path, low_memory=False)
    for path in (BAT_RATES_PATH, PIT_RATES_PATH, DEF_STATS_PATH, STATCAST_PIT_PATH)
)

# section banner
print("\n" + 80 * "=")
print("BUILDING PRE-WINDOW FEATURES")
print(80 * "=")

def _safe_numeric(df: pd.DataFrame, cols: List[str]) -> pd.DataFrame:
    out = df.copy()
    for c in cols:
        if c in out.columns:
            out[c] = pd.to_numeric(out[c], errors="coerce")
    return out

def _weighted_mean(series: pd.Series, weights: pd.Series) -> float:
    s = pd.to_numeric(series, errors="coerce")
    w = pd.to_numeric(weights, errors="coerce").fillna(0.0)
    mask = np.isfinite(s) & np.isfinite(w) & (w > 0)
    if mask.sum() == 0:
        s2 = s[np.isfinite(s)]
        return float(s2.mean()) if len(s2) else np.nan
    return float(np.average(s[mask], weights=w[mask]))

def add_pre_rate_features(
    contracts: pd.DataFrame,
    season_rates: pd.DataFrame,
    *,
    rate_cols: List[str],
    weight_col: str | None,
    prefix: str,
    pre_years: int = 3,
) -> pd.DataFrame:
    """Attach pre-contract aggregates of season-level rate stats to each contract.

    For every contract row, the seasons of the same player
    (``contracts.key_fangraphs`` == ``season_rates.playerId``) that fall in
    the window ``[year - pre_years, year - 1]`` are aggregated.

    Parameters
    ----------
    contracts : needs ``key_fangraphs`` and ``year`` columns.
    season_rates : needs ``playerId`` and ``Season`` columns.
    rate_cols : stat columns to aggregate; names missing from
        ``season_rates`` yield an all-NaN feature.
    weight_col : optional weight column. When present, each feature is the
        weighted mean over rows with a finite value and a finite, positive
        weight; contracts with no such row fall back to the plain mean of
        their finite values. Without it, a plain mean is used.
    prefix : prefix for the generated column names.
    pre_years : length of the look-back window in seasons.

    Returns
    -------
    A copy of ``contracts`` (``key_fangraphs`` / ``year`` coerced to
    numeric) with these columns appended:
    ``{prefix}_pre_seasons`` (distinct seasons in the window),
    ``{prefix}_pre_reliability_sum`` (sum of weights, only with a weight
    column), one ``{prefix}_pre_{col}`` per rate column, and
    ``has_{prefix}_pre`` (1 when at least one season was found).
    """
    dfc = contracts.copy()
    dfc["key_fangraphs"] = pd.to_numeric(dfc["key_fangraphs"], errors="coerce")
    dfc["year"] = pd.to_numeric(dfc["year"], errors="coerce")
    # positional id so aggregates can be joined back even if the index has repeats
    dfc["_row_id"] = np.arange(len(dfc), dtype=int)

    dfs = season_rates.copy()
    dfs["playerId"] = pd.to_numeric(dfs["playerId"], errors="coerce")
    dfs["Season"] = pd.to_numeric(dfs["Season"], errors="coerce")
    for col in list(rate_cols) + ([weight_col] if weight_col else []):
        if col in dfs.columns:
            dfs[col] = pd.to_numeric(dfs[col], errors="coerce")

    # pandas merge pairs NaN keys with NaN keys, so rate rows without a usable
    # player id would otherwise be attached to every contract lacking an id.
    dfs = dfs.dropna(subset=["playerId", "Season"])

    m = dfc[["_row_id", "key_fangraphs", "year"]].merge(
        dfs,
        left_on="key_fangraphs",
        right_on="playerId",
        how="inner",
    )

    # keep only the seasons inside the look-back window
    in_window = m["Season"].between(m["year"] - pre_years, m["year"] - 1, inclusive="both")
    m = m[in_window.fillna(False).astype(bool)].copy()

    seasons_col = f"{prefix}_pre_seasons"
    rel_col = f"{prefix}_pre_reliability_sum"
    row_ids = m["_row_id"]
    by_row = m.groupby("_row_id")

    seasons = by_row["Season"].nunique()
    aggregated = {seasons_col: seasons}

    use_weights = bool(weight_col) and weight_col in m.columns
    if use_weights:
        aggregated[rel_col] = by_row[weight_col].sum(min_count=1)
        w = m[weight_col].fillna(0.0)
        w_usable = np.isfinite(w) & (w > 0)

    for rc in rate_cols:
        feat_name = f"{prefix}_pre_{rc}"
        if rc not in m.columns:
            aggregated[feat_name] = pd.Series(np.nan, index=seasons.index, dtype=float)
            continue

        if not use_weights:
            aggregated[feat_name] = by_row[rc].mean()
            continue

        vals = m[rc]
        finite_vals = np.isfinite(vals)
        usable = w_usable & finite_vals
        # Vectorized weighted mean: sum(value * weight) / sum(weight) per contract
        numerator = (vals * w).where(usable, 0.0).groupby(row_ids).sum()
        denominator = w.where(usable, 0.0).groupby(row_ids).sum()
        plain_mean = vals.where(finite_vals).groupby(row_ids).mean()
        # contracts with no usable weight fall back to the plain mean
        aggregated[feat_name] = (numerator / denominator).where(denominator > 0, plain_mean)

    # one join for all generated columns (instead of one merge per column)
    feats = pd.DataFrame(aggregated, index=seasons.index)
    out = dfc.merge(feats, left_on="_row_id", right_index=True, how="left")

    out[seasons_col] = out[seasons_col].fillna(0).astype(int)
    if use_weights:
        out[rel_col] = pd.to_numeric(out[rel_col], errors="coerce").fillna(0.0)

    out[f"has_{prefix}_pre"] = (out[seasons_col] > 0).astype(int)
    out = out.drop(columns=["_row_id"])
    return out

In [None]:
# apply pre panel features

def add_pre_panel_features(
    contracts: pd.DataFrame,
    panel: pd.DataFrame,
    *,
    contract_key_col: str,
    panel_key_col: str,
    contract_year_col: str,
    panel_year_col: str,
    feature_cols: List[str],
    weight_col: str | None,
    prefix: str,
    pre_years: int = 3,
) -> pd.DataFrame:
    """Generic season-panel aggregator (used for the defense and statcast files).

    For every contract row, the panel rows of the same player
    (``contract_key_col`` == ``panel_key_col``) whose panel year lies in
    ``[contract year - pre_years, contract year - 1]`` are aggregated.

    Parameters
    ----------
    contracts, panel : the contract table and the season-level panel.
    contract_key_col, panel_key_col : player id columns used for the join.
    contract_year_col, panel_year_col : year columns; they may share a name.
    feature_cols : panel columns to aggregate; names missing from the panel
        yield an all-NaN feature.
    weight_col : optional weight column. When present, each feature is the
        weighted mean over rows with a finite value and a finite, positive
        weight; contracts with no such row fall back to the plain mean of
        their finite values. Without it, a plain mean is used.
    prefix : prefix for the generated column names.
    pre_years : length of the look-back window in seasons.

    Returns
    -------
    A copy of ``contracts`` (key / year columns coerced to numeric) with
    these columns appended: ``{prefix}_pre_seasons``,
    ``{prefix}_pre_weight_sum`` (only with a weight column), one
    ``{prefix}_pre_{col}`` per feature column, and ``has_{prefix}_pre``.
    """
    dfc = contracts.copy()
    dfc[contract_key_col] = pd.to_numeric(dfc[contract_key_col], errors="coerce")
    dfc[contract_year_col] = pd.to_numeric(dfc[contract_year_col], errors="coerce")
    # positional id so aggregates can be joined back even if the index has repeats
    dfc["_row_id"] = np.arange(len(dfc), dtype=int)

    dfp = panel.copy()
    dfp[panel_key_col] = pd.to_numeric(dfp[panel_key_col], errors="coerce")
    dfp[panel_year_col] = pd.to_numeric(dfp[panel_year_col], errors="coerce")
    for col in list(feature_cols) + ([weight_col] if weight_col else []):
        if col in dfp.columns:
            dfp[col] = pd.to_numeric(dfp[col], errors="coerce")

    # pandas merge pairs NaN keys with NaN keys, so panel rows without a usable
    # player id would otherwise be attached to every contract lacking an id.
    dfp = dfp.dropna(subset=[panel_key_col, panel_year_col])

    m = dfc[["_row_id", contract_key_col, contract_year_col]].merge(
        dfp,
        left_on=contract_key_col,
        right_on=panel_key_col,
        how="inner",
        suffixes=("_contract", "_panel"),
    )

    # When both year columns share a name the merge renames them with the
    # suffixes above; otherwise the original names survive.
    year_col_in_m = f"{panel_year_col}_panel" if f"{panel_year_col}_panel" in m.columns else panel_year_col
    contract_year_in_m = f"{contract_year_col}_contract" if f"{contract_year_col}_contract" in m.columns else contract_year_col

    # Apply lookback filter
    in_window = m[year_col_in_m].between(
        m[contract_year_in_m] - pre_years,
        m[contract_year_in_m] - 1,
        inclusive="both",
    )
    m = m[in_window.fillna(False).astype(bool)].copy()

    seasons_col = f"{prefix}_pre_seasons"
    wsum_col = f"{prefix}_pre_weight_sum"
    row_ids = m["_row_id"]
    by_row = m.groupby("_row_id")

    seasons = by_row[year_col_in_m].nunique()
    aggregated = {seasons_col: seasons}

    use_weights = bool(weight_col) and weight_col in m.columns
    if use_weights:
        aggregated[wsum_col] = by_row[weight_col].sum(min_count=1)
        w = m[weight_col].fillna(0.0)
        w_usable = np.isfinite(w) & (w > 0)

    for fc in feature_cols:
        feat_name = f"{prefix}_pre_{fc}"
        if fc not in m.columns:
            aggregated[feat_name] = pd.Series(np.nan, index=seasons.index, dtype=float)
            continue

        if not use_weights:
            aggregated[feat_name] = by_row[fc].mean()
            continue

        vals = m[fc]
        finite_vals = np.isfinite(vals)
        usable = w_usable & finite_vals
        # Vectorized weighted mean: sum(value * weight) / sum(weight) per contract
        numerator = (vals * w).where(usable, 0.0).groupby(row_ids).sum()
        denominator = w.where(usable, 0.0).groupby(row_ids).sum()
        plain_mean = vals.where(finite_vals).groupby(row_ids).mean()
        # contracts with no usable weight fall back to the plain mean
        aggregated[feat_name] = (numerator / denominator).where(denominator > 0, plain_mean)

    # one join for all generated columns (instead of one merge per column)
    feats = pd.DataFrame(aggregated, index=seasons.index)
    out = dfc.merge(feats, left_on="_row_id", right_index=True, how="left")

    out[seasons_col] = out[seasons_col].fillna(0).astype(int)
    if use_weights:
        out[wsum_col] = pd.to_numeric(out[wsum_col], errors="coerce").fillna(0.0)

    out[f"has_{prefix}_pre"] = (out[seasons_col] > 0).astype(int)

    out = out.drop(columns=["_row_id"])
    return out

In [None]:

# Batting columns left out of the rate features; any column whose name ends
# in "_dup" is skipped as well
BAT_EXCLUDE = {"playerId", "Season", "Name", "Tm", "PA", "bat_rate_reliability"}
bat_rate_cols = [
    col for col in bat_rates.columns
    if not (col in BAT_EXCLUDE or col.endswith("_dup"))
]

# Pitching columns left out of the rate features; "_dup" columns are skipped too
PIT_EXCLUDE = {"playerId", "Season", "Name", "Tm", "IP", "TBF", "pit_rate_reliability"}
pit_rate_cols = [
    col for col in pit_rates.columns
    if not (col in PIT_EXCLUDE or col.endswith("_dup"))
]

# Defensive stats feature list (only the names actually present in the file)
def_feature_cols = [
    name
    for name in ("defensive_runs_saved", "fielding_percentage", "Errors")
    if name in def_stats.columns
]

# Statcast stats feature list (only the names actually present in the file)
sc_feature_cols = [
    name
    for name in (
        "fastball_avg_speed",
        "whiff_percent",
        "hard_hit_percent",
        "barrel_batted_rate",
        "exit_velocity_avg",
        "swing_percent",
    )
    if name in sc_pit.columns
]

# Weight columns for the rate aggregation; None when the file lacks the
# column, in which case an unweighted mean is used
bat_weight_col = None
if "bat_rate_reliability" in bat_rates.columns:
    bat_weight_col = "bat_rate_reliability"

pit_weight_col = None
if "pit_rate_reliability" in pit_rates.columns:
    pit_weight_col = "pit_rate_reliability"


In [None]:
# apply features to the dataset using the feature-builder functions defined above
# prints a coverage statement after each step

# Make the cell safe to re-run: every step below rebinds `df`, so on a second
# execution the columns generated by the first one would collide in the merges
# (pandas then renames them with _x/_y suffixes and the builders fail).
# Dropping exactly the columns this cell generates restores the starting state.
_generated_cols = set()
for _prefix, _source_cols in (
    ("bat", bat_rate_cols),
    ("pit", pit_rate_cols),
    ("def", def_feature_cols),
    ("scpit", sc_feature_cols),
):
    _generated_cols.update({
        f"{_prefix}_pre_seasons",
        f"{_prefix}_pre_reliability_sum",
        f"{_prefix}_pre_weight_sum",
        f"has_{_prefix}_pre",
    })
    _generated_cols.update(f"{_prefix}_pre_{_c}" for _c in _source_cols)

df = df.drop(columns=[c for c in df.columns if c in _generated_cols])

# batting rates (joined on the FanGraphs id)
df = add_pre_rate_features(
    df,
    bat_rates,
    rate_cols=bat_rate_cols,
    weight_col=bat_weight_col,
    prefix="bat",
    pre_years=3,
)
print(f"has_bat_pre: {df['has_bat_pre'].mean():.3f} coverage ({df['has_bat_pre'].sum()}/{len(df)})")

# pitching rates (joined on the FanGraphs id)
df = add_pre_rate_features(
    df,
    pit_rates,
    rate_cols=pit_rate_cols,
    weight_col=pit_weight_col,
    prefix="pit",
    pre_years=3,
)
print(f"has_pit_pre: {df['has_pit_pre'].mean():.3f} coverage ({df['has_pit_pre'].sum()}/{len(df)})")

# defensive stats (joined on the MLBAM id), weighted by innings when available
df = add_pre_panel_features(
    contracts=df,
    panel=def_stats,
    contract_key_col="key_mlbam",
    panel_key_col="MLBAMID",
    contract_year_col="year",
    panel_year_col="year",
    feature_cols=def_feature_cols,
    weight_col="Innings_played" if "Innings_played" in def_stats.columns else None,
    prefix="def",
    pre_years=3,
)
print(f"has_def_pre: {df['has_def_pre'].mean():.3f} coverage ({df['has_def_pre'].sum()}/{len(df)})")

# statcast pitching stats (joined on the MLBAM id), weighted by "pa" when available
df = add_pre_panel_features(
    contracts=df,
    panel=sc_pit,
    contract_key_col="key_mlbam",
    panel_key_col="player_id",
    contract_year_col="year",
    panel_year_col="year",
    feature_cols=sc_feature_cols,
    weight_col="pa" if "pa" in sc_pit.columns else None,
    prefix="scpit",
    pre_years=3,
)
print(f"has_scpit_pre: {df['has_scpit_pre'].mean():.3f} coverage ({df['has_scpit_pre'].sum()}/{len(df)})")

df = dedupe_columns(df)

In [None]:

# Contract-level inputs that exist before any stat aggregation
BASE_NUMERIC = ["age_at_signing", "years_int", "opt_out_flag", "year", "is_pitcher_flag"]
BASE_CATEGORICAL = ["position", "qualifying_offer"]

# Coverage features: the has_*_pre flags plus the season-count and
# weight-sum columns produced by the aggregation step
cov_suffixes = ("_pre_seasons", "_pre_reliability_sum", "_pre_weight_sum")
generated_cov_feats = [
    col for col in df.columns
    if col.endswith(cov_suffixes)
    or col in {"has_bat_pre", "has_pit_pre", "has_def_pre", "has_scpit_pre"}
]

# Aggregated stat features: every generated "<prefix>_pre_*" column that is
# not already counted as a coverage feature
PREFIXES = ("bat_pre_", "pit_pre_", "def_pre_", "scpit_pre_")
generated_rate_feats = [
    col for col in df.columns
    if col not in generated_cov_feats and col.startswith(PREFIXES)
]

# builds final feature lists (existing columns only, repeats removed)
base_numeric_features = unique_list(
    safe_cols(df, BASE_NUMERIC + generated_rate_feats + generated_cov_feats)
)

categorical_features = unique_list(safe_cols(df, BASE_CATEGORICAL))

# print count of numeric and categorical features
print(f"\n[INFO] Base numeric features: {len(base_numeric_features)}")
print(f"[INFO] Categorical features: {len(categorical_features)}")


In [None]:
# ISI inclusion

# ISI variants
# Each entry pairs a short variant name (used in output labels and file names)
# with the ISI column to add on top of the base features. The variants cover
# different lambdas (weight decay) and lookback periods.
# NOTE(review): the "lamdba" spelling presumably mirrors the column names in
# the contracts CSV; verify against the file before correcting it here.
ISI_VARIANTS = [
    {"name": "lb3_l35", "isi": "ISI_lb3_lamdba_35"},
    {"name": "lb3_l50", "isi": "ISI_lb3_lamdba_5"},
    {"name": "lb3_l70", "isi": "ISI_lb3_lamdba_7"},
    {"name": "lb5_l35", "isi": "ISI_lb5_lamdba_35"},
    {"name": "lb5_l50", "isi": "ISI_lb5_lamdba_5"},
    {"name": "lb5_l70", "isi": "ISI_lb5_lamdba_7"},
]

def isi_core_cols(df: pd.DataFrame, isi_col: str) -> List[str]:
    """List the ISI column and its companion flag columns that exist in ``df``.

    Companion names are built by appending the variant suffix (the ISI column
    name with its first "ISI" removed) to a fixed set of stems. Both
    spellings of the tier-3 flag are probed.
    """
    suffix = isi_col.replace("ISI", "", 1)
    companion_stems = (
        "isi_full_window_flag",
        "isi_window_seasons_avail",
        "any_surgery_flag",
        "any_structural_flag",
        "any_tier3_plus_flag",
        "any_tier_3plus_flag",
    )
    candidates = [isi_col] + [f"{stem}{suffix}" for stem in companion_stems]
    return safe_cols(df, candidates)

# keep only contracts with a known target, then split by contract start year
df_reg = df[pd.notna(df[REG_TARGET])].copy()
train_reg, test_reg = time_split(df_reg, year_col="year")

# The search cross-validates with TimeSeriesSplit, which splits purely by row
# position (earlier rows train, later rows validate). Put the rows in
# chronological order so those folds really are "past predicts future";
# a stable sort keeps the original order within each year.
train_reg = train_reg.sort_values("year", kind="stable")
test_reg = test_reg.sort_values("year", kind="stable")

# apply AAV cutoff: the threshold is computed on the training set only and
# then applied to both splits
aav_cutoff = train_reg[REG_TARGET].quantile(REMOVE_TOP_PCTL)
train_reg_cut = train_reg[train_reg[REG_TARGET] <= aav_cutoff].copy()
test_reg_cut = test_reg[test_reg[REG_TARGET] <= aav_cutoff].copy()

# displays size of training and testing datasets (post AAV cutoff);
# the label is derived from REMOVE_TOP_PCTL so it stays correct if that changes
top_share_label = f"TOP{(1 - REMOVE_TOP_PCTL) * 100:g}%"
print(f"[{top_share_label}] Cutoff: {aav_cutoff:,.0f}")
print(f"[{top_share_label}] Train: {len(train_reg_cut)} | Test: {len(test_reg_cut)}")



In [None]:
# Data Preprocessing

def make_preprocessor(numeric_features: List[str], categorical_features: List[str]) -> ColumnTransformer:
    """Build the column-wise preprocessing step for the model pipeline.

    Numeric columns are median-imputed and standardized; categorical columns
    are imputed with their most frequent value and one-hot encoded, with
    categories unseen at fit time ignored. Columns in neither list are dropped.
    """
    numeric_steps = [
        ("imputer", SimpleImputer(strategy="median")),
        ("scaler", StandardScaler()),
    ]
    categorical_steps = [
        ("imputer", SimpleImputer(strategy="most_frequent")),
        ("onehot", OneHotEncoder(handle_unknown="ignore")),
    ]

    transformers = [
        ("num", Pipeline(numeric_steps), numeric_features),
        ("cat", Pipeline(categorical_steps), categorical_features),
    ]
    return ColumnTransformer(transformers=transformers, remainder="drop")


In [None]:
# Brute force CV Grid Search

def _shuffled_param_combos(grid, seed):
    """Yield every combination of ``grid`` exactly once, as a dict, in a seeded random order."""
    keys = list(grid.keys())
    sizes = tuple(len(grid[k]) for k in keys)
    n_total = int(np.prod(sizes, dtype=np.int64))

    # Shuffle flat indices instead of materializing every combination: a local
    # generator keeps the order reproducible without touching numpy's global RNG.
    rng = np.random.default_rng(seed)
    for flat_index in rng.permutation(n_total):
        positions = np.unravel_index(flat_index, sizes)
        yield {k: grid[k][int(p)] for k, p in zip(keys, positions)}


def brute_force_search(
    X_train, y_train, X_test, y_test,
    numeric_features, categorical_features,
    variant_name, time_budget_seconds,
    max_consecutive_failures=20,
):
    """Randomly sample PARAM_GRID until the time budget runs out.

    Each sampled combination is scored by the mean MAE over a 3-fold
    TimeSeriesSplit of the training data, then refit on all training rows and
    scored on the test set. The best combination is the one with the lowest
    cross-validated MAE; the test set is only used to report how that choice
    performs, never to make it.

    Parameters
    ----------
    X_train, y_train : training features / target. Rows are assumed to be in
        chronological order, because TimeSeriesSplit splits by row position.
    X_test, y_test : held-out features / target.
    numeric_features, categorical_features : column names for the preprocessor.
    variant_name : label used in the printed output and the result dict.
    time_budget_seconds : wall-clock budget; checked before each combination.
    max_consecutive_failures : stop early after this many failed combinations
        in a row, instead of printing errors until the budget is used up.

    Returns
    -------
    dict with keys ``variant``, ``best_params``, ``best_cv_mae``,
    ``best_test_mae`` (test MAE of the CV-selected parameters; ``inf`` when
    nothing succeeded), ``n_tested``, ``elapsed_seconds`` and ``all_results``
    (one record per successful combination).
    """
    print(f"\n{'='*80}")
    print(f"VARIANT: {variant_name}")
    print(f"{'='*80}")
    print(f"Train: {X_train.shape}, Test: {X_test.shape}")
    print(f"Features: {len(numeric_features)} numeric + {len(categorical_features)} categorical")
    print(f"Time budget: {time_budget_seconds/60:.1f} minutes")

    # shared preprocessing definition; it is cloned on every fit
    preprocessor = make_preprocessor(numeric_features, categorical_features)

    n_total = int(np.prod([len(v) for v in PARAM_GRID.values()], dtype=np.int64))

    # CV splits limited to three since the dataset is small (n < 1000 rows)
    tscv = TimeSeriesSplit(n_splits=3)

    # Results tracker
    results = []
    best_cv_mae = float('inf')
    best_mae = float('inf')   # test MAE of the CV-selected parameters
    best_params = None
    failures_in_a_row = 0
    start_time = time.time()

    for i, params in enumerate(_shuffled_param_combos(PARAM_GRID, RANDOM_STATE)):
        # Check time limit before starting another combination
        elapsed = time.time() - start_time
        if elapsed >= time_budget_seconds:
            print(f"\n[TIME LIMIT] Stopped after {elapsed/60:.1f} minutes, tested {i} combinations")
            break

        try:
            base_model = XGBRegressor(
                **params,
                random_state=RANDOM_STATE,
                objective="reg:squarederror",
                n_jobs=1,
                verbosity=0
            )
            pipeline = Pipeline([
                ("pre", preprocessor),
                ("model", base_model)
            ])

            # the model is fit on log1p(target); predictions are mapped back with expm1
            ttr = TransformedTargetRegressor(
                regressor=pipeline,
                func=np.log1p,
                inverse_func=np.expm1
            )

            # CV eval
            cv_maes = []
            for train_idx, val_idx in tscv.split(X_train):
                ttr.fit(X_train.iloc[train_idx], y_train.iloc[train_idx])
                fold_pred = ttr.predict(X_train.iloc[val_idx])
                cv_maes.append(mean_absolute_error(y_train.iloc[val_idx], fold_pred))
            cv_mae = float(np.mean(cv_maes))

            # test set eval (reporting only)
            ttr.fit(X_train, y_train)
            test_metrics = regression_metrics(y_test, ttr.predict(X_test))

        # a single failing combination must not end the sweep
        except Exception as e:
            failures_in_a_row += 1
            print(f"  [ERROR] Iteration {i+1}: {e}")
            if failures_in_a_row >= max_consecutive_failures:
                print(f"  [ABORT] {failures_in_a_row} consecutive failures, stopping {variant_name} early")
                break
            continue

        failures_in_a_row = 0

        # records best results: selection uses the cross-validated MAE only
        if cv_mae < best_cv_mae:
            best_cv_mae = cv_mae
            best_mae = test_metrics['MAE']
            best_params = params
            print(f"\n[NEW BEST] Iteration {i+1}/{n_total}")
            print(f"  CV MAE: ${cv_mae:,.0f}")
            print(f"  Test MAE: ${test_metrics['MAE']:,.0f}")
            print(f"  Test R2: {test_metrics['R2']:.4f}")
            print(f"  Params: {params}")

        # append results
        results.append({
            'cv_mae': cv_mae,
            'test_mae': test_metrics['MAE'],
            'test_rmse': test_metrics['RMSE'],
            'test_r2': test_metrics['R2'],
            **params
        })

        # Displays update every 10 iters
        if (i + 1) % 10 == 0:
            elapsed = time.time() - start_time
            rate = (i + 1) / elapsed
            remaining = time_budget_seconds - elapsed
            est_remaining_iters = int(rate * remaining)
            print(f"  [{i+1:4d}] Elapsed: {elapsed/60:.1f}m | Rate: {rate:.1f} iter/s | Est. remaining: {est_remaining_iters} iters")

    elapsed_total = time.time() - start_time
    print(f"\n{'='*80}")
    print(f"COMPLETED {variant_name}")
    print(f"{'='*80}")
    print(f"Tested {len(results)} combinations in {elapsed_total/60:.1f} minutes")
    print(f"Best CV MAE: ${best_cv_mae:,.0f}")
    print(f"Test MAE of CV-selected params: ${best_mae:,.0f}")
    print(f"Best Params: {best_params}")

    return {
        "variant": variant_name,
        "best_params": best_params,
        "best_cv_mae": best_cv_mae,
        "best_test_mae": best_mae,
        "n_tested": len(results),
        "elapsed_seconds": elapsed_total,
        "all_results": results
    }

In [None]:
# Brute force main run
import traceback

# time limit rules: the total budget is shared equally by the baseline
# and every ISI variant
total_seconds = TIME_LIMIT_HOURS * 3600
n_variants = 1 + len(ISI_VARIANTS)
time_per_variant = total_seconds / n_variants

all_variant_results = []
overall_start = time.time()


def _run_variant(variant_name, numeric_candidates):
    """Run one search on the trimmed train/test frames.

    Only features present in both frames are used, for the numeric and the
    categorical list alike. Returns the result dict from brute_force_search,
    or None when the search itself raised (the error is printed).
    """
    def _in_both(col):
        return col in train_reg_cut.columns and col in test_reg_cut.columns

    numeric = [f for f in numeric_candidates if _in_both(f)]
    categorical = [f for f in categorical_features if _in_both(f)]
    features = numeric + categorical

    try:
        return brute_force_search(
            train_reg_cut[features],
            train_reg_cut[REG_TARGET],
            test_reg_cut[features],
            test_reg_cut[REG_TARGET],
            numeric,
            categorical,
            variant_name,
            time_per_variant
        )
    except Exception as e:
        print(f"ERROR in {variant_name}: {e}")
        traceback.print_exc()
        return None


# tests baseline model (attempts to improve original baseline results)
baseline_result = _run_variant("BASELINE", base_numeric_features)
if baseline_result is not None:
    all_variant_results.append(baseline_result)

# tests ISI variant models: base features plus the variant's ISI columns
for isi_var in ISI_VARIANTS:
    elapsed_total = time.time() - overall_start
    if elapsed_total >= total_seconds:
        print(f"\n[GLOBAL TIME LIMIT] Stopping after {elapsed_total/3600:.2f} hours")
        break

    isi_col = isi_var["isi"]
    variant_name = f"ISI_CORE_{isi_var['name']}"

    isi_cols = isi_core_cols(df_reg, isi_col)
    if not isi_cols:
        print(f"[SKIP] {variant_name}: ISI columns not found")
        continue

    variant_result = _run_variant(variant_name, unique_list(base_numeric_features + isi_cols))
    if variant_result is not None:
        all_variant_results.append(variant_result)

# saves results
print("\n" + "="*80)
print("SAVING RESULTS")
print("="*80)

if not all_variant_results:
    print("[WARNING] No results to save")
else:
    # Summary results: one row per variant, best variant first
    summary = pd.DataFrame([
        {
            "variant": r["variant"],
            "best_test_mae": r["best_test_mae"],
            "n_combinations_tested": r["n_tested"],
            "elapsed_minutes": r["elapsed_seconds"] / 60,
            "best_params": str(r["best_params"])
        }
        for r in all_variant_results
    ])

    summary = summary.sort_values("best_test_mae")
    summary.to_csv(OUT_RESULTS, index=False)

    print(f"\nSaved summary to: {OUT_RESULTS}")
    print("\n" + "="*80)
    print("FINAL RESULTS")
    print("="*80)
    print(summary[["variant", "best_test_mae", "n_combinations_tested", "elapsed_minutes"]].to_string(index=False))

    # saves detailed results for each variant tested
    for r in all_variant_results:
        # A variant whose every iteration failed has no records; building and
        # sorting an empty frame by "test_mae" would raise a KeyError.
        if not r["all_results"]:
            print(f"[SKIP] {r['variant']}: no successful iterations, no detailed file written")
            continue

        variant_file = OUT_RESULTS.replace(".csv", f"_{r['variant']}_detailed.csv")
        detailed_df = pd.DataFrame(r["all_results"])
        detailed_df = detailed_df.sort_values("test_mae")
        detailed_df.to_csv(variant_file, index=False)
        print(f"Rsults saved: {variant_file}")