# 02 - Baseline Modeling

Train and evaluate leakage-safe baseline models with calibration and upset-focused diagnostics.

In [None]:
from pathlib import Path
import sys

# Locate the repository root so that `src.*` imports resolve: when the kernel
# is started inside a directory named "notebooks", the root is its parent;
# otherwise the current working directory itself is used.
_cwd = Path.cwd()
_cwd_resolved = _cwd.resolve()
if _cwd.name == "notebooks":
    PROJECT_ROOT = _cwd_resolved.parent
else:
    PROJECT_ROOT = _cwd_resolved

# Put the root at the front of sys.path once; re-running the cell is a no-op.
_project_root_str = str(PROJECT_ROOT)
if _project_root_str not in sys.path:
    sys.path.insert(0, _project_root_str)

import pandas as pd
from sklearn.metrics import confusion_matrix, precision_recall_fscore_support

from src.data_prep import load_matches, build_team1_win_target, assign_favorite_underdog_from_elo, time_based_split
from src.features import build_pre_match_feature_frame
from src.models import (
    calibrate_classifier,
    evaluate_binary_model,
    train_logistic_baseline,
    train_xgboost_baseline,
)
from src.viz import plot_calibration_curve

# Load the matches, attach the team1-win target, then the Elo-based
# favorite/underdog assignment, and split the result by time.
df = assign_favorite_underdog_from_elo(build_team1_win_target(load_matches()))
train_df, valid_df, test_df = time_based_split(df)

# Build one (features, target) pair per split, in train / valid / test order.
(X_train, y_train), (X_valid, y_valid), (X_test, y_test) = [
    build_pre_match_feature_frame(split_df)
    for split_df in (train_df, valid_df, test_df)
]

# Logistic baseline is fit on the training split; the calibration step is
# given the validation split.
logistic = train_logistic_baseline(X_train, y_train)
logistic_calibrated = calibrate_classifier(logistic, X_valid, y_valid)

# Optional stronger baseline, trained on the same training split for comparison.
xgb = train_xgboost_baseline(X_train, y_train)

In [None]:
# Headline metrics for every baseline on the validation and test splits.
# NOTE(review): `logistic_calibrated` was produced from (X_valid, y_valid), so
# its validation numbers are presumably in-sample for the calibrator; treat the
# test-split numbers as the honest comparison.
print("Validation metrics")
print("- Logistic:", evaluate_binary_model(logistic, X_valid, y_valid))
print("- Logistic (calibrated):", evaluate_binary_model(logistic_calibrated, X_valid, y_valid))
print("- XGBoost:", evaluate_binary_model(xgb, X_valid, y_valid))

print("\nTest metrics")
print("- Logistic:", evaluate_binary_model(logistic, X_test, y_test))
print("- Logistic (calibrated):", evaluate_binary_model(logistic_calibrated, X_test, y_test))
print("- XGBoost:", evaluate_binary_model(xgb, X_test, y_test))

# Draw the reliability curve on the test split. The validation split was
# already handed to calibrate_classifier, so a curve drawn on it would be
# optimistically biased rather than a held-out check of calibration quality.
calibrated_test_proba = logistic_calibrated.predict_proba(X_test)[:, 1]
fig, _ = plot_calibration_curve(y_test, calibrated_test_proba)
fig

In [None]:
# Base rate of upsets, computed on the full frame `df` (not only the test split).
upset_rate = df["is_upset"].mean()
print(f"Overall upset rate: {upset_rate:.2%}")

# Upset-focused evaluation on the test split: the positive class is `is_upset`.
y_upset_test = test_df["is_upset"].astype(int)

# An upset is flagged whenever the calibrated probability in column 1 is
# below 0.5.
# NOTE(review): the target comes from build_team1_win_target, so column 1 is
# presumably P(team1 wins). "< 0.5" then means "team1 predicted to lose",
# which matches "underdog predicted to win" only if team1 is always the
# favorite -- confirm against assign_favorite_underdog_from_elo.
calibrated_test_proba = logistic_calibrated.predict_proba(X_test)
underdog_pred = (calibrated_test_proba[:, 1] < 0.5).astype(int)

p, r, f1, _ = precision_recall_fscore_support(
    y_upset_test,
    underdog_pred,
    average="binary",
    zero_division=0,
)
cm = confusion_matrix(y_upset_test, underdog_pred)

# Report the three scalar scores rounded to four decimals, then the matrix.
for score_label, score_value in (
    ("Upset precision:", p),
    ("Upset recall:", r),
    ("Upset f1:", f1),
):
    print(score_label, round(score_value, 4))
print("Upset confusion matrix:\n", cm)