# 02 - Calibrated Logistic Baseline

One-vs-rest logistic regression with isotonic calibration. Train on the 2016/17–2023/24 seasons, validate on 2024/25, and test on 2025/26 to date.

In [None]:
import sys
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.linear_model import LogisticRegression
from sklearn.calibration import CalibratedClassifierCV, calibration_curve
from sklearn.metrics import log_loss, brier_score_loss

# Locate the project root. A notebook kernel does not define ``__file__``, so the
# bare ``Path(__file__)`` lookup raises NameError there; fall back to the
# working directory in that case.
try:
    PROJECT_ROOT = Path(__file__).resolve().parents[1]
except NameError:
    # NOTE(review): assumes the kernel's working directory is the folder that
    # holds this notebook, one level below the project root — confirm for your setup.
    PROJECT_ROOT = Path.cwd().resolve().parent
DATA_PATH = PROJECT_ROOT / "data" / "processed" / "market_epl.parquet"
# Put <project root>/src on the import path.
sys.path.append(str(PROJECT_ROOT / "src"))

sns.set_style("whitegrid")
# Render floats with four decimals in DataFrame output.
pd.options.display.float_format = "{:.4f}".format

## Load processed market dataset
Run `01_backtest_naive.ipynb` first to build `data/processed/market_epl.parquet`.

In [None]:
# Column holding the match outcome; used below as the classification target.
# NOTE(review): "FTR" presumably stands for full-time result — confirm against the dataset.
RESULT_COL = "FTR"

from typing import Optional


def load_market_dataset(path: Path = DATA_PATH) -> pd.DataFrame:
    """Read the processed market dataset from a parquet file.

    Args:
        path: Location of the parquet file; defaults to ``DATA_PATH``.

    Returns:
        The file's contents as a DataFrame.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
    """
    if path.exists():
        return pd.read_parquet(path)
    raise FileNotFoundError("Run 01_backtest_naive.ipynb to build data/processed/market_epl.parquet.")


def season_start_year(season: str) -> Optional[int]:
    """Extract the starting year from a hyphenated season label.

    Args:
        season: Season label such as ``"2016-17"``.

    Returns:
        The integer before the first ``-``, or ``None`` when ``season`` is not
        a string, contains no ``-``, or its leading part is not an integer.
    """
    if not isinstance(season, str) or "-" not in season:
        return None
    leading, _, _ = season.partition("-")
    try:
        return int(leading)
    except ValueError:
        return None


# Load the processed dataset and derive each row's season starting year.
data = load_market_dataset()
# Fall back to an empty series when there is no "season" column.
season_labels = data.get("season", pd.Series(dtype=str))
data["season_start"] = season_labels.apply(season_start_year)

data.head()

## Feature set and splits
Add pre-match features (form, rolling goal/shots differentials, league position deltas) to `FEATURE_COLS` once engineered. Only use information available before kickoff to avoid leakage.

In [None]:
# Model inputs: currently only the three market odds columns.
FEATURE_COLS = [
    "odds_home",
    "odds_draw",
    "odds_away",
    # TODO: add pre-match stats (rolling goal diff, shots, form, league position deltas)
]

# Split boundaries, expressed as season starting years (train range is inclusive).
TRAIN_START, TRAIN_END = 2016, 2023
VALID_SEASON = 2024
TEST_SEASON = 2025

# Keep only rows with every feature, the outcome, and a parsed season year.
required_cols = FEATURE_COLS + [RESULT_COL]
model_df = data.dropna(subset=required_cols).copy()
model_df = model_df[model_df["season_start"].notna()]

season_year = model_df["season_start"]
train_df = model_df[season_year.between(TRAIN_START, TRAIN_END)]
valid_df = model_df[season_year == VALID_SEASON]
test_df = model_df[season_year == TEST_SEASON]

split_sizes = {
    "train_rows": len(train_df),
    "valid_rows": len(valid_df),
    "test_rows": len(test_df),
}
print(split_sizes)

X_train, y_train = train_df[FEATURE_COLS], train_df[RESULT_COL]
X_valid, y_valid = valid_df[FEATURE_COLS], valid_df[RESULT_COL]
X_test, y_test = test_df[FEATURE_COLS], test_df[RESULT_COL]

# Fail early: nothing downstream can run without training rows.
if X_train.empty:
    raise ValueError("Training set is empty. Check season labels and FEATURE_COLS.")

## Fit one-vs-rest logistic regression with calibration

In [None]:
from sklearn.multiclass import OneVsRestClassifier

# One-vs-rest logistic regression. The explicit OneVsRestClassifier wrapper
# replaces LogisticRegression(multi_class="ovr"), which scikit-learn deprecated
# in 1.5 and has scheduled for removal.
base_model = OneVsRestClassifier(
    LogisticRegression(
        C=1.0,
        penalty="l2",
        max_iter=500,
    )
)

# The estimator is passed positionally because its keyword was renamed from
# ``base_estimator`` to ``estimator`` in scikit-learn 1.2 and the old name was
# removed in 1.4; the positional form works on both sides of that change.
calibrated_model = CalibratedClassifierCV(
    base_model,
    method="isotonic",
    cv=5,
)

# Fit the base model and its calibrators with 5-fold cross-validation on the training seasons.
calibrated_model.fit(X_train, y_train)

## Evaluate log loss, Brier score, and calibration curves

In [None]:
def evaluate_split(name: str, X: pd.DataFrame, y: pd.Series):
    """Score the fitted ``calibrated_model`` on one data split.

    Prints the multiclass log loss and the macro-averaged one-vs-rest Brier
    score for the split.

    Args:
        name: Label used in the printed messages.
        X: Feature matrix for the split.
        y: True outcome labels for the rows of ``X``.

    Returns:
        ``None`` (after printing a warning) when ``X`` is empty; otherwise the
        tuple ``(prob_df, classes, log_loss, brier)``, where ``prob_df`` has one
        ``model_p<class>`` column per class.
    """
    if X.empty:
        print(f"[warn] {name} split is empty.")
        return None

    probs = calibrated_model.predict_proba(X)
    classes = calibrated_model.classes_
    column_names = [f"model_p{label}" for label in classes]
    prob_df = pd.DataFrame(probs, columns=column_names)

    ll = log_loss(y, probs, labels=classes)

    # One binary Brier score per class (that class vs. the rest), then averaged.
    per_class_brier = [
        brier_score_loss((y == label).astype(int), prob_df[column])
        for label, column in zip(classes, column_names)
    ]
    brier = float(np.mean(per_class_brier))

    print(f"{name} log loss: {ll:.4f} | Brier (macro): {brier:.4f}")
    return prob_df, classes, ll, brier


# Score the validation and test splits; each result is None when that split is empty.
eval_valid = evaluate_split("valid", X_valid, y_valid)
eval_test = evaluate_split("test", X_test, y_test)

In [None]:
# Calibration plot per outcome class on the validation split (skipped when
# that split was empty and evaluate_split returned None).
if eval_valid:
    prob_df, classes, _, _ = eval_valid
    n_panels = len(classes)
    fig, axes = plt.subplots(1, n_panels, figsize=(5 * n_panels, 4), sharey=True)
    # With a single panel, subplots returns a bare Axes rather than an array.
    if n_panels == 1:
        axes = [axes]
    for ax, cls in zip(axes, classes):
        # One-vs-rest view: this class is the positive label.
        true_binary = (y_valid == cls).astype(int)
        prob_pos = prob_df[f"model_p{cls}"]
        frac_pos, mean_pred = calibration_curve(
            true_binary,
            prob_pos,
            n_bins=10,
            strategy="quantile",
        )
        ax.plot(mean_pred, frac_pos, marker="o", label="Observed")
        # Diagonal reference: predicted probability equals observed frequency.
        ax.plot([0, 1], [0, 1], linestyle="--", color="gray", label="Perfect")
        ax.set(
            title=f"Calibration for {cls} (valid)",
            xlabel="Predicted prob",
            ylabel="Observed freq",
        )
        ax.legend()
    plt.tight_layout()
    plt.show()

## Save test predictions for downstream comparisons
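Writes the test-season rows (including the market odds columns) plus the model's calibrated probability for each outcome (`model_p<class>`) to `reports/predictions_baseline.csv`.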

In [None]:
# Reset to a 0..n-1 index so the rows line up with prob_df, which
# evaluate_split builds with a default index.
preds_test = test_df.reset_index(drop=True).copy()
if eval_test:
    prob_df, classes, _, _ = eval_test
    for cls in classes:
        column = f"model_p{cls}"
        preds_test[column] = prob_df[column]

# Write the CSV under <project root>/reports, creating the folder if needed.
reports_dir = PROJECT_ROOT / "reports"
reports_dir.mkdir(parents=True, exist_ok=True)
preds_path = reports_dir / "predictions_baseline.csv"
preds_test.to_csv(preds_path, index=False)
preds_path