# 04 - Market Comparison & Favourite–Longshot

Compares model vs no-vig market probabilities, builds favourite–longshot curves, and inspects deltas.

In [None]:
import sys
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Resolve the project root: the directory one level above the folder that
# holds this file. `__file__` is not defined when this code is executed as
# notebook cells, so fall back to the current working directory in that case.
try:
    PROJECT_ROOT = Path(__file__).resolve().parents[1]
except NameError:
    # NOTE(review): assumes the kernel's working directory is the folder
    # containing this notebook (one level below the project root) - confirm.
    PROJECT_ROOT = Path.cwd().resolve().parent

# CSV of predictions read by the next cell.
preds_path = PROJECT_ROOT / "reports" / "predictions_baseline.csv"

# Make the project's `src` directory importable. Guard against adding a
# duplicate entry every time this cell is re-run.
_src_dir = str(PROJECT_ROOT / "src")
if _src_dir not in sys.path:
    sys.path.append(_src_dir)

# Plot style and float display precision used throughout this notebook.
sns.set_style("whitegrid")
pd.options.display.float_format = "{:.4f}".format

## Load predictions and reshape to long format

In [None]:
# Guard: stop early with a pointer to the notebook named in the message when
# the predictions file is missing.
if not preds_path.exists():
    raise FileNotFoundError("Run 02_model_baseline.ipynb to generate reports/predictions_baseline.csv")

df = pd.read_csv(preds_path)

# One entry per match outcome: (outcome code, market probability column, odds column).
outcome_specs = (
    ("H", "pH", "odds_home"),
    ("D", "pD", "odds_draw"),
    ("A", "pA", "odds_away"),
)

# Build one long-format record per (input row, outcome), preserving input row
# order. An outcome is skipped when its model column is absent or when its
# market probability is missing.
long_rows = [
    {
        "outcome": code,
        "market_p": match[prob_col],
        "model_p": match.get(f"model_p{code}", np.nan),
        "odds": match.get(price_col, np.nan),
        # 1 when the row's FTR value equals this outcome code, else 0.
        "result": 1 if match.get("FTR") == code else 0,
    }
    for _, match in df.iterrows()
    for code, prob_col, price_col in outcome_specs
    if f"model_p{code}" in match and not pd.isna(match[prob_col])
]

long_df = pd.DataFrame(long_rows)
long_df.head()

## Favourite–longshot bias curve (market)
Groups outcomes into market-probability deciles and compares each decile's observed win rate with its mean market-implied probability.

In [None]:
# Nothing to bin when the reshaping step produced no rows.
if long_df.empty:
    raise ValueError("No data available to compute FLB. Check earlier notebooks.")

# Keep only rows that have a market probability, then label each row with its
# quantile bin of market_p (10 requested; bins with duplicate edges are dropped).
with_market = long_df.dropna(subset=["market_p"])
long_df = with_market.assign(
    decile=pd.qcut(with_market["market_p"], q=10, labels=False, duplicates="drop")
)

# Per-bin summary: mean market probability, observed win rate, and row count.
flb = long_df.groupby("decile").agg(
    market_p_mean=pd.NamedAgg(column="market_p", aggfunc="mean"),
    observed_win=pd.NamedAgg(column="result", aggfunc="mean"),
    count=pd.NamedAgg(column="result", aggfunc="size"),
)

# Observed win rate against mean market probability, with the y = x diagonal
# drawn as the reference line.
fig, ax = plt.subplots(figsize=(6, 4))
ax.plot(flb["market_p_mean"], flb["observed_win"], marker="o", label="Observed")
ax.plot([0, 1], [0, 1], linestyle="--", color="gray", label="Perfect")
ax.set_xlabel("Market implied probability (decile mean)")
ax.set_ylabel("Observed win rate")
ax.set_title("Favourite–Longshot Bias (market)")
ax.legend()
fig.tight_layout()
plt.show()
flb

## Model vs market probability deltas

In [None]:
# Scatter of model probability against market probability; points on the
# dashed y = x diagonal are where the two agree.
fig, ax = plt.subplots(figsize=(6, 4))
ax.scatter(long_df["market_p"], long_df["model_p"], alpha=0.4)
ax.plot([0, 1], [0, 1], linestyle="--", color="gray")
ax.set_xlabel("Market no-vig p")
ax.set_ylabel("Model p")
ax.set_title("Model vs Market Probabilities")
fig.tight_layout()
plt.show()

# Signed difference: positive where the model's probability exceeds the market's.
long_df["delta"] = long_df["model_p"].sub(long_df["market_p"])

# Summary statistics restricted to the three probability columns.
summary_cols = ["market_p", "model_p", "delta"]
long_df.describe()[summary_cols]