# 05 - Value Backtest & CLV

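Backtests a simple rule: bet on an outcome when `model_p - market_p >= tau`. For each threshold `tau` and outcome it reports the number of bets, PnL, ROI, and the mean model-implied EV, plus optional CLV when both opening and closing odds are available.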

In [None]:
import sys
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Locate the project root: two levels above this file when it runs as a script.
try:
    PROJECT_ROOT = Path(__file__).resolve().parents[1]
except NameError:
    # Notebook kernels do not define __file__. Fall back to the working
    # directory; assumes the kernel was started in the notebook's own folder,
    # one level below the project root -- TODO confirm for other launchers.
    PROJECT_ROOT = Path.cwd().resolve().parent
preds_path = PROJECT_ROOT / "reports" / "predictions_baseline.csv"

# Make the project's src/ directory importable. The membership check keeps
# sys.path from accumulating duplicate entries when the cell is re-run.
src_dir = str(PROJECT_ROOT / "src")
if src_dir not in sys.path:
    sys.path.append(src_dir)

## Load predictions

In [None]:
# Read the predictions CSV, or stop early with a pointer to the notebook that
# the error message names as its producer.
if preds_path.exists():
    df = pd.read_csv(preds_path)
else:
    raise FileNotFoundError("Run 02_model_baseline.ipynb to generate reports/predictions_baseline.csv")

# Columns that must be present in the predictions file; anything absent is
# reported by name in the error below.
required_cols = {"pH", "pD", "pA", "odds_home", "odds_draw", "odds_away", "FTR"}
present_cols = set(df.columns)
missing = required_cols - present_cols
if missing:
    raise ValueError(f"Missing required columns in predictions: {missing}")

# Last expression of the cell: shows the first rows as the cell output.
df.head()

## Run threshold grid search

In [None]:
# Thresholds 0.02 .. 0.10 in steps of 0.02; round() strips the floating-point
# noise that np.arange accumulates with a fractional step.
TAUS = [round(x, 2) for x in np.arange(0.02, 0.12, 0.02)]

# outcome code -> (decimal-odds column, market-probability column)
# NOTE(review): value_backtest reads `pH`/`pD`/`pA` as the *market*
# probabilities and `model_pH`/`model_pD`/`model_pA` as the model's. Confirm
# this matches the schema of the predictions file.
PICK_MAP = {
    "H": ("odds_home", "pH"),
    "D": ("odds_draw", "pD"),
    "A": ("odds_away", "pA"),
}


def value_backtest(df: pd.DataFrame, taus=TAUS, stake: float = 1.0) -> pd.DataFrame:
    """Backtest the rule "bet when model_p - market_p >= tau" on flat stakes.

    For every threshold in ``taus`` and every outcome in ``PICK_MAP`` whose
    ``model_p<outcome>`` column exists in ``df``, select the rows where the
    model probability exceeds the market probability by at least ``tau``,
    drop rows with missing odds, probabilities or ``FTR``, and settle a bet
    of ``stake`` on each remaining row.

    Args:
        df: Frame with an ``FTR`` result column plus, per outcome, the odds
            and market-probability columns named in ``PICK_MAP`` and a
            ``model_p<outcome>`` column.
        taus: Iterable of edge thresholds to evaluate.
        stake: Amount staked on every selected bet.

    Returns:
        One row per (tau, outcome) with columns ``tau``, ``outcome``,
        ``n_bets``, ``pnl``, ``roi`` (pnl divided by total staked) and ``ev``
        (mean of ``model_p * (odds - 1) - (1 - model_p)`` over the selected
        bets, times ``stake``). Combinations without bets get ``n_bets=0``,
        ``pnl=0.0`` and NaN for ``roi``/``ev``. The six columns are always
        present, even when no rows are produced.

    Warns:
        UserWarning: once per outcome whose ``model_p<outcome>`` column is
            missing; that outcome is skipped.
    """
    import warnings

    columns = ["tau", "outcome", "n_bets", "pnl", "roi", "ev"]

    # Resolve the usable outcomes once so a missing model column is reported
    # a single time instead of being skipped silently for every threshold.
    picks = []
    for outcome, (odds_col, market_col) in PICK_MAP.items():
        model_col = f"model_p{outcome}"
        if model_col in df.columns:
            picks.append((outcome, odds_col, market_col, model_col))
        else:
            warnings.warn(
                f"value_backtest: column {model_col!r} not found; "
                f"skipping outcome {outcome!r}",
                stacklevel=2,
            )

    rows = []
    for tau in taus:
        for outcome, odds_col, market_col, model_col in picks:
            # Comparisons against NaN are False, so rows lacking either
            # probability never pass the mask; dropna additionally removes
            # rows with missing odds or result.
            mask = (df[model_col] - df[market_col]) >= tau
            bets = df[mask].dropna(subset=[odds_col, market_col, model_col, "FTR"])
            if bets.empty:
                rows.append({"tau": tau, "outcome": outcome, "n_bets": 0, "pnl": 0.0, "roi": np.nan, "ev": np.nan})
                continue
            # A winning bet returns (odds - 1) per unit staked; a loser costs the stake.
            returns = np.where(bets["FTR"] == outcome, bets[odds_col] - 1, -1) * stake
            pnl = returns.sum()
            n_bets = len(bets)
            roi = pnl / (n_bets * stake)
            ev = (bets[model_col] * (bets[odds_col] - 1) - (1 - bets[model_col])).mean() * stake
            rows.append({"tau": tau, "outcome": outcome, "n_bets": n_bets, "pnl": pnl, "roi": roi, "ev": ev})

    # Passing the columns explicitly keeps the schema stable for an empty result.
    return pd.DataFrame(rows, columns=columns)


# Evaluate every threshold in TAUS with a flat stake of 100 per bet.
value_results = value_backtest(df, taus=TAUS, stake=100.0)
# Last expression of the cell: shows the results table as the cell output.
value_results

## Plot EV and ROI vs threshold

In [None]:
# Draw EV and ROI against the threshold, one line per outcome; nothing is
# plotted when the backtest produced no rows.
if not value_results.empty:
    # Reshape to tau (rows) x outcome (columns), once per metric.
    pivot_ev, pivot_roi = (
        value_results.pivot(index="tau", columns="outcome", values=metric)
        for metric in ("ev", "roi")
    )

    fig, axes = plt.subplots(1, 2, figsize=(10, 4))
    panels = (
        (axes[0], pivot_ev, "EV vs tau", "EV (per 100 stake)"),
        (axes[1], pivot_roi, "ROI vs tau", "ROI"),
    )
    for ax, table, title, ylabel in panels:
        table.plot(ax=ax, marker="o")
        # Dashed zero line marks break-even.
        ax.axhline(0, color="gray", linestyle="--")
        ax.set_title(title)
        ax.set_ylabel(ylabel)

    plt.tight_layout()
    plt.show()

## Optional: Closing Line Value (CLV)
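If both opening and closing odds columns exist (e.g., `PSH` vs `PSCH`), compute CLV as the closing implied probability minus the implied probability at your entry (opening) price, i.e. `1 / closing_odds - 1 / opening_odds`. A positive CLV means the closing odds were shorter than the odds you took.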

In [None]:
def compute_clv(row, outcome: str) -> float:
    """Return the closing-line value of one row for the given outcome.

    CLV is the implied probability of the closing odds minus the implied
    probability of the opening (entry) odds, each computed as ``1 / odds``.
    A positive value therefore means the closing odds were lower than the
    opening odds.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) supporting ``in`` and
            item access by column name.
        outcome: ``"H"``, ``"D"`` or ``"A"``; selects ``PSH``/``PSD``/``PSA``
            as the opening column and ``PSCH``/``PSCD``/``PSCA`` as the
            closing column.

    Returns:
        ``1 / close - 1 / open``, or NaN when either column is absent, either
        value is missing, or either value is not a positive number.

    Raises:
        KeyError: if ``outcome`` is not one of ``"H"``, ``"D"``, ``"A"``.
    """
    open_col = {"H": "PSH", "D": "PSD", "A": "PSA"}[outcome]
    close_col = {"H": "PSCH", "D": "PSCD", "A": "PSCA"}[outcome]
    if open_col not in row or close_col not in row:
        return np.nan

    open_odds = row[open_col]
    close_odds = row[close_col]
    if pd.isna(open_odds) or pd.isna(close_odds):
        return np.nan
    # Zero or negative odds have no implied probability; dividing by them
    # would raise ZeroDivisionError (plain floats) or yield inf (numpy floats).
    if open_odds <= 0 or close_odds <= 0:
        return np.nan

    entry_prob = 1.0 / open_odds
    close_prob = 1.0 / close_odds
    return close_prob - entry_prob


# Example usage once the opening/closing odds columns are present:
# for outcome in ("H", "D", "A"):
#     df[f"clv_{outcome}"] = df.apply(lambda r, o=outcome: compute_clv(r, o), axis=1)