# 03 - Poisson / Dixon-Coles (Optional)

Skeleton notebook for fitting team attack/defence parameters and mapping scoreline probability matrices to 1X2 (home win / draw / away win) probabilities.

In [None]:
import sys
from pathlib import Path
import pandas as pd
import numpy as np
from scipy import optimize

# Locate the project root. ``__file__`` is not defined when this code runs as
# a notebook cell, so fall back to the current working directory in that case.
try:
    PROJECT_ROOT = Path(__file__).resolve().parents[1]
except NameError:
    # NOTE(review): assumes the kernel's working directory is the folder that
    # holds this notebook, i.e. one level below the project root -- confirm.
    PROJECT_ROOT = Path.cwd().resolve().parent
DATA_PATH = PROJECT_ROOT / "data" / "processed" / "market_epl.parquet"

# Make the project's ``src`` directory importable; skip the append when the
# entry is already present so re-running the cell does not stack duplicates.
_SRC_DIR = str(PROJECT_ROOT / "src")
if _SRC_DIR not in sys.path:
    sys.path.append(_SRC_DIR)

## Outline
1. Load processed match data with goals (`FTHG`, `FTAG`) and market probabilities.
2. Fit attack/defence ratings with Dixon-Coles (or plain Poisson) likelihood and ridge regularization.
3. Generate scoreline probability matrix per match.
4. Aggregate to 1X2 probabilities and compare with market.

Add the full implementation as time permits; keep all features strictly pre-match.

In [None]:
def load_data(path: Path = DATA_PATH) -> pd.DataFrame:
    """Read the processed match table from a parquet file and validate it.

    Args:
        path: Location of the parquet file; defaults to ``DATA_PATH``.

    Returns:
        The DataFrame read from ``path``, unmodified.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        ValueError: If any of the columns ``HomeTeam``, ``AwayTeam``,
            ``FTHG`` or ``FTAG`` is absent from the loaded table.
    """
    if path.exists():
        frame = pd.read_parquet(path)
    else:
        raise FileNotFoundError("Run 01_backtest_naive.ipynb first to build processed data.")

    # Team names and full-time goal counts are the columns checked here.
    needed_columns = {"HomeTeam", "AwayTeam", "FTHG", "FTAG"}
    absent = needed_columns - set(frame.columns)
    if not absent:
        return frame
    raise ValueError(f"Missing columns for Poisson model: {absent}")


def fit_dixon_coles(df: pd.DataFrame):
    """Placeholder for the Dixon-Coles fit; not implemented yet.

    TODO: implement the DC likelihood with time-decay and ridge.

    Args:
        df: Match data to fit on (currently unused).

    Raises:
        NotImplementedError: Always, until the estimation is written.
    """
    message = "Implement Dixon-Coles estimation here"
    raise NotImplementedError(message)


def score_matrix_to_1x2(score_matrix: np.ndarray) -> dict:
    """Collapse a scoreline matrix into home-win / draw / away-win totals.

    The entries strictly below the main diagonal are summed into ``pH``, the
    diagonal into ``pD`` and the entries strictly above it into ``pA``.
    NOTE(review): this presumes rows index home goals and columns index away
    goals -- confirm against whatever builds the matrix.

    Args:
        score_matrix: 2-D array of scoreline weights/probabilities.

    Returns:
        A dict with keys ``"pH"``, ``"pD"`` and ``"pA"``. The values are not
        renormalised.
    """
    below_diagonal = np.tril(score_matrix, k=-1)
    above_diagonal = np.triu(score_matrix, k=1)
    outcome = {}
    outcome["pH"] = below_diagonal.sum()
    outcome["pD"] = np.trace(score_matrix)
    outcome["pA"] = above_diagonal.sum()
    return outcome