# Project 3: Football Win Probability – Classification & Live Updating
Build a model to predict **home_win** using pre/post-match features.

### Tasks
1. EDA + feature selection.
2. Train **LogisticRegression**; compute ROC-AUC.
3. Calibrate probabilities (Platt scaling).
4. Live-update demo: recompute the home-win probability as the xG difference (`xg_home` − `xg_away`) changes within a match.

In [None]:
import pandas as pd, numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.calibration import CalibratedClassifierCV
import matplotlib.pyplot as plt

# --- Data -----------------------------------------------------------------
# Load the match table, then split it into model inputs and the binary target.
df = pd.read_csv('../datasets/football_match_stats.csv')
feature_names = [
    'shots_home', 'shots_away',
    'xg_home', 'xg_away',
    'ppda_home', 'ppda_away',
    'yellow_home', 'yellow_away',
    'fouls_home', 'fouls_away',
]
X = df[feature_names]
y = df['home_win']

# Hold out 20% of the rows for evaluation; stratify so both splits keep the
# same home-win rate, and fix the seed so the split is reproducible.
X_tr, X_te, y_tr, y_te = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# --- Model ----------------------------------------------------------------
# Standardise the features, then fit a logistic regression.
pipe = Pipeline(steps=[
    ('sc', StandardScaler()),
    ('logit', LogisticRegression(max_iter=500)),
])

# Wrap the pipeline in a 3-fold cross-validated calibrator. The calibration
# method is left at the scikit-learn default, which its documentation
# describes as sigmoid (Platt) scaling.
clf = CalibratedClassifierCV(pipe, cv=3)
clf.fit(X_tr, y_tr)

# --- Evaluation -----------------------------------------------------------
# Column 1 of predict_proba is taken as the home-win probability.
proba = clf.predict_proba(X_te)[:, 1]
test_auc = roc_auc_score(y_te, proba)
print({'ROC_AUC': test_auc})

In [None]:
# Live update demo
# Sweep a synthetic xG-difference trajectory over a 90-minute match and
# re-score the calibrated model every 5 minutes, holding every other feature
# at its dataset mean. Relies on `df`, `X` and `clf` from the training cell.
import numpy as np

# Take the column order from the training matrix. The model was fitted on X,
# so a row assembled in any other order (e.g. xG before shots) would feed xG
# values into the shots features and vice versa.
live_cols = list(X.columns)
base = df[live_cols].mean()

timeline = []
for minute in range(0, 91, 5):
    # Synthetic trajectory: -0.2 at kick-off, rising linearly to +1.0 at 90'.
    xg_diff = (minute/90.0)*1.2 - 0.2
    row = base.copy()
    # Start both sides at 1.0 xG and give the whole difference to whichever
    # side is ahead, so xg_home - xg_away == xg_diff.
    row['xg_home'] = 1.0 + max(0, xg_diff)
    row['xg_away'] = 1.0 + max(0, -xg_diff)
    # Score a one-row DataFrame (not a bare array) so the features are
    # matched to the fitted model by name, in the training order.
    live_row = pd.DataFrame([row])[live_cols]
    # Use a dedicated name: reusing `proba` here would overwrite the test-set
    # probability vector computed in the training cell.
    home_win_prob = clf.predict_proba(live_row)[0, 1]
    timeline.append((minute, home_win_prob))

timeline = pd.DataFrame(timeline, columns=['minute','home_win_prob'])
timeline.plot(x='minute', y='home_win_prob', title='Live Win Probability (Demo)')
plt.ylim(0,1)  # probabilities: pin the axis to the full [0, 1] range
plt.show()
