<a href="https://colab.research.google.com/github/angwelo/ASSIGNMENT-/blob/main/Poisson_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# --- Premier League Poisson Model (Football-Data.org) ---
# Requirements: pip install requests pandas numpy python-dateutil
# Notes:
# - Free tier does NOT include odds; this is a pure data-driven model.
# - You can widen LOOKBACK_DAYS for more history or narrow for recency.

from datetime import datetime, timedelta, timezone
from zoneinfo import ZoneInfo
import requests
import pandas as pd
import numpy as np
from math import exp
from itertools import product

API_KEY = "bd6e0b59ca634a5c8dd998c8542c603d"  # <- put your key here
BASE_URL = "https://api.football-data.org/v4"
HEADERS = {"X-Auth-Token": API_KEY}

COMPETITION = "PL"
LOOKBACK_DAYS = 540  # ~18 months of history
MAX_GOALS = 10       # Poisson summation upper bound (0..MAX_GOALS)

# -----------------------
# Date Helpers (Africa/Nairobi weekend)
# -----------------------
NAIROBI = ZoneInfo("Africa/Nairobi")
today_ke = datetime.now(NAIROBI).date()

# Next Saturday/Sunday in Nairobi
# weekday(): Mon=0 ... Sun=6
days_to_sat = (5 - today_ke.weekday()) % 7
days_to_sun = (6 - today_ke.weekday()) % 7
sat_ke = today_ke + timedelta(days=days_to_sat)
sun_ke = today_ke + timedelta(days=days_to_sun)

# Convert to UTC ISO dates for API (API uses UTC date fences)
# We'll fetch matches with utcDate between Sat 00:00:00Z and Sun 23:59:59Z inclusive
sat_utc = datetime.combine(sat_ke, datetime.min.time(), tzinfo=NAIROBI).astimezone(timezone.utc).date()
sun_utc = datetime.combine(sun_ke, datetime.max.time(), tzinfo=NAIROBI).astimezone(timezone.utc).date()

# -----------------------
# API helpers
# -----------------------
def fetch_matches(date_from: str, date_to: str, status=None):
    """
    Fetch matches for the Premier League between date_from and date_to (YYYY-MM-DD).
    status: None or one of {"FINISHED","SCHEDULED","IN_PLAY","TIMED"}
    """
    params = {"dateFrom": date_from, "dateTo": date_to}
    if status:
        params["status"] = status
    url = f"{BASE_URL}/competitions/{COMPETITION}/matches"
    r = requests.get(url, headers=HEADERS, params=params, timeout=30)
    r.raise_for_status()
    return r.json().get("matches", [])

def fetch_historical_matches(lookback_days=540):
    end_date = datetime.now(timezone.utc).date()
    start_date = end_date - timedelta(days=lookback_days)
    matches = fetch_matches(start_date.isoformat(), end_date.isoformat(), status="FINISHED")
    # Keep only matches with score info
    rows = []
    for m in matches:
        sc = m.get("score", {})
        full = sc.get("fullTime", {}) or {}
        hg = full.get("home", None)
        ag = full.get("away", None)
        if hg is None or ag is None:
            continue
        rows.append({
            "utcDate": m["utcDate"],
            "homeTeam": m["homeTeam"]["name"],
            "awayTeam": m["awayTeam"]["name"],
            "homeGoals": int(hg),
            "awayGoals": int(ag),
        })
    return pd.DataFrame(rows)

def fetch_weekend_fixtures(sat_date_utc: datetime.date, sun_date_utc: datetime.date):
    games = fetch_matches(sat_date_utc.isoformat(), sun_date_utc.isoformat(), status="SCHEDULED")
    rows = []
    for m in games:
        rows.append({
            "utcDate": m["utcDate"],
            "homeTeam": m["homeTeam"]["name"],
            "awayTeam": m["awayTeam"]["name"],
        })
    return pd.DataFrame(rows)

# -----------------------
# Poisson model utilities
# -----------------------
def build_team_strengths(results: pd.DataFrame):
    """
    Ratio-based attack/defense strengths, with separate home/away bases:
    λ_home_base = league mean home goals/match
    λ_away_base = league mean away goals/match

    For each team:
      attack_home = (team home goals per home match) / λ_home_base
      defense_home = (team home goals conceded per home match) / λ_away_base
      attack_away = (team away goals per away match) / λ_away_base
      defense_away = (team away goals conceded per away match) / λ_home_base

    Expected goals:
      E[HomeGoals] = λ_home_base * attack_home(Home) * defense_away(Away)
      E[AwayGoals] = λ_away_base * attack_away(Away) * defense_home(Home)
    """
    if results.empty:
        raise ValueError("No historical results found to fit strengths.")

    # League baselines
    total_home_goals = results["homeGoals"].sum()
    total_away_goals = results["awayGoals"].sum()
    n_matches = len(results)
    lam_home_base = total_home_goals / n_matches
    lam_away_base = total_away_goals / n_matches

    # Per-team aggregations
    # Home rows
    home = results.groupby("homeTeam").agg(
        home_goals_for=("homeGoals", "sum"),
        home_goals_against=("awayGoals", "sum"),
        home_matches=("homeGoals", "count"),
    )
    # Away rows
    away = results.groupby("awayTeam").agg(
        away_goals_for=("awayGoals", "sum"),
        away_goals_against=("homeGoals", "sum"),
        away_matches=("awayGoals", "count"),
    )

    teams = sorted(set(results["homeTeam"]) | set(results["awayTeam"]))
    df = pd.DataFrame(index=teams)
    df = df.join(home, how="left").join(away, how="left").fillna(0)

    # Avoid division by zero; add tiny ridge prior (shrinkage)
    eps = 1e-9
    df["home_gpg_for"] = df["home_goals_for"] / (df["home_matches"] + eps)
    df["home_gpg_against"] = df["home_goals_against"] / (df["home_matches"] + eps)
    df["away_gpg_for"] = df["away_goals_for"] / (df["away_matches"] + eps)
    df["away_gpg_against"] = df["away_goals_against"] / (df["away_matches"] + eps)

    # Strengths (ratio to league baselines). Add mild shrink toward 1 using empirical Bayes:
    # shrink factor alpha moves small-sample teams closer to league mean.
    alpha = 6  # ~prior matches worth; tune as desired
    df["attack_home"] = ((df["home_gpg_for"] * df["home_matches"]) + (alpha * lam_home_base)) / (df["home_matches"] + alpha) / (lam_home_base + eps)
    df["defense_home"] = ((df["home_gpg_against"] * df["home_matches"]) + (alpha * lam_away_base)) / (df["home_matches"] + alpha) / (lam_away_base + eps)
    df["attack_away"] = ((df["away_gpg_for"] * df["away_matches"]) + (alpha * lam_away_base)) / (df["away_matches"] + alpha) / (lam_away_base + eps)
    df["defense_away"] = ((df["away_gpg_against"] * df["away_matches"]) + (alpha * lam_home_base)) / (df["away_matches"] + alpha) / (lam_home_base + eps)

    strengths = {
        "team_strengths": df[["attack_home", "defense_home", "attack_away", "defense_away"]],
        "lam_home_base": lam_home_base,
        "lam_away_base": lam_away_base,
    }
    return strengths

def expected_goals(home_team, away_team, strengths):
    ts = strengths["team_strengths"]
    lam_home_base = strengths["lam_home_base"]
    lam_away_base = strengths["lam_away_base"]

    if home_team not in ts.index or away_team not in ts.index:
        # fallback to league-average if team not found
        return lam_home_base, lam_away_base

    h = ts.loc[home_team]
    a = ts.loc[away_team]
    lam_home = lam_home_base * h["attack_home"] * a["defense_away"]
    lam_away = lam_away_base * a["attack_away"] * h["defense_home"]
    # safety: avoid zeros
    return max(lam_home, 1e-6), max(lam_away, 1e-6)

def poisson_pmf(k, lam):
    # k goals with mean lam
    from math import factorial
    return (lam**k) * np.exp(-lam) / factorial(k)

def match_outcome_prob(lam_home, lam_away, max_goals=10):
    """
    Sum Poisson scoreline probabilities up to max_goals to get P(Home Win), P(Draw), P(Away Win).
    """
    p_home = 0.0
    p_draw = 0.0
    p_away = 0.0
    for hg in range(max_goals + 1):
        p_h = poisson_pmf(hg, lam_home)
        for ag in range(max_goals + 1):
            p_a = poisson_pmf(ag, lam_away)
            p = p_h * p_a
            if hg > ag:
                p_home += p
            elif hg == ag:
                p_draw += p
            else:
                p_away += p
    # Normalize tiny rounding
    s = p_home + p_draw + p_away
    if s > 0:
        p_home, p_draw, p_away = p_home/s, p_draw/s, p_away/s
    return p_home, p_draw, p_away

# -----------------------
# Run pipeline
# -----------------------
# 1) Fit strengths from historical results
results = fetch_historical_matches(LOOKBACK_DAYS)
if results.empty:
    raise RuntimeError("No historical results fetched. Check API key/limits or widen LOOKBACK_DAYS.")
strengths = build_team_strengths(results)

# 2) Get this weekend fixtures (Sat–Sun Nairobi time windows converted to UTC dates)
fixtures = fetch_weekend_fixtures(sat_utc, sun_utc)
if fixtures.empty:
    print("No Premier League fixtures scheduled this weekend (Sat–Sun).")
else:
    # 3) Compute expected goals and outcome probabilities
    rows = []
    for _, r in fixtures.iterrows():
        home = r["homeTeam"]
        away = r["awayTeam"]
        lam_h, lam_a = expected_goals(home, away, strengths)
        ph, pd, pa = match_outcome_prob(lam_h, lam_a, MAX_GOALS)
        rows.append({
            "utcDate": r["utcDate"],
            "homeTeam": home,
            "awayTeam": away,
            "xG_home": round(lam_h, 3),
            "xG_away": round(lam_a, 3),
            "P_home": round(ph, 3),
            "P_draw": round(pd, 3),
            "P_away": round(pa, 3),
        })


In [5]:
del pd
import pandas as pd


In [6]:
out = pd.DataFrame(rows).sort_values("utcDate").reset_index(drop=True)
print(out.to_string(index=False))


             utcDate           homeTeam                   awayTeam  xG_home  xG_away  P_home  P_draw  P_away
2025-08-23T11:30:00Z Manchester City FC       Tottenham Hotspur FC    2.451    1.225   0.643   0.181   0.175
2025-08-23T14:00:00Z    AFC Bournemouth Wolverhampton Wanderers FC    1.847    0.963   0.581   0.225   0.193
2025-08-23T14:00:00Z       Brentford FC             Aston Villa FC    1.973    1.454   0.497   0.218   0.285
2025-08-23T14:00:00Z         Burnley FC             Sunderland AFC    1.300    1.634   0.305   0.242   0.453
2025-08-23T16:30:00Z         Arsenal FC            Leeds United FC    1.890    0.955   0.593   0.221   0.186
2025-08-24T13:00:00Z  Crystal Palace FC       Nottingham Forest FC    1.662    1.437   0.430   0.237   0.333
2025-08-24T13:00:00Z         Everton FC  Brighton & Hove Albion FC    1.491    1.195   0.440   0.256   0.304
2025-08-24T15:30:00Z          Fulham FC       Manchester United FC    1.439    1.333   0.397   0.254   0.349


In [7]:
# --- Blending Poisson model with bookmaker odds ---
def odds_to_probs(odds_dict):
    """Convert decimal odds to normalized implied probabilities."""
    raw = {k: 1/v for k, v in odds_dict.items()}
    total = sum(raw.values())
    return {k: v/total for k, v in raw.items()}

def blend_probs(poisson_probs, odds_probs, w_market=0.6):
    """
    Blend bookmaker (market) and Poisson model probabilities.
    w_market = weight for market odds, (1-w_market) for Poisson.
    """
    blended = {}
    for k in ["home", "draw", "away"]:
        blended[k] = w_market * odds_probs[k] + (1 - w_market) * poisson_probs[k]
    return blended

# Example bookmaker odds (decimal) for this weekend (replace with yours)
bookmaker_odds = {
   "Westham FC vs Chelsea FC":{"home": 4.90, "draw": 4.10, "away": 1.72},
   "Manchester City FC vs Tottenham Hotspur Fc":{"home": 1.52, "draw": 4.80, "away": 5.80},
   "AFC Bournemouth vs Wolverhampton Wanderers": {"home": 1.80, "draw": 3.95, "away": 4.30},
   "Brentford FC vs Aston Villa FC":{"home":3.25,"draw":3.55,"away":2.23},
   "Burnley FC vs Sunderland AFC": {"home": 2.28, "draw": 3.20, "away": 3.45},
   "Arsenal FC vs Leeds United FC": {"home": 1.28, "draw": 6.00, "away": 11.00},
   "Crystal Palace FC vs Nottingham FC": {"home": 2.42, "draw": 3.25, "away": 3.15},
   "Everton Fc vs Brighton & Hove Albion": {"home": 3.15, "draw": 3.35, "away": 2.36},
   "Fulham FC vs Manchester United FC": {"home": 3.45, "draw": 3.60, "away": 2.13},
   "Newcastle United FC vs Liverpool FC": {"home": 3.30, "draw": 3.90, "away": 2.08},


}

# Merge with Poisson output
merged_rows = []
for _, r in out.iterrows():  # 'out' is the Poisson DataFrame from before
    fixture = f"{r['homeTeam']} vs {r['awayTeam']}"
    poisson_probs = {"home": r["P_home"], "draw": r["P_draw"], "away": r["P_away"]}
    if fixture in bookmaker_odds:
        odds_probs = odds_to_probs(bookmaker_odds[fixture])
        blended = blend_probs(poisson_probs, odds_probs, w_market=0.6)
    else:
        # fallback = pure Poisson
        odds_probs = {"home": None, "draw": None, "away": None}
        blended = poisson_probs
    merged_rows.append({
        "Fixture": fixture,
        "xG_home": r["xG_home"],
        "xG_away": r["xG_away"],
        "Poisson_H": poisson_probs["home"],
        "Poisson_D": poisson_probs["draw"],
        "Poisson_A": poisson_probs["away"],
        "Odds_H": odds_probs["home"],
        "Odds_D": odds_probs["draw"],
        "Odds_A": odds_probs["away"],
        "Final_H": round(blended["home"], 3),
        "Final_D": round(blended["draw"], 3),
        "Final_A": round(blended["away"], 3),
    })

final_df = pd.DataFrame(merged_rows)
print(final_df.to_string(index=False))


                                      Fixture  xG_home  xG_away  Poisson_H  Poisson_D  Poisson_A   Odds_H   Odds_D   Odds_A  Final_H  Final_D  Final_A
   Manchester City FC vs Tottenham Hotspur FC    2.451    1.225      0.643      0.181      0.175      NaN      NaN      NaN    0.643    0.181    0.175
AFC Bournemouth vs Wolverhampton Wanderers FC    1.847    0.963      0.581      0.225      0.193      NaN      NaN      NaN    0.581    0.225    0.193
               Brentford FC vs Aston Villa FC    1.973    1.454      0.497      0.218      0.285 0.296481 0.271427 0.432092    0.377    0.250    0.373
                 Burnley FC vs Sunderland AFC    1.300    1.634      0.305      0.242      0.453 0.421342 0.300206 0.278452    0.375    0.277    0.348
                Arsenal FC vs Leeds United FC    1.890    0.955      0.593      0.221      0.186 0.752051 0.160438 0.087511    0.688    0.185    0.127
    Crystal Palace FC vs Nottingham Forest FC    1.662    1.437      0.430      0.237      0.3