This notebook was inspired by this blog post: https://dashee87.github.io/football/python/predicting-football-results-with-statistical-modelling/
The Poisson model essentially follows the first part of the blog post above. The rest of the notebook applies the results to estimate the expected points of each manager under the Fantasy Premier League Assistant Manager chip.

In [None]:
import pandas as pd
import numpy as np
import datetime as dt
import json
import requests
from tqdm import tqdm

from scipy.stats import poisson
import statsmodels.api as sm
import statsmodels.formula.api as smf
from random import choices

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

# Fetch data

In [None]:
# Scrape the Premier League schedule pages from FBref. `pd.read_html` returns
# every table found on a page; the first one is used for each season.
matches24_25 = pd.read_html('https://fbref.com/en/comps/9/schedule/Premier-League-Scores-and-Fixtures')[0]
matches23_24 = pd.read_html('https://fbref.com/en/comps/9/2023-2024/schedule/2023-2024-Premier-League-Scores-and-Fixtures')[0]
# Stack both seasons (older season first) and parse the match dates.
fixtures_fbref = pd.concat([matches23_24, matches24_25], ignore_index=True)
fixtures_fbref['Date'] = pd.to_datetime(fixtures_fbref['Date'])
display(fixtures_fbref)

In [None]:
# Rows with a score are matches that have already been played; the old row
# labels are kept as an "index" column by reset_index().
results = fixtures_fbref.loc[fixtures_fbref['Score'].notna()].reset_index()
results

# Process data

In [None]:
# The score column reads "<home>–<away>" (en dash). The goals are kept as
# strings here and converted to integers when the model data is built.
results['home_goals'] = [score.split('–')[0] for score in results['Score']]
results['away_goals'] = [score.split('–')[1] for score in results['Score']]
results

In [None]:
# Age of every played match in whole days, measured from the moment this cell runs.
current_date = dt.datetime.today()
results['days_since'] = (current_date - results['Date']).dt.days
results

In [None]:
# Train only on matches played within the last 365 days.
is_recent = results['days_since'] <= 365
results = results[is_recent]
results

# Model

In [None]:
def predict_average_goals(foot_model, homeTeam, awayTeam):
    """Predict the expected number of goals for both sides of one fixture.

    Parameters
    ----------
    foot_model
        Fitted model exposing ``predict(DataFrame)``. The frame passed in has
        the columns ``team``, ``opponent`` and ``home``.
    homeTeam, awayTeam
        Team names as they appear in the data the model was fitted on.

    Returns
    -------
    tuple
        ``(home_goals_avg, away_goals_avg)``.
    """
    # Both perspectives of the match go into one two-row frame, so the model is
    # asked once per fixture instead of twice.
    match_rows = pd.DataFrame({
        'team': [homeTeam, awayTeam],
        'opponent': [awayTeam, homeTeam],
        'home': [1, 0],
    })
    home_goals_avg, away_goals_avg = foot_model.predict(match_rows).values
    return home_goals_avg, away_goals_avg

In [None]:
# Long format for the regression: every match contributes two rows, one from the
# home side's point of view (home=1) and one from the away side's (home=0).
home_rows = results[['Home', 'Away', 'home_goals']].rename(
    columns={'Home': 'team', 'Away': 'opponent', 'home_goals': 'goals'}).assign(home=1)
away_rows = results[['Away', 'Home', 'away_goals']].rename(
    columns={'Away': 'team', 'Home': 'opponent', 'away_goals': 'goals'}).assign(home=0)
goal_model_data = pd.concat([home_rows, away_rows]).reset_index(drop=True)

# Goals were extracted from the score string, so turn them into integers.
goal_model_data['goals'] = goal_model_data['goals'].map(int)

goal_model_data

In [None]:
# Poisson regression of goals scored on a home indicator plus categorical
# effects for the scoring team and for the opponent.
poisson_model = smf.glm(formula="goals ~ home + team + opponent", 
                        data=goal_model_data, 
                        family=sm.families.Poisson()).fit()

print(poisson_model.summary())

In [None]:
# Pick the coefficients by name instead of by position, so the attack/defense
# split stays correct however many teams appear in the training window.
# The reference team of each factor has no coefficient and is therefore not
# part of the median, exactly as before.
attack_params = poisson_model.params[poisson_model.params.index.str.startswith('team[T.')]
defense_params = poisson_model.params[poisson_model.params.index.str.startswith('opponent[T.')]

median_attack_value = attack_params.median()
median_attack_team = (attack_params - median_attack_value).abs().idxmin()
median_defense_value = defense_params.median()
median_defense_team = (defense_params - median_defense_value).abs().idxmin()
print(f'Median attack: {median_attack_team}')
print(f'Median defense: {median_defense_team}')

In [None]:
# Extract the bare team name from the coefficient label: take the text after the
# first '.' and drop the trailing character (the closing bracket).
median_attack_team.split('.')[1][:-1]

In [None]:
# Team names in the spelling used by the FBref data. The order matters: further
# down the list is indexed with `team_h - 1` / `team_a - 1` from the FPL fixtures.
teams = "Arsenal,Aston Villa,Bournemouth,Brentford,Brighton,Chelsea,Crystal Palace,Everton,Fulham,Ipswich Town,Leicester City,Liverpool,Manchester City,Manchester Utd,Newcastle Utd,Nott'ham Forest,Southampton,Tottenham,West Ham,Wolves" 
teams = teams.split(',')

In [None]:
# Attack rating: goals a team is expected to score against the median defense,
# averaged over playing at home and playing away.
median_defense_name = median_defense_team.split('.')[1][:-1]
for team in teams:
    # `team` hosts the median defense.
    home_goals_avg, _ = predict_average_goals(poisson_model, team, median_defense_name)
    # `team` visits the median defense. The opponent has to be the median
    # *defense* team here too, otherwise another team's defense is measured.
    _, away_goals_avg = predict_average_goals(poisson_model, median_defense_name, team)
    print(f'Attack rating for team: {team}')
    print(np.round(np.mean((home_goals_avg, away_goals_avg)),2))

In [None]:
# Defense rating: goals a team is expected to concede against the median attack,
# averaged over playing at home and playing away.
median_attack_name = median_attack_team.split('.')[1][:-1]
for team in teams:
    # The median attack visits `team`. The scorer has to be the median *attack*
    # team here too, otherwise another team's attack is measured.
    _, away_goals_avg = predict_average_goals(poisson_model, team, median_attack_name)
    # The median attack hosts `team`.
    home_goals_avg, _ = predict_average_goals(poisson_model, median_attack_name, team)
    print(f'Defense rating for team: {team}')
    print(np.round(np.mean((home_goals_avg, away_goals_avg)),2))

# Predict fixtures

In [None]:
def scoreline_probabilities(foot_model, homeTeam, awayTeam, max_goals=10):
    """Return the matrix of scoreline probabilities for one fixture.

    Entry ``[i, j]`` is the probability that the home team scores ``i`` goals
    and the away team ``j`` goals, with both counts treated as independent
    Poisson variables whose means come from ``predict_average_goals``. Only
    0..``max_goals`` goals per side are covered.
    """
    home_rate, away_rate = predict_average_goals(foot_model, homeTeam, awayTeam)
    goal_counts = np.arange(max_goals + 1)
    home_pmf = poisson.pmf(goal_counts, home_rate)
    away_pmf = poisson.pmf(goal_counts, away_rate)
    return np.outer(home_pmf, away_pmf)

In [None]:
# Remaining fixtures: no score yet, but a 'Wk' value is present.
has_no_score = fixtures_fbref['Score'].isna()
has_week = fixtures_fbref['Wk'].notna()
fixtures = fixtures_fbref[has_no_score & has_week].reset_index()
fixtures

In [None]:
# Example fixture: scoreline probability matrix for one hand-picked match.
home_team = 'Chelsea'
away_team = 'Tottenham'
outcome_matrix = scoreline_probabilities(poisson_model, home_team, away_team)

In [None]:
# Heatmap of the example matrix: rows are home goals, columns are away goals.
ax = sns.heatmap(outcome_matrix, annot=True, fmt=".2f")
ax.set(xlabel=away_team, ylabel=home_team)
# Move the away-team axis to the top so it reads like a table header.
ax.xaxis.tick_top()
ax.xaxis.set_label_position('top')

In [None]:
# Spot check: expected goals for Everton at home against Liverpool.
poisson_model.predict(pd.DataFrame(data={'team': 'Everton', 'opponent': 'Liverpool', 'home':1},index=[1]))

In [None]:
# Spot check: expected goals for Tottenham away at Chelsea.
poisson_model.predict(pd.DataFrame(data={'team': 'Tottenham', 'opponent': 'Chelsea', 'home':0},index=[1]))

In [None]:
# Home win / draw / away win probability for every remaining fixture.
# Rows of the matrix are home goals and columns are away goals, so the cells
# below the diagonal are home wins, the diagonal is a draw and the cells above
# the diagonal are away wins.
all_probabilities = []
for _, fixture in fixtures.iterrows():
    outcome_matrix = scoreline_probabilities(poisson_model, fixture['Home'], fixture['Away'])
    home_win_cells = np.tril_indices_from(outcome_matrix, k=-1)
    away_win_cells = np.triu_indices_from(outcome_matrix, k=1)
    all_probabilities.append([
        outcome_matrix[home_win_cells].sum(),
        np.trace(outcome_matrix),
        outcome_matrix[away_win_cells].sum(),
    ])

# add all probabilities to fixtures df
outcome_columns = ['home_win_probability', 'draw_probability', 'away_win_probability']
fixtures[outcome_columns] = all_probabilities

display(fixtures)

## Simulate season

In [None]:
def simulate_match(foot_model, homeTeam, awayTeam):
    '''
    Draw one random scoreline for a fixture.

    Both goal counts are sampled from Poisson distributions whose means come
    from ``predict_average_goals``. Returns ``(outcome, home_goals, away_goals)``
    where ``outcome`` is 'home_win', 'draw' or 'away_win'.
    '''
    home_rate, away_rate = predict_average_goals(foot_model, homeTeam, awayTeam)
    home_goals = poisson.rvs(home_rate)
    away_goals = poisson.rvs(away_rate)

    if home_goals == away_goals:
        return 'draw', home_goals, away_goals

    outcome = 'home_win' if home_goals > away_goals else 'away_win'
    return outcome, home_goals, away_goals

In [None]:
def simulate_season(league_table, fixtures, foot_model):
    '''
    Simulate the remaining fixtures once and return the final league table.

    Parameters
    ----------
    league_table : DataFrame with the columns Squad, MP, W, D, L, GF, GA, GD and
        Pts holding the current standings. It is updated in place, so pass a copy
        if the original must stay untouched.
    fixtures : DataFrame with one row per remaining match and the columns
        Home and Away.
    foot_model : fitted model handed on to ``simulate_match``.

    Returns
    -------
    The table sorted by Pts, GD and GF (descending) with a rank column Rk
    running from 1 to the number of teams.

    Raises
    ------
    AssertionError if, after all fixtures, some team has not played a full
    double round-robin, i.e. 2 * (number of teams - 1) matches.
    '''
    def _credit(team, **increments):
        # Build the row mask once per team rather than once per updated column.
        is_team = league_table['Squad'] == team
        for column, amount in increments.items():
            league_table.loc[is_team, column] += amount

    for _, fixture in fixtures.iterrows():
        home_team_ = fixture.Home
        away_team_ = fixture.Away
        # simulate match outcome
        outcome, home_goals, away_goals = simulate_match(foot_model, home_team_, away_team_)

        # matches played and goals are booked whatever the outcome
        home_update = {'MP': 1, 'GF': home_goals, 'GA': away_goals, 'GD': home_goals - away_goals}
        away_update = {'MP': 1, 'GF': away_goals, 'GA': home_goals, 'GD': away_goals - home_goals}

        if outcome == 'home_win':
            home_update.update(Pts=3, W=1)
            away_update.update(L=1)
        elif outcome == 'draw':
            home_update.update(Pts=1, D=1)
            away_update.update(Pts=1, D=1)
        elif outcome == 'away_win':
            away_update.update(Pts=3, W=1)
            home_update.update(L=1)
        else:
            print('No valid result for:')
            print(fixture)

        _credit(home_team_, **home_update)
        _credit(away_team_, **away_update)

    # Every team meets every other team twice (38 matches in a 20-team league).
    n_teams = len(league_table)
    games_per_team = 2 * (n_teams - 1)
    if not np.all(league_table['MP'] == games_per_team):
        # Raised explicitly so the check also runs under `python -O`.
        raise AssertionError(f'All teams have not played {games_per_team} games!')

    league_table = league_table.sort_values(by=['Pts', 'GD', 'GF'], ascending=False)
    league_table['Rk'] = np.arange(1, n_teams + 1)

    return league_table

In [None]:
# Current standings: the first table on the FBref Premier League stats page.
league_table = pd.read_html('https://fbref.com/en/comps/9/Premier-League-Stats')[0]
league_table

In [None]:
# Single simulation run. A copy of the relevant columns is passed because
# simulate_season updates the table it receives.
simulated_table = simulate_season(league_table[['Squad', 'MP', 'W', 'D', 'L', 'GF', 'GA', 'GD', 'Pts']].copy(), fixtures, poisson_model)
display(simulated_table)

In [None]:
# Repeat the season simulation many times and stack the final tables, tagging
# each one with the number of the run it came from.
nr_simulations = 1000
table_columns = ['Squad', 'MP', 'W', 'D', 'L', 'GF', 'GA', 'GD', 'Pts']
simulation_results = []
for i in tqdm(range(nr_simulations), desc='Simulating...'):
    # fresh copy per run: simulate_season updates the table it is given
    starting_table = league_table[table_columns].copy()
    simulated_table = simulate_season(starting_table, fixtures, poisson_model)
    simulated_table['simulation_nr'] = i
    simulation_results.append(simulated_table)

simulation_results_df = pd.concat(simulation_results).reset_index(drop=True)

In [None]:
# Count how often each team finished in each position across all simulations:
# one row per team, one column per final rank.
result_matrix = (
    simulation_results_df.groupby(['Squad', 'Rk'])
    .size()
    .unstack(fill_value=0)
    .reindex(columns=range(1, 21), fill_value=0)  # Ensure columns go from 1 to 20
)

# normalize to percentage
result_matrix = 100 * (result_matrix / nr_simulations)

In [None]:
# Alternative ordering (disabled): sort teams by their most frequent position.
# Determine the most common position for each team (highest count)
#dominant_positions = result_matrix.idxmax(axis=1)
# Sort teams based on their most common position
#sorted_teams = dominant_positions.sort_values().index

# Reorder the matrix based on average final league position
sorted_teams = simulation_results_df.groupby(['Squad'])['Rk'].mean().sort_values().index
sorted_matrix = result_matrix.loc[sorted_teams]

# Plot the reordered heatmap
# NOTE(review): result_matrix holds percentages (0-100) while the colour bar is
# labelled 'Probability' — confirm the label is what is wanted.
plt.figure(figsize=(10, 6))
sns.heatmap(sorted_matrix, annot=True, cmap="Blues", linewidths=0.5, cbar_kws={'label': 'Probability'})

plt.title("Distribution of Final League Positions")
plt.xlabel("Final League Position")
plt.ylabel("Team")

plt.show()

# FPL Assistant Manager Chip

In [None]:
#teams = "Arsenal,Aston Villa,Bournemouth,Brentford,Brighton,Chelsea,Crystal Palace,Everton,Fulham,Ipswich Town,Leicester City,Liverpool,Manchester City,Manchester Utd,Newcastle Utd,Nott'ham Forest,Southampton,Tottenham,West Ham,Wolves" 
#teams = teams.split(',')
#teams

In [None]:
# Download the full fixture list from the FPL API. Fail fast on a hanging
# connection or an HTTP error instead of trying to parse an error page.
fpl_response = requests.get('https://fantasy.premierleague.com/api/fixtures/', timeout=30)
fpl_response.raise_for_status()
fpl_fixtures_data = fpl_response.json()
fpl_fixtures = pd.DataFrame(fpl_fixtures_data)
# Translate the 1-based FPL team ids into the team names used by the model;
# this relies on `teams` being listed in FPL id order.
fpl_fixtures['home_team'] = [teams[i] for i in fpl_fixtures['team_h']-1]
fpl_fixtures['away_team'] = [teams[i] for i in fpl_fixtures['team_a']-1]
fpl_fixtures

In [None]:
# Keep the FPL fixtures that have no home score yet.
# NOTE(review): this rebinds `fixtures`, which the season-simulation cells above
# also read (there with 'Home'/'Away' columns) — re-running those cells after
# this one would hand them this frame instead; confirm that is acceptable.
fixtures = fpl_fixtures[fpl_fixtures['team_h_score'].isnull()].reset_index(drop=True)
fixtures

In [None]:
# Home win / draw / away win probability for every remaining FPL fixture.
# Rows of the matrix are home goals and columns are away goals, so the cells
# below the diagonal are home wins, the diagonal is a draw and the cells above
# the diagonal are away wins.
all_probabilities = []
for _, fixture in fixtures.iterrows():
    outcome_matrix = scoreline_probabilities(poisson_model, fixture['home_team'], fixture['away_team'])
    home_win_cells = np.tril_indices_from(outcome_matrix, k=-1)
    away_win_cells = np.triu_indices_from(outcome_matrix, k=1)
    all_probabilities.append([
        outcome_matrix[home_win_cells].sum(),
        np.trace(outcome_matrix),
        outcome_matrix[away_win_cells].sum(),
    ])

# add all probabilities to fixtures df
outcome_columns = ['home_win_probability', 'draw_probability', 'away_win_probability']
fixtures[outcome_columns] = all_probabilities

fixtures

In [None]:
def table_bonus_check(focal_team, opponent):
    """Return 1.0 if ``focal_team`` is ranked at least five places below ``opponent``.

    Ranks are read from the ``Rk`` column of the module-level ``league_table``
    (a larger number is a lower position). In every other case 0.0 is returned.
    """
    def _rank(team):
        return league_table.loc[league_table.Squad == team, 'Rk'].values[0]

    rank_gap = _rank(focal_team) - _rank(opponent)
    return 1.0 if rank_gap >= 5 else 0.0

In [None]:
# Assistant Manager chip scoring used below.
WIN_POINTS = 6
DRAW_POINTS = 3
TABLE_BONUS_WIN_POINTS = 10
TABLE_BONUS_DRAW_POINTS = 5
GOAL_POINTS = 1
# A clean sheet is worth 2 points, not 1.
CLEAN_SHEET_POINTS = 2


def _expected_manager_points(team, opponent, win_probability, draw_probability, team_xg, opponent_xg):
    """Expected Assistant Manager points for ``team`` in one fixture."""
    table_bonus_coefficient = table_bonus_check(team, opponent)  # 1 or 0
    expected_points = 0
    # points for win
    expected_points += (WIN_POINTS + TABLE_BONUS_WIN_POINTS*table_bonus_coefficient)*win_probability
    # points for draw
    expected_points += (DRAW_POINTS + TABLE_BONUS_DRAW_POINTS*table_bonus_coefficient)*draw_probability
    # expected goals scored
    expected_points += GOAL_POINTS*team_xg
    # clean sheet: the opponent scores zero goals
    expected_points += CLEAN_SHEET_POINTS*poisson.pmf(0, opponent_xg)
    return expected_points


manager_xp = []
for _, row in fixtures.iterrows():
    # fixtures without a gameweek cannot be assigned to a week
    if pd.isna(row.event):
        continue

    # calculate expected goals for both teams
    home_team_xg, away_team_xg = predict_average_goals(poisson_model, row.home_team, row.away_team)

    manager_xp.append([
        row.event,
        row.home_team,
        _expected_manager_points(row.home_team, row.away_team,
                                 row.home_win_probability, row.draw_probability,
                                 home_team_xg, away_team_xg),
    ])
    manager_xp.append([
        row.event,
        row.away_team,
        _expected_manager_points(row.away_team, row.home_team,
                                 row.away_win_probability, row.draw_probability,
                                 away_team_xg, home_team_xg),
    ])

manager_df = pd.DataFrame(manager_xp, columns=['gameweek', 'team', 'expected_points'])
manager_df

In [None]:
# Total expected points per team and gameweek; a team with several fixtures in
# one gameweek gets their sum.
weekly_scores = manager_df.groupby(['gameweek', 'team']).sum().reset_index()
weekly_scores

In [None]:
# One line per team: expected manager points by gameweek.
fig = px.line(weekly_scores, x='gameweek', y='expected_points', color='team', markers=True,
              title="Expected Points per Game Week",
              labels={"expected_points": "Expected Points", "gameweek": "Game Week"})

fig.show()

In [None]:
# Teams in sorted order; the first one is highlighted when the figure opens.
# Note: this rebinds the module-level `teams`.
teams = np.sort(weekly_scores['team'].unique())
default_team = teams[0]

# One trace per team: the highlighted team in blue, all others faded gray.
fig = go.Figure()
for team in teams:
    team_scores = weekly_scores[weekly_scores['team'] == team]
    is_default = team == default_team
    fig.add_trace(go.Scatter(
        x=team_scores['gameweek'],
        y=team_scores['expected_points'],
        mode='lines+markers',
        name=team,
        line=dict(color='blue' if is_default else 'gray'),
        opacity=1 if is_default else 0.3,
        visible=True
    ))

# One dropdown entry per team; choosing it restyles every trace so that only the
# chosen team is highlighted, and updates the title.
dropdown_buttons = [
    {
        "label": selected_team,
        "method": "update",
        "args": [
            {
                "opacity": [1 if team == selected_team else 0.3 for team in teams],
                "line": [dict(color="blue" if team == selected_team else "gray") for team in teams],
            },
            {"title": f"Expected Points per Game Week - {selected_team}"}
        ]
    }
    for selected_team in teams
]

# Attach the dropdown and label the figure.
fig.update_layout(
    title=f"Expected Points per Game Week - {default_team}",
    xaxis_title="Game Week",
    yaxis_title="Expected Points",
    updatemenus=[{
        "buttons": dropdown_buttons,
        "direction": "down",
        "showactive": True,
    }]
)

fig.show()

In [None]:
# Per team, sum the expected points of the current row and the two rows before
# it; the first two rows of each team are NaN. Dropping the group level of the
# result index lets the values align with the rows of weekly_scores.
# NOTE(review): the window counts rows, not gameweeks — if a team has no row for
# some gameweek, the window spans more than three gameweeks; confirm acceptable.
weekly_scores['3_week_rolling_sum_xp'] = weekly_scores.groupby("team")["expected_points"].rolling(3).sum().reset_index(level=0, drop=True)
weekly_scores

In [None]:
# Teams in sorted order; the first one is highlighted when the figure opens.
# Note: this rebinds the module-level `teams`.
teams = np.sort(weekly_scores['team'].unique())
default_team = teams[0]

# One trace per team: the highlighted team in blue, all others faded gray.
fig = go.Figure()
for team in teams:
    team_scores = weekly_scores[weekly_scores['team'] == team]
    is_default = team == default_team
    fig.add_trace(go.Scatter(
        x=team_scores['gameweek'],
        y=team_scores['3_week_rolling_sum_xp'],
        mode='lines+markers',
        name=team,
        line=dict(color='blue' if is_default else 'gray'),
        opacity=1 if is_default else 0.3,
        visible=True
    ))

# One dropdown entry per team; choosing it restyles every trace so that only the
# chosen team is highlighted, and updates the title.
dropdown_buttons = [
    {
        "label": selected_team,
        "method": "update",
        "args": [
            {
                "opacity": [1 if team == selected_team else 0.3 for team in teams],
                "line": [dict(color="blue" if team == selected_team else "gray") for team in teams],
            },
            {"title": f"3-week Rolling Sum of Expected Points - {selected_team}"}
        ]
    }
    for selected_team in teams
]

# Attach the dropdown and label the figure.
fig.update_layout(
    title=f"3-week Rolling Sum of Expected Points - {default_team}",
    xaxis_title="Gameweek",
    yaxis_title="Expected Points",
    updatemenus=[{
        "buttons": dropdown_buttons,
        "direction": "down",
        "showactive": True,
    }]
)

fig.show()