In [None]:
import penaltyblog as pb
import pandas as pd
import numpy as np
import datetime as dt
import dataframe_image as dfi
from random import choices
import json
import requests
from tqdm import tqdm

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

# Show every column when a DataFrame is rendered (no column truncation).
pd.options.display.max_columns = None

In [None]:
import sys
# Add the parent directory to the module search path so that the local
# `src` package below can be resolved from this notebook's location.
sys.path.append('../')
from src.utils import (simulate_match, analyse_match, table_bonus_check, calculate_manager_points, simulate_season_fpl)

# Data

In [None]:
#fb = pb.scrapers.FootballData("ENG Premier League", "2023-2024")
#df_list = [fb.get_fixtures()]
#fb = pb.scrapers.FootballData("ENG Premier League", "2024-2025")
#df_list.append(fb.get_fixtures())

#df = pd.concat(df_list)
#display(df)

In [None]:
# Load the scores-and-fixtures tables for the current and the 2023-2024
# Premier League seasons from FBref (first table on each page) and stack them.
matches24_25 = pd.read_html('https://fbref.com/en/comps/9/schedule/Premier-League-Scores-and-Fixtures')[0]
matches23_24 = pd.read_html('https://fbref.com/en/comps/9/2023-2024/schedule/2023-2024-Premier-League-Scores-and-Fixtures')[0]
fixtures_fbref = pd.concat([matches23_24, matches24_25], ignore_index=True)
fixtures_fbref['Date'] = pd.to_datetime(fixtures_fbref['Date'])

# Keep only rows that have a score, i.e. matches that were actually played.
df = fixtures_fbref[fixtures_fbref['Score'].notnull()]
df = df.reset_index()

# 'Score' is text of the form "<home>–<away>" (en dash). Split it and cast
# both halves to int: leaving them as strings would feed text instead of
# goal counts into the goal model fitted later on.
score_parts = df['Score'].str.split('–', expand=True)
df['goals_home'] = score_parts[0].astype(int)
df['goals_away'] = score_parts[1].astype(int)

df.rename(columns={'Home': 'team_home', 'Away': 'team_away', 'Date': 'date'}, inplace=True)

df

In [None]:
# Age of every match in whole days, measured from the moment the cell runs.
current_date = dt.datetime.today()
df['days_since'] = (current_date - df['date']).dt.days

# Restrict the training data to matches played within the last 365 days.
df = df.loc[df['days_since'] <= 365].copy()
df

In [None]:
# Current league standings: the first table on the FBref Premier League page.
league_table_url = 'https://fbref.com/en/comps/9/Premier-League-Stats'
league_table = pd.read_html(league_table_url)[0]
#league_table['Squad'] = league_table['Squad'].replace(team_name_dict)
league_table

In [None]:
# Alphabetically sorted array of the distinct squad names in the league table.
squad_names = league_table['Squad'].drop_duplicates().to_numpy()
teams = np.sort(squad_names)
teams

In [None]:
# Download the fixture list from the FPL API. A timeout keeps the notebook
# from hanging forever on a stalled connection, and raise_for_status() turns
# an HTTP error into an exception instead of parsing an error page as data.
fpl_response = requests.get('https://fantasy.premierleague.com/api/fixtures/', timeout=30)
fpl_response.raise_for_status()
fpl_fixtures_data = fpl_response.json()
fpl_fixtures = pd.DataFrame(fpl_fixtures_data)

# NOTE(review): the 1-based FPL team ids are mapped onto the alphabetically
# sorted FBref squad names by position. This assumes both sources order the
# clubs identically -- TODO confirm (the FPL bootstrap-static endpoint could
# provide an explicit id -> name mapping).
fpl_fixtures['home_team'] = [teams[i] for i in fpl_fixtures['team_h']-1]
fpl_fixtures['away_team'] = [teams[i] for i in fpl_fixtures['team_a']-1]
#fpl_fixtures
display(fpl_fixtures.shape)

In [None]:
# Fixtures without a home score have not been played yet; these are the
# matches that remain to be simulated.
not_played = fpl_fixtures['team_h_score'].isna()
fixtures = fpl_fixtures.loc[not_played].reset_index(drop=True)
#fixtures
display(fixtures.shape)

# Model

In [None]:
# Per-match weights derived from the match dates and the xi parameter.
xi = 0.0018
weights = pb.models.dixon_coles_weights(df["date"], xi)

# Fit the Dixon-Coles goal model on home/away goals and home/away team names.
model_columns = ("goals_home", "goals_away", "team_home", "team_away")
model_inputs = [df[column] for column in model_columns]
clf = pb.models.DixonColesGoalModel(*model_inputs, weights)
clf.fit()

In [None]:
# Example: run the project's match analysis helper on a single fixture using
# the fitted model. The names must match the squad names the model was fitted on.
homeTeam = "Wolves"
awayTeam = "Tottenham"
analyse_match(clf, homeTeam, awayTeam)

# Team ratings

In [None]:
#teams = "Arsenal,Aston Villa,Bournemouth,Brentford,Brighton,Chelsea,Crystal Palace,Everton,Fulham,Ipswich Town,Leicester City,Liverpool,Manchester City,Manchester Utd,Newcastle Utd,Nott'ham Forest,Southampton,Tottenham,West Ham,Wolves" 
#teams = teams.split(',')

In [None]:
# Extract attack and defence parameters from the fitted model
params = clf.get_params()
attack_params = {k: v for k, v in params.items() if k.startswith('attack_')}
defense_params = {k: v for k, v in params.items() if k.startswith('defence_')}

# Compute median values
median_attack = np.median(list(attack_params.values()))
median_defense = np.median(list(defense_params.values()))


def _teams_nearest_median(team_params, prefix, median_value):
    """Return the team name(s) whose parameter is closest to ``median_value``.

    np.median averages the two middle values when the number of teams is even,
    so an exact-equality lookup can come back empty (and a later ``[0]`` would
    raise IndexError). Choosing the nearest team(s) always yields a result and
    reproduces the exact match whenever one exists.

    Parameters
    ----------
    team_params : dict
        Mapping of ``"<prefix><team>"`` to the fitted parameter value.
    prefix : str
        Key prefix to strip in order to recover the team name.
    median_value : float
        Median of the values in ``team_params``.

    Returns
    -------
    list of str
        Team names at minimal distance from the median; empty only if
        ``team_params`` is empty.
    """
    if not team_params:
        return []
    distances = {key: abs(value - median_value) for key, value in team_params.items()}
    smallest = min(distances.values())
    return [key[len(prefix):] for key, distance in distances.items() if distance == smallest]


# Find the team(s) representing the median attack / defence strength
median_attack_team = _teams_nearest_median(attack_params, 'attack_', median_attack)
median_defense_team = _teams_nearest_median(defense_params, 'defence_', median_defense)

print(f"Median attack value: {median_attack}, Team(s): {median_attack_team}")
print(f"Median defense value: {median_defense}, Team(s): {median_defense_team}")

In [None]:
# Rate every team by the goals the model expects it to score against the
# median-defence team and to concede against the median-attack team, each
# averaged over one home and one away fixture.
ratings = []
for team in teams:
    reference_defense = median_defense_team[0]
    scored_at_home = clf.predict(team, reference_defense).home_goal_expectation
    scored_away = clf.predict(reference_defense, team).away_goal_expectation
    attack = np.mean((scored_at_home, scored_away))

    reference_attack = median_attack_team[0]
    conceded_at_home = clf.predict(team, reference_attack).away_goal_expectation
    conceded_away = clf.predict(reference_attack, team).home_goal_expectation
    defense = np.mean((conceded_at_home, conceded_away))

    ratings.append((team, attack, defense, attack - defense))

rating_columns = ['team', 'attack_rating', 'defense_rating', 'goal_difference_rating']
ratings_df = pd.DataFrame(ratings, columns=rating_columns)
ratings_df = ratings_df.sort_values(by='goal_difference_rating', ascending=False)
ratings_df = ratings_df.reset_index(drop=True)
# Use a 1-based index so that it reads as a ranking.
ratings_df.index = ratings_df.index + 1

#dfi.export(ratings_df, "../ratings.png", table_conversion='matplotlib',)

display(ratings_df)

# Simulate season

In [None]:
#team_name_dict = {"Nott'ham Forest": "Nott'm Forest",
#                  'Manchester City': 'Man City',
#                  'Manchester Utd': 'Man United',
#                  'Newcastle Utd': 'Newcastle',
#                  'Ipswich Town': 'Ipswich',
#                  'Leicester City': 'Leicester',
#                  }

In [None]:
# Simulate the remaining fixtures `nr_simulations` times, starting each run
# from a fresh copy of the current league table.
# NOTE(review): no random seed is set in this cell, so repeated runs will
# differ if simulate_season_fpl draws random numbers -- confirm and seed if so.
nr_simulations = 1000
final_gameweek = 38
table_columns = ['Rk', 'Squad', 'MP', 'W', 'D', 'L', 'GF', 'GA', 'GD', 'Pts']

simulation_results = []
manager_results = []
for i in tqdm(range(nr_simulations), desc='Simulating...'):
    starting_table = league_table[table_columns].copy()
    simulated_table, manager_xp = simulate_season_fpl(starting_table, fixtures, clf, final_gameweek)

    # Tag both outputs with the run number so runs can be told apart later.
    simulated_table['simulation_nr'] = i
    simulation_results.append(simulated_table)

    manager_df = pd.DataFrame(manager_xp, columns=['gameweek', 'team', 'expected_points'])
    manager_df['simulation_nr'] = i
    manager_results.append(manager_df)

print('Done!')

# Stack all runs into two long frames with a fresh 0..n-1 index.
simulation_results_df = pd.concat(simulation_results, ignore_index=True)
manager_results_df = pd.concat(manager_results, ignore_index=True)

In [None]:
# Count how often each squad finished in each position across all simulations.
position_counts = simulation_results_df.groupby(['Squad', 'Rk']).size()
position_counts = position_counts.unstack(fill_value=0)

# Ensure columns go from 1 to 20, adding all-zero columns for unseen positions.
position_counts = position_counts.reindex(columns=range(1, 21), fill_value=0)

# normalize to percentage
result_matrix = (position_counts / nr_simulations) * 100

In [None]:
# Reorder the matrix based on average simulated league position
sorted_teams = simulation_results_df.groupby(['Squad'])['Rk'].mean().sort_values().index
sorted_matrix = result_matrix.loc[sorted_teams]

# Plot the reordered heatmap on an explicit Axes. result_matrix is expressed
# in percent (scaled by 100 when it is built), so the colour bar says so.
fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(sorted_matrix, annot=True, cmap="Blues", linewidths=0.5, cbar_kws={'label': 'Probability (%)'}, ax=ax)

timestamp = dt.datetime.now().strftime("%d.%m.%Y")
if final_gameweek == 38:
    ax.set_title(f"Distribution of Final League Positions \n(forecast on {timestamp} with a Dixon-Coles model)")
    ax.set_xlabel("Final League Position")
else:
    # An intermediate forecast is not the final table; label the axis to match the title.
    ax.set_title(f"Distribution of League Positions after gameweek {final_gameweek} \n(forecast on {timestamp} with a Dixon-Coles model)")
    ax.set_xlabel(f"League Position after gameweek {final_gameweek}")
ax.set_ylabel("Team")

#plt.savefig('league_distribution.png')

plt.show()

# FPL Assistant Manager analysis

In [None]:
# Average manager points per (gameweek, team) over all simulations. Only
# 'expected_points' is aggregated: summing every column would also add up the
# 'simulation_nr' labels and leave a meaningless column in the result.
weekly_scores = (
    manager_results_df.groupby(['gameweek', 'team'])[['expected_points']].sum()
    / nr_simulations
).reset_index()

In [None]:
# Give every team a row for every gameweek in the observed range; weeks in
# which a team has no game are filled with 0 manager points.
first_gameweek = int(weekly_scores['gameweek'].min())
last_gameweek = int(weekly_scores['gameweek'].max())
all_gameweeks = range(first_gameweek, last_gameweek + 1)
all_teams = weekly_scores['team'].unique()

# Every possible (gameweek, team) combination
idx = pd.MultiIndex.from_product([all_gameweeks, all_teams], names=['gameweek', 'team'])

# Re-index onto the full grid, then return to a flat frame
weekly_scores = weekly_scores.set_index(['gameweek', 'team'])
weekly_scores = weekly_scores.reindex(idx, fill_value=0).reset_index()

# 3-gameweek rolling sum per team; dropping the 'team' index level restores
# the original row index so the assignment aligns row by row.
rolling_sums = weekly_scores.groupby("team").rolling(3, on='gameweek').sum()
rolling_sums = rolling_sums.reset_index(level=0, drop=True)
weekly_scores[["rolling_expected_points"]] = rolling_sums[["expected_points"]]
#weekly_scores[["rolling_quantile_10", "rolling_quantile_30", "rolling_expected_points", "rolling_quantile_70", "rolling_quantile_90"]] = weekly_scores.groupby("team").rolling(3, on='gameweek').sum().reset_index(level=0, drop=True)[["quantile_10", "quantile_30", "expected_points", "quantile_70", "quantile_90"]]

weekly_scores

In [None]:
# Interactive line chart of expected points per gameweek: one trace per team,
# with the selected team drawn in blue and all others faded to gray.
teams = np.sort(weekly_scores['team'].unique())
default_team = teams[0]  # highlighted before any dropdown selection

fig = go.Figure()

# One trace per team, added in the same order as `teams` so that the
# per-trace lists in the dropdown buttons line up with the traces.
for name in teams:
    team_rows = weekly_scores[weekly_scores['team'] == name]
    is_default = name == default_team
    fig.add_trace(go.Scatter(
        x=team_rows['gameweek'],
        y=team_rows['expected_points'],
        mode='lines+markers',
        name=name,
        line=dict(color='blue' if is_default else 'gray'),
        opacity=1 if is_default else 0.3,
        visible=True,
    ))

# One dropdown entry per team: restyle every trace and update the title.
dropdown_buttons = [
    {
        "label": selected,
        "method": "update",
        "args": [
            {
                "opacity": [1 if name == selected else 0.3 for name in teams],
                "line": [dict(color="blue" if name == selected else "gray") for name in teams],
            },
            {"title": f"Expected Points per Game Week - {selected}"},
        ],
    }
    for selected in teams
]

# Titles, axis labels and the dropdown menu
team_menu = {
    "buttons": dropdown_buttons,
    "direction": "down",
    "showactive": True,
}
fig.update_layout(
    title=f"Expected Points per Game Week - {default_team}",
    xaxis_title="Game Week",
    yaxis_title="Expected Points",
    updatemenus=[team_menu],
)

fig.show()

In [None]:
# Interactive line chart of the 3-week rolling sum of expected points: one
# trace per team, the selected team in blue and all others faded to gray.
teams = np.sort(weekly_scores['team'].unique())
default_team = teams[0]  # highlighted before any dropdown selection

fig = go.Figure()

# Traces are added in `teams` order; the dropdown's per-trace lists rely on it.
for name in teams:
    highlighted = name == default_team
    rows_for_team = weekly_scores[weekly_scores['team'] == name]
    trace = go.Scatter(
        x=rows_for_team['gameweek'],
        y=rows_for_team['rolling_expected_points'],
        mode='lines+markers',
        name=name,
        line=dict(color='blue' if highlighted else 'gray'),
        opacity=1 if highlighted else 0.3,
        visible=True,
    )
    fig.add_trace(trace)

# Build one dropdown entry per team that re-highlights the traces and
# updates the figure title.
dropdown_buttons = []
for selected in teams:
    opacities = [1 if name == selected else 0.3 for name in teams]
    line_styles = [dict(color="blue" if name == selected else "gray") for name in teams]
    dropdown_buttons.append({
        "label": selected,
        "method": "update",
        "args": [
            {"opacity": opacities, "line": line_styles},
            {"title": f"3-week Rolling Sum of Expected Points - {selected}"},
        ],
    })

# Titles, axis labels and the dropdown menu
fig.update_layout(
    title=f"3-week Rolling Sum of Expected Points - {default_team}",
    xaxis_title="Gameweek",
    yaxis_title="Expected Points",
    updatemenus=[dict(buttons=dropdown_buttons, direction="down", showactive=True)],
)

fig.show()