In [None]:
# Gameweek the notebook is anchored on: it selects the predictions file that is
# loaded below and every "next N gameweeks" window starts at latest_gameweek + 1.
# NOTE(review): presumably the most recently completed gameweek — update before each run.
latest_gameweek = 29

In [None]:
import pandas as pd
import numpy as np
from pathlib import Path
import math
import sys

import catboost
#import shap

import plotly.graph_objects as go
import plotly.express as px
import seaborn as sns
import matplotlib.pyplot as plt
# Use seaborn's "darkgrid" theme for the matplotlib/seaborn figures.
sns.set_style("darkgrid")

# Allow up to 300 columns when displaying a DataFrame, so wide frames are not truncated.
pd.set_option('display.max_columns', 300)
#pd.set_option('display.max_rows', 100)

In [None]:
# get login credentials for fetching team data
# The JSON file is read as a pandas Series, so its fields (user_name, password and
# team_id are the ones used further down) are available as attributes. The secrets
# live in a file outside the notebook rather than being hard-coded here.
file_path = Path('../../login_credentials/fpl_login.json')
login_credentials = pd.read_json(file_path, typ='series')

In [None]:
# utility function for fetching team data
# Add the directory two levels up to the import path so that the `src` package
# can be imported from the notebook's working directory.
sys.path.append('../../')
from src.utils import fetch_my_team


# Data

In [None]:
# Load the projections file that belongs to `latest_gameweek` and preview it.
filepath = Path(f'../data/predictions/gameweek{latest_gameweek}.csv')
projections = pd.read_csv(filepath, index_col=0)
# Show the first rows and the (rows, columns) shape in one call.
display(projections.head(), projections.shape)

In [None]:
# Load the historical FPL data and keep only the rows of season '23-24'.
filepath = Path('../data/fpl_df.csv')
fpl_df = (
    pd.read_csv(filepath, index_col=0, low_memory=False)
    .loc[lambda frame: frame.season == '23-24']
)
# Show the first rows and the (rows, columns) shape in one call.
display(fpl_df.head(), fpl_df.shape)

In [None]:
# Columns carried over from the historical data into the per-player summary.
summary_columns = [
    'id', 'name', 'team_name', 'element_type', 'now_cost',
    'gameweek_minutes_ewm_20', 'points_per_game', 'total_points',
    'gameweek_xPoints_ewm_5', 'gameweek_xPoints_ewm_10',
    'gameweek_xPoints_ewm_20', 'gameweek_xPoints_ewm_40',
]

# One row per player name; `.last()` keeps each column's last non-null entry per name.
df = (
    fpl_df.groupby('name').last().reset_index()[summary_columns]
    .assign(
        # total_points / points_per_game, rounded; 0 where points_per_game is 0
        games_played=lambda d: np.round(
            np.where(d['points_per_game']!=0, d['total_points'] / d['points_per_game'], 0), 0
        ),
        # now_cost divided by 10
        price=lambda d: d['now_cost'] / 10.0,
        # 20-gameweek EWM of expected points per unit of price
        value=lambda d: d['gameweek_xPoints_ewm_20'] / d['price'],
        # geometric mean of the expected points and the value
        value_points=lambda d: np.sqrt(d['gameweek_xPoints_ewm_20'] * d['value']),
    )
)
df

In [None]:
# Drop duplicate players: some players get a new spelling of their name during the
# season, so grouping by name leaves several rows that share one `id`.
# For every id keep only the row with the most games played. Unlike dropping a single
# row per duplicated id, this also handles ids that appear three or more times, does
# not shadow the builtin `id`, and is safe to re-run.
df = (
    df.sort_values('games_played', kind='mergesort', na_position='first')  # stable: ties keep their order
      .drop_duplicates(subset='id', keep='last')                           # highest games_played wins
      .sort_index()                                                        # restore the original row order
)
assert df['id'].is_unique, "duplicate player ids remain in df"

In [None]:
def sum_expected_points(projections, first_gameweek, n_gameweeks, column_name):
    """Total projected points per player over a window of consecutive gameweeks.

    Parameters
    ----------
    projections : DataFrame with at least 'name', 'gameweek' and 'expected_points'.
    first_gameweek : first gameweek of the window (inclusive).
    n_gameweeks : number of consecutive gameweeks in the window.
    column_name : name given to the summed column in the result.

    Returns
    -------
    DataFrame with the columns 'name' and `column_name`, one row per player.
    """
    window = np.arange(first_gameweek, first_gameweek + n_gameweeks, 1)
    return (
        projections[projections.gameweek.isin(window)]
        # Select the column before summing: summing every column (strings included)
        # is wasted work and can fail on non-numeric data.
        .groupby('name')['expected_points']
        .sum()
        .rename(column_name)
        .reset_index()
    )


expected_points_next_10gw = sum_expected_points(
    projections, latest_gameweek+1, 10, 'expected_points_next_10_GW'
)
expected_points_next_5gw = sum_expected_points(
    projections, latest_gameweek+1, 5, 'expected_points_next_5_GW'
)

expected_points_next_10gw

In [None]:
# Attach the 10- and 5-gameweek projected totals to the per-player summary.
# Left joins keep players without projections (their totals become NaN).
for horizon_totals in (expected_points_next_10gw, expected_points_next_5gw):
    df = df.merge(horizon_totals, on='name', how='left')
display(df.head())

# Analysis

In [None]:
# Top 20 players (at least 5 games played) by projected points over the next 10 gameweeks.
overview_columns = ['name', 'team_name', 'price', 'games_played',
                    'points_per_game', 'total_points', 'gameweek_xPoints_ewm_20',
                    'expected_points_next_10_GW', 'value_points']
regular_players = df[df.games_played >= 5]
ranked_players = regular_players.sort_values(by='expected_points_next_10_GW', ascending=False)
ranked_players.head(20)[overview_columns]

In [None]:
# Summary row(s) of every player whose name contains 'Pickford'.
name_matches = df['name'].str.contains('Pickford')
df.loc[name_matches, ['name', 'team_name', 'price', 'games_played',
                      'points_per_game', 'total_points', 'gameweek_xPoints_ewm_5',
                      'gameweek_xPoints_ewm_10', 'gameweek_xPoints_ewm_20',
                      'expected_points_next_10_GW', 'value_points']]

In [None]:
# Top 20 per position; element_type 1..4 corresponds to the order of `positions`.
positions = ['GOALKEEPERS', 'DEFENDERS', 'MIDFIELDERS', 'FORWARDS']
position_columns = ['name', 'team_name', 'price', 'games_played',
                    'points_per_game', 'total_points', 'gameweek_xPoints_ewm_20',
                    'expected_points_next_10_GW', 'value_points']

for element_type, position in enumerate(positions, start=1):
    print(position)
    # Players of this position with at least 5 games played, best projection first.
    candidates = df[(df.games_played>=5) & (df.element_type==element_type)]
    ranked = candidates.sort_values(by='expected_points_next_10_GW', ascending=False)
    display(ranked.head(20)[position_columns])


In [None]:
# Top 20 midfielders (element_type 3) priced at 6.6 or less with at least 5 games played.
has_played_enough = df.games_played >= 5
is_midfielder = df.element_type == 3
is_affordable = df.price <= 6.6

budget_midfielders = (
    df[has_played_enough & is_midfielder & is_affordable]
    .sort_values(by='expected_points_next_10_GW', ascending=False)
    .head(20)
)
display(
    budget_midfielders[['name', 'team_name', 'price', 'games_played',
                        'points_per_game', 'total_points', 'gameweek_xPoints_ewm_20',
                        'expected_points_next_10_GW', 'value_points']]
)

In [None]:
# Spot check: points and expected-points columns for every row whose name contains 'Gordon'.
# The trailing comment keeps another name that was looked up with this cell.
fpl_df.loc[fpl_df['name'].str.contains('Gordon'), ['name', 'event_points', 'gameweek_xPoints', 'gameweek_xPoints_expanding_per90']] #André Onana

In [None]:
# Top 20 players by projected points in the upcoming gameweek (rows summed per player).
next_gameweek_rows = projections[projections.gameweek==(latest_gameweek+1)]
next_gameweek_totals = next_gameweek_rows.groupby('name').sum()
next_gameweek_totals.sort_values('expected_points', ascending=False).head(20)[['expected_points']]

In [None]:
# Top 20 players by projected points summed over the next 10 gameweeks.
next_10_gameweeks = np.arange(latest_gameweek+1, latest_gameweek+11)
next_10_rows = projections[projections.gameweek.isin(next_10_gameweeks)]
next_10_totals = next_10_rows.groupby('name').sum()
next_10_totals.sort_values('expected_points', ascending=False).head(20)[['expected_points']]

In [None]:
# List the distinct names in the projections that contain 'Onana'
# (useful for finding the exact spelling to use in the exact-match cells).
projections[projections['name'].str.contains('Onana')].name.unique()

In [None]:
# Spot check: minutes and expected points per gameweek for rows whose name contains 'Haaland'.
fpl_df.loc[fpl_df['name'].str.contains('Haaland'), ['name', 'gameweek', 'gameweek_minutes', 'gameweek_xPoints', ]]

# My team

In [None]:
# Fetch my current FPL team with the stored credentials.
# NOTE(review): `fetch_my_team` is defined in src.utils (not shown here); the next cell
# only relies on the result having a 'picks' list whose entries carry an 'element' id.
my_team = fetch_my_team(login_credentials.user_name, login_credentials.password, login_credentials.team_id)

In [None]:
# Projected score of my team for the upcoming gameweek.
my_gameweek = latest_gameweek+1

# Translate the element ids of my picks into player names. `.item()` fails loudly
# when an id is missing from (or duplicated in) `df`.
# NOTE(review): assumes the first 11 picks are the starters and the rest the bench —
# confirm against fetch_my_team.
my_starting_11_names = [
    df.loc[df.id==pick['element'], 'name'].item() for pick in my_team['picks'][0:11]
]
my_subs_names = [
    df.loc[df.id==pick['element'], 'name'].item() for pick in my_team['picks'][11:]
]

# Projection rows of the upcoming gameweek only.
my_gameweek_projections = projections[projections.gameweek==my_gameweek]


def _select_players(names):
    """Return the upcoming-gameweek projection rows of `names`, in the order given.

    Names are matched exactly. The names come from `df` in full, so a regex
    substring match (`str.contains`) is not needed and could pull in other players
    whose name contains the same text or misread regex metacharacters.
    """
    frames = [
        my_gameweek_projections[my_gameweek_projections['name']==name] for name in names
    ]
    # Concatenate once; an empty name list yields an empty frame with the right columns.
    return pd.concat(frames) if frames else my_gameweek_projections.iloc[0:0]


my_starting_11 = _select_players(my_starting_11_names)
my_subs = _select_players(my_subs_names)

# The best starter is counted a second time (presumably modelling the captain).
# Sum per player first, so a player with several rows in the gameweek is compared
# on their gameweek total rather than on a single row.
captain_points = my_starting_11.groupby('name')['expected_points'].sum().max()
expected_points = my_starting_11['expected_points'].sum() + captain_points
display(f'Expected points: {expected_points}')

display(my_starting_11[['name', 'element_type', 'home', 'expected_points']])
#display(my_starting_11.shape[0])

display(my_subs[['name', 'element_type', 'home', 'expected_points']])
#display(my_subs.shape[0])

In [None]:
# my_gameweek = latest_gameweek+1

# my_starting_11_names = ['Pickford', 'Zabarn', 'Bradley', 'Konsa', 'Salah', 'Saka', 'Pascal Gro', 'Palmer', 'Solanke',  
#                   'Haaland', 'Watkins']
# my_subs_names = ['Turner', 'Sarabia', 'Taylor', 'Estupi',]

# my_starting_11 = pd.DataFrame()
# for my_name in my_starting_11_names:
#     my_starting_11 = pd.concat([
#                     my_starting_11, 
#                     projections[(projections.gameweek==my_gameweek) & (projections.name.str.contains(my_name))]
#                     ])

# my_subs = pd.DataFrame()
# for my_name in my_subs_names:
#     my_subs = pd.concat([
#                     my_subs, 
#                     projections[(projections.gameweek==my_gameweek) & (projections.name.str.contains(my_name))]
#                     ])

# expected_points = my_starting_11['expected_points'].sum() + my_starting_11['expected_points'].max()
# display(f'Expected points: {expected_points}')

# display(my_starting_11[['name', 'element_type', 'home', 'expected_points']])
# display(my_starting_11.shape[0])

# display(my_subs[['name', 'element_type', 'home', 'expected_points']])
# display(my_subs.shape[0])

In [None]:
# Top 20 players by projected points in the upcoming gameweek, with their cost.
# Projected points are summed over a player's rows, but the cost is taken once:
# summing `now_cost` would multiply the price for players with several rows.
(
    projections[(projections.gameweek==(latest_gameweek+1))]
    .groupby('name')
    .agg({'now_cost': 'first', 'expected_points': 'sum'})
    .sort_values('expected_points', ascending=False)
    .head(20)
)

In [None]:
#players = ['Pervis Estupiñán', 'Kieran Trippier']
#colors = ['red', 'white']
players = ['Heung-Min Son', 'Bruno Borges Fernandes']
colors = ['white', 'red']

# Cumulative projected points per gameweek, one line per player.
fig = go.Figure()
for player, line_color in zip(players, colors):

    my_projections = projections[projections['name']==player].sort_values(by='gameweek')
    # Sum the rows of each gameweek, then accumulate over gameweeks.
    cumulative_points = my_projections.groupby('gameweek').sum()['expected_points'].cumsum()

    projected_trace = go.Scatter(
        x=np.unique(my_projections['gameweek']),
        y=cumulative_points,
        mode="markers+lines",
        marker=dict(color=line_color),
        name=player + ' - projected',
        #hovertext=('Opponent: ' + my_projections['opp_team'] +
        #           ', was_home: ' + my_projections['was_home'].astype(str)),
        showlegend=True,
    )
    fig.add_trace(projected_trace)

fig.update_layout(
    #title="",
    template='plotly_dark',
    xaxis_title="gameweek",
    yaxis_title='projected_points',
    #showlegend=True
)

In [None]:
# Fixture-by-fixture projections for each player of the comparison, indexed by gameweek.
for player in players:
    my_projections = projections[projections['name']==player].sort_values(by='gameweek')
    print(player)
    fixture_table = my_projections.set_index('gameweek')[['opponent_team', 'home', 'expected_points']]
    display(fixture_table)

In [None]:
# Spot check: all columns summed per gameweek for rows whose name contains 'Trippier'.
fpl_df[fpl_df['name'].str.contains('Trippier')].groupby('gameweek').sum()

In [None]:
from scipy.ndimage import gaussian_filter

In [None]:
# Past expected points and future projections per player, drawn as one line each.
#   mode='gameweek' : per-gameweek values (past: 5-gameweek EWM of xPoints)
#   mode='cumsum'   : running totals, with the projection continuing from the past total
mode = 'gameweek'
#mode = 'cumsum'

players = ['Phil Foden', 'Jarrod Bowen']
colors = ['red', 'blue']
#players = ['Heung-Min Son', 'Bruno Borges Fernandes']
#colors = ['white', 'red']

# Fail early on an unknown mode: merely printing a hint would let the plotting code
# below run with undefined (or stale) y_past / y_future.
if mode not in ('gameweek', 'cumsum'):
    raise ValueError(f"Choose a mode for the graph! Got {mode!r}, expected 'gameweek' or 'cumsum'.")

fig = go.Figure()
for count, player in enumerate(players):

    my_past_data = fpl_df[fpl_df['name']==player].sort_values(by='gameweek')
    my_projections = projections[projections['name']==player].sort_values(by='gameweek')

    x_past = np.unique(my_past_data['gameweek'])
    x_future = np.unique(my_projections['gameweek'])

    # Select the needed column before summing so only that column is aggregated.
    projected_by_gameweek = my_projections.groupby('gameweek')['expected_points'].sum()

    if mode=='gameweek':
        y_past = my_past_data.groupby('gameweek')['gameweek_xPoints_ewm_5'].sum()
        y_future = projected_by_gameweek
    else:  # mode=='cumsum'
        y_past = my_past_data.groupby('gameweek')['gameweek_xPoints'].sum().cumsum()
        # Continue from the last past total; start from 0 for a player without past rows.
        past_total = y_past.iloc[-1] if len(y_past) else 0
        y_future = past_total + projected_by_gameweek.cumsum()

    # Past and future share colour and name; only the past trace gets a legend entry.
    fig.add_trace(
        go.Scatter(
            x=x_past,
            y=y_past,
            mode="markers+lines",
            marker=dict(color=colors[count]),
            name=player,
            showlegend=True,
            ),
    )

    fig.add_trace(
        go.Scatter(
            x=x_future,
            y=y_future,
            mode="markers+lines",
            marker=dict(color=colors[count]),
            name=player,
            showlegend=False,
            ),
    )

fig.update_layout(
    #title="",
    template='plotly_dark',
    xaxis_title="gameweek",
    yaxis_title='expected points',
    #showlegend=True
)

In [None]:
# Gaussian-smoothed expected points (past xPoints followed by projections) per player.
players = ['Kieran Trippier', 'Trent Alexander-Arnold']
# One colour per player, defined here so the cell does not depend on a `colors`
# variable left behind by an earlier cell.
colors = ['red', 'blue']

#my_fill = ['tozeroy', 'tonexty']
my_fill = ['tonexty', 'tonexty']
#players = ['Heung-Min Son', 'Bruno Borges Fernandes']
#colors = ['white', 'red']

fig = go.Figure()
for count, player in enumerate(players):

    my_past_data = fpl_df[fpl_df['name']==player].sort_values(by='gameweek')
    my_projections = projections[projections['name']==player].sort_values(by='gameweek')

    # x: past gameweeks followed by projected gameweeks
    x_past = list(np.unique(my_past_data['gameweek']))
    x_future = list(np.unique(my_projections['gameweek']))
    my_x = x_past + x_future

    # y: per-gameweek sums, past xPoints followed by projected points
    y_past = list(my_past_data.groupby('gameweek')['gameweek_xPoints'].sum())
    y_future = list(my_projections.groupby('gameweek')['expected_points'].sum())
    my_y = y_past + y_future
    # Smooth across the past/future boundary; 'nearest' repeats the edge values.
    my_y_filtered = gaussian_filter(my_y, sigma=2, mode='nearest')

    fig.add_trace(
        go.Scatter(
            x=my_x,
            y=my_y_filtered,
            mode="markers+lines",
            marker=dict(color=colors[count]),
            fill=my_fill[count],
            name=player,
            showlegend=True,
            ),
    )

    # With a single player, also show the unsmoothed values as white markers.
    if len(players)==1:
        fig.add_trace(
            go.Scatter(
                x=my_x,
                y=my_y,
                mode="markers",
                marker=dict(color='white'),
                name=player,
                showlegend=False,
                ),
        )

# Vertical line between the last past gameweek and the first projected one.
fig.add_vline(x=latest_gameweek+0.5,)

fig.update_layout(
    #title="",
    template='plotly_dark',
    xaxis_title="gameweek",
    yaxis_title='expected points',
    #showlegend=True
)

In [None]:
import math

In [None]:
# Largest per-gameweek expected-points total of one player, rounded up to an integer.
player = 'Cole Palmer'
my_past_data = fpl_df[fpl_df['name']==player].sort_values(by='gameweek')
xpoints_by_gameweek = my_past_data.groupby('gameweek').sum()['gameweek_xPoints']
y_past = list(xpoints_by_gameweek)
#min(math.floor(value) for value in y_past)
max(math.ceil(value) for value in y_past)

In [None]:
# Overlaid histograms of a player's expected points and actual points per gameweek.
player = 'Erling Haaland'
my_past_data = fpl_df[fpl_df['name']==player].sort_values(by='gameweek')
# Sum only the two columns that are plotted (one value per gameweek each).
points_by_gameweek = my_past_data.groupby('gameweek')[['gameweek_xPoints', 'event_points']].sum()
my_xpoints = list(points_by_gameweek['gameweek_xPoints'])
my_points = list(points_by_gameweek['event_points'])

# Shared integer bin edges so that both histograms line up.
bin_size = 1
x_start = math.floor(min(my_xpoints + my_points))
x_end = math.ceil(max(my_xpoints + my_points))

fig = go.Figure()

# Give each trace its own legend label; with the bare player name on both,
# the two distributions could not be told apart.
for label, values in (('xPoints', my_xpoints), ('points', my_points)):
    fig.add_trace(
        go.Histogram(
            x=values,
            xbins=dict(start=x_start, end=x_end, size=bin_size),
            name=player + ' - ' + label,
            showlegend=True,
        ),
    )

fig.update_layout(barmode='overlay')
fig.update_traces(opacity=0.75)

In [None]:
# Name and gameweek of every historical row of the two players.
players = ['Kieran Trippier', 'Trent Alexander-Arnold']
is_selected_player = fpl_df['name'].isin(players[:2])
fpl_df.loc[is_selected_player, ['name', 'gameweek']]

In [None]:
# Expected and actual points of the two players, summed per (name, gameweek).
players = ['Kieran Trippier', 'Trent Alexander-Arnold']
selected_rows = fpl_df[fpl_df['name'].isin(players[:2])]
points_per_gameweek = selected_rows.groupby(['name', 'gameweek']).sum()
points_per_gameweek[['gameweek_xPoints', 'event_points']].reset_index()

In [None]:
# Swarm plot of per-gameweek points, one colour per player.
players = ['Kieran Trippier', 'Trent Alexander-Arnold']
mode = 'gameweek_xPoints'
#mode = 'event_points'

# One row per (player, gameweek) for any number of players. Grouping by name as well
# keeps the 'name' column intact for the hue (summing it would concatenate strings),
# and no player count leaves `my_past_data` undefined.
my_past_data = (
    fpl_df[fpl_df['name'].isin(players)]
    .groupby(['name', 'gameweek'])[['gameweek_xPoints', 'event_points']]
    .sum()
    .reset_index()
)

sns.swarmplot(data=my_past_data, x=mode, orient='h', size=10, hue='name')
plt.xlabel(mode)
plt.show()

## SHAP

In [None]:
# Last value of `y_past` as left behind by the most recently run plotting cell.
# When the notebook runs top to bottom `y_past` is a plain list at this point (a pandas
# Series only right after the past-vs-projected line chart), so index positionally via
# NumPy instead of `.iloc`, which a list does not have.
np.asarray(y_past)[-1]

In [None]:
# Load a saved CatBoost regressor from disk into a fresh CatBoostRegressor.
# NOTE(review): the model file name is hard-coded — presumably it has to be the model
# that produced the loaded projections; confirm when the model is retrained.
model_path = Path('../models/catboost_20240102-210118.cbm')
model = catboost.CatBoostRegressor()
model.load_model(model_path)

In [None]:
# Feature names, generated from their naming pattern. The resulting lists (content and
# order) are the model's input columns and the target.
# NOTE(review): the "shift" / "no_shift" split presumably marks which columns were
# lagged when the training data was built — confirm against the training code.

# Windows of the exponentially weighted means.
ewm_windows = [5, 10, 20, 40]

# Per-gameweek player statistics that exist as EWM and expanding aggregates.
player_stats = [
    'gameweek_assists', 'gameweek_bps', 'gameweek_creativity', 'event_points',
    'gameweek_goals_scored', 'gameweek_goals_conceded', 'gameweek_saves',
    'gameweek_threat', 'gameweek_xG', 'gameweek_xA', 'gameweek_xGA',
    'gameweek_minutes', 'gameweek_xPoints',
]

features_no_shift = (
    ['element_type', 'home']
    # opponent strength: xG for every window, then xGA for every window
    + [f'opponent_{metric}_ewm_{window}' for metric in ('xG', 'xGA') for window in ewm_windows]
)

features_shift = (
    ['corners_and_indirect_freekicks_order', 'creativity_rank',
     'direct_freekicks_order', 'ict_index_rank', 'influence_rank',
     'minutes', 'now_cost', 'penalties_order', 'points_per_game',
     'selected_by_percent', 'threat_rank']
    # own team strength: xG for every window, then xGA for every window
    + [f'team_{metric}_ewm_{window}' for metric in ('xG', 'xGA') for window in ewm_windows]
    # player statistics: all statistics for window 5, then 10, 20, 40
    + [f'{stat}_ewm_{window}' for window in ewm_windows for stat in player_stats]
    + [f'{stat}_expanding' for stat in player_stats]
    # the per-90 variant exists for every statistic except the minutes themselves
    + [f'{stat}_expanding_per90' for stat in player_stats if stat != 'gameweek_minutes']
    + ['xG_overperformance']
)

features = features_no_shift + features_shift

target = ['event_points']

In [None]:
# Player to explain: every projection row whose name contains `my_player`.
# `my_player` is reused by the SHAP cell below to select the rows to explain.
my_player = 'Konsa'
projections.loc[projections['name'].str.contains(my_player), ['name', 'team_name', 'home', 'expected_points']]

In [None]:
# Explain the model's predictions for the selected player's projection rows,
# using the model's feature columns.
# NOTE(review): `import shap` is commented out in the imports cell, so on a fresh kernel
# this cell raises NameError until that import is re-enabled.
explainer = shap.TreeExplainer(model)
shap_values = explainer(projections.loc[projections['name'].str.contains(my_player), features]);

In [None]:
# Bar plot of the SHAP values computed above, limited to 20 bars.
# NOTE(review): needs `shap` to be imported (the import is currently commented out).
shap.plots.bar(shap_values, max_display=20)