- Need to add data processing for gameweeks where one team plays multiple games. This needs to be done in the "Add team data to FPL data" section. It should be doable by using fpl_df.data_retrieved_datetime to pick the right game (the most recent game before the given date).
- Fix Son first_name, second_name issue

In [1]:
# NOTE(review): presumably the number of gameweeks by which features are shifted/lagged further down — confirm against its usage
shift_param = 1

In [2]:
import pandas as pd
import numpy as np

from pathlib import Path
import os
import datetime as dt
import pickle

from src.utils import fetch_latest_fpl_data

from sklearn.linear_model import LogisticRegression
from scipy.stats import poisson

import matplotlib.pyplot as plt
import plotly.express as px

# Raise pandas' display limits so the wide FPL frames shown below are not truncated.
pd.options.display.max_columns = 500
pd.options.display.max_rows = 100

In [3]:
# Classifier that estimates bonus points from a player's gameweek bps.
# Unpickling runs code embedded in the file, so only a trusted model file belongs at this path.
model_path = Path("../models/logistic_regression_for_bonus_points.pkl")
with model_path.open("rb") as model_file:
    clf = pickle.load(model_file)

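Note: pickled models can execute arbitrary code when loaded, so only load model files from a trusted source. See https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations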


# Functions

In [4]:
def fpl_data_processing(df, columns):
    """Pick each row's own-gameweek xG, xA and xGA and return the requested columns.

    For every row the value of 'gameweek' selects the matching wide columns
    'xG_week<gameweek>', 'xA_week<gameweek>' and 'xGA_week<gameweek>'. The picked
    values are written to `df` in place as 'gameweek_xG', 'gameweek_xA' and
    'gameweek_xGA'.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'gameweek' column and the per-week stat columns. It is
        modified in place (three columns are added or overwritten).
    columns : list
        Column labels to keep in the returned frame.

    Returns
    -------
    pandas.DataFrame
        Independent copy of `df[columns]`.
    """
    stat_names = ('xG', 'xA', 'xGA')
    collected = {stat: [] for stat in stat_names}

    # one pass over the rows, gathering all three stats for the row's own gameweek
    for _, record in df.iterrows():
        week = record['gameweek']
        for stat in stat_names:
            collected[stat].append(record[f'{stat}_week{week}'])

    for stat in stat_names:
        df[f'gameweek_{stat}'] = collected[stat]

    return df[columns].copy()

In [5]:
def my_fill_na(x, gameweek_col, diff_col):
    '''Return the value to use for `gameweek_col` in one row of a diffed, grouped variable.

    The first row of each group is nan after taking a diff. Such a row is filled with
    x[diff_col], but only when x['minutes'] is at most 90; otherwise the nan is kept,
    so that season 22-23, where several early weeks of data are missing, is not filled.
    Rows that already have a value are returned unchanged.'''
    is_missing = np.isnan(x[gameweek_col])
    at_most_one_match = x['minutes'] <= 90
    if is_missing & at_most_one_match:
        return x[diff_col]
    return x[gameweek_col]
    

In [6]:
def calculate_xPoints(x,clf):
    """Expected points for a given gameweek given underlying stats for that gameweek.

    Parameters
    ----------
    x : mapping (e.g. a DataFrame row)
        Must provide 'element_type' (1-4; 1 and 2 are treated as GK and DEF),
        'gameweek_minutes', 'gameweek_xG', 'gameweek_xA', 'team_xGA',
        'gameweek_saves', 'gameweek_penalties_saved', 'gameweek_bps',
        'gameweek_red_cards', 'gameweek_yellow_cards' and 'gameweek_own_goals'.
    clf : classifier
        Object whose predict_proba, given a (1, 1) array holding the bps, returns
        four probabilities that are weighted by 0, 1, 2 and 3 bonus points.

    Returns
    -------
    float
        Expected points, or nan when 'gameweek_bps' is nan (the bonus points
        cannot be estimated in that case).
    """

    clean_sheet_points = np.array([4,4,1,0])
    goal_points = np.array([6,6,5,4])

    # return nan if bonus points can't be estimated
    if np.isnan(x['gameweek_bps']):
        return np.nan

    # element_type is 1-based; the cast keeps a float-typed value usable as an index
    position_ix = int(x['element_type']) - 1
    minutes = x['gameweek_minutes']
    xGA = x['team_xGA']
    played_over_60 = minutes >= 60

    # calculate expected points; every component is a plain scalar so the final
    # float() never has to convert an n-dimensional array
    points_played = 1 if minutes > 0 else 0
    points_played_over_60 = 1 if played_over_60 else 0
    points_xG = goal_points[position_ix] * x['gameweek_xG']
    points_xA = x['gameweek_xA'] * 3
    clean_sheet_probability = poisson.pmf(0, xGA)
    points_clean_sheet = clean_sheet_points[position_ix] * clean_sheet_probability if played_over_60 else 0
    points_saves = x['gameweek_saves'] // 3
    points_penalty_saves = x['gameweek_penalties_saved'] * 5 * 0.21 #points for save times approx. probability of penalty save
    #penalty_for_penalty_miss = x['Performance_PKatt'] * (-2*0.21) # this data only on fbref

    # estimate bonus points as the probability-weighted number of bonus points
    y_pred_prob = clf.predict_proba(np.array(x['gameweek_bps']).reshape(-1, 1))
    points_bonus = np.matmul(y_pred_prob, np.array([0,1,2,3]).reshape((4,1))).item()

    # penalty for possible points deductions based on goals conceded, applied only to GK and DEF
    if position_ix in (0, 1):
        # minus the probability of conceding 2 to 11 goals; every term is subtracted
        # NOTE(review): all goal counts are weighted -1 here; if the intent is a larger
        # deduction for 4+, 6+, ... goals conceded, those buckets need weights -2, -3, ... — confirm
        xGA_conceded_penalty = -poisson.pmf(np.arange(2, 12), xGA).sum()
    else:
        xGA_conceded_penalty = 0
    # scale penalty with playing time
    xGA_conceded_penalty = (minutes / 90) * xGA_conceded_penalty

    # a red card takes precedence over a yellow card
    if x['gameweek_red_cards'] == 1:
        penalty_for_cards = -3
    elif x['gameweek_yellow_cards'] == 1:
        penalty_for_cards = -1
    else:
        penalty_for_cards = 0
    penalty_for_own_goal = -2 * x['gameweek_own_goals']

    # add up all point components
    total_points = float(
        points_played + points_played_over_60 + points_xG + points_xA
        + points_clean_sheet + points_saves + points_penalty_saves + points_bonus
        + xGA_conceded_penalty + penalty_for_cards + penalty_for_own_goal
    )

    return total_points

# Fetch data

In [7]:
# Load the FPL player data of earlier seasons and preview it together with its shape.
filepath = Path('../data/modeling/fpl_df.csv')
fpl_df = pd.read_csv(filepath, index_col=0)
display(fpl_df.head(), fpl_df.shape)

Unnamed: 0,assists,bonus,bps,clean_sheets,corners_and_indirect_freekicks_order,creativity,creativity_rank,creativity_rank_type,direct_freekicks_order,dreamteam_count,element_type,event_points,first_name,goals_conceded,goals_scored,ict_index,ict_index_rank,ict_index_rank_type,influence,influence_rank,influence_rank_type,minutes,now_cost,own_goals,penalties_missed,penalties_order,penalties_saved,points_per_game,red_cards,saves,second_name,selected_by_percent,threat,threat_rank,threat_rank_type,total_points,web_name,yellow_cards,team_name,gameweek,season,gameweek_xG,gameweek_xA,gameweek_xGA,gameweek_minutes,team_xG,team_xGA,team_xG_ewm_5,team_xG_ewm_10,team_xG_ewm_20,team_xG_ewm_40,team_xGA_ewm_5,team_xGA_ewm_10,team_xGA_ewm_20,team_xGA_ewm_40,opponent_xG,opponent_xGA,opponent_xG_ewm_5,opponent_xG_ewm_10,opponent_xG_ewm_20,opponent_xG_ewm_40,opponent_xGA_ewm_5,opponent_xGA_ewm_10,opponent_xGA_ewm_20,opponent_xGA_ewm_40,home,gameweek_assists,gameweek_bps,gameweek_creativity,gameweek_goals_scored,gameweek_goals_conceded,gameweek_own_goals,gameweek_penalties_saved,gameweek_red_cards,gameweek_saves,gameweek_threat,gameweek_yellow_cards,gameweek_xPoints,gameweek_assists_ewm_5,gameweek_bps_ewm_5,gameweek_creativity_ewm_5,event_points_ewm_5,gameweek_goals_scored_ewm_5,gameweek_goals_conceded_ewm_5,gameweek_saves_ewm_5,gameweek_threat_ewm_5,gameweek_xG_ewm_5,gameweek_xA_ewm_5,gameweek_xGA_ewm_5,gameweek_minutes_ewm_5,gameweek_xPoints_ewm_5,gameweek_assists_ewm_10,gameweek_bps_ewm_10,gameweek_creativity_ewm_10,event_points_ewm_10,gameweek_goals_scored_ewm_10,gameweek_goals_conceded_ewm_10,gameweek_saves_ewm_10,gameweek_threat_ewm_10,gameweek_xG_ewm_10,gameweek_xA_ewm_10,gameweek_xGA_ewm_10,gameweek_minutes_ewm_10,gameweek_xPoints_ewm_10,gameweek_assists_ewm_20,gameweek_bps_ewm_20,gameweek_creativity_ewm_20,event_points_ewm_20,gameweek_goals_scored_ewm_20,gameweek_goals_conceded_ewm_20,gameweek_saves_ewm_20,gameweek_threat_ewm_20,gameweek_xG_ewm_20,gameweek_xA_ewm_20,gameweek_xG
A_ewm_20,gameweek_minutes_ewm_20,gameweek_xPoints_ewm_20,gameweek_assists_ewm_40,gameweek_bps_ewm_40,gameweek_creativity_ewm_40,event_points_ewm_40,gameweek_goals_scored_ewm_40,gameweek_goals_conceded_ewm_40,gameweek_saves_ewm_40,gameweek_threat_ewm_40,gameweek_xG_ewm_40,gameweek_xA_ewm_40,gameweek_xGA_ewm_40,gameweek_minutes_ewm_40,gameweek_xPoints_ewm_40,gameweek_assists_expanding,gameweek_bps_expanding,gameweek_creativity_expanding,event_points_expanding,gameweek_goals_scored_expanding,gameweek_goals_conceded_expanding,gameweek_saves_expanding,gameweek_threat_expanding,gameweek_xG_expanding,gameweek_xA_expanding,gameweek_xGA_expanding,gameweek_minutes_expanding,gameweek_xPoints_expanding,gameweek_assists_expanding_per90,gameweek_bps_expanding_per90,gameweek_creativity_expanding_per90,event_points_expanding_per90,gameweek_goals_scored_expanding_per90,gameweek_goals_conceded_expanding_per90,gameweek_saves_expanding_per90,gameweek_threat_expanding_per90,gameweek_xG_expanding_per90,gameweek_xA_expanding_per90,gameweek_xGA_expanding_per90,gameweek_minutes_expanding_per90,gameweek_xPoints_expanding_per90,xG_overperformance
0,0,0,3,0,,0.0,493,188,4.0,0,2,1,David,0,0,0.0,497,188,0.0,490,188,1,55,0,0,,0,1.0,0,0,Luiz Moreira Marinho,0.9,0.0,479,186,1,David Luiz,0,Arsenal,2,20-21,0.0,0.0,1.9,1.0,1.4,1.9,1.8,1.8,1.8,1.8,0.2,0.2,0.2,0.2,1.9,1.4,1.1,1.1,1.1,1.1,1.5,1.5,1.5,1.5,1.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.993836,0.0,3.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.9,1.0,0.993836,0.0,3.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.9,1.0,0.993836,0.0,3.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.9,1.0,0.993836,0.0,3.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.9,1.0,0.993836,0.0,3.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.9,1.0,0.993836,0.0,270.0,0.0,90.0,0.0,0.0,0.0,0.0,0.0,0.0,171.0,90.0,89.445204,
1,1,0,39,1,,51.7,19,15,6.0,0,3,5,Pierre-Emerick,1,1,18.5,13,9,64.6,31,15,180,119,0,0,1.0,0,6.0,0,0,Aubameyang,32.9,69.0,16,6,12,Aubameyang,1,Arsenal,2,20-21,0.1,0.5,1.9,90.0,1.4,1.9,1.8,1.8,1.8,1.8,0.2,0.2,0.2,0.2,1.9,1.4,1.1,1.1,1.1,1.1,1.5,1.5,1.5,1.5,1.0,,,,,,,,,,,,,,,,5.0,,,,,0.1,0.5,1.9,90.0,,,,,5.0,,,,,0.1,0.5,1.9,90.0,,,,,5.0,,,,,0.1,0.5,1.9,90.0,,,,,5.0,,,,,0.1,0.5,1.9,90.0,,,,,5.0,,,,,0.1,0.5,1.9,90.0,,,,,5.0,,,,,0.1,0.5,1.9,90.0,,
2,0,2,58,1,,18.4,95,11,5.0,0,4,7,Alexandre,1,2,16.9,20,8,71.6,25,9,162,85,0,0,3.0,0,7.0,0,0,Lacazette,5.1,79.0,14,10,14,Lacazette,0,Arsenal,2,20-21,0.1,0.0,1.9,76.0,1.4,1.9,1.8,1.8,1.8,1.8,0.2,0.2,0.2,0.2,1.9,1.4,1.1,1.1,1.1,1.1,1.5,1.5,1.5,1.5,1.0,,,,,,,,,,,,,,,,7.0,,,,,0.1,0.0,1.9,76.0,,,,,7.0,,,,,0.1,0.0,1.9,76.0,,,,,7.0,,,,,0.1,0.0,1.9,76.0,,,,,7.0,,,,,0.1,0.0,1.9,76.0,,,,,7.0,,,,,0.1,0.0,1.9,76.0,,,,,8.289474,,,,,0.118421,0.0,2.25,90.0,,
3,0,1,47,1,,0.0,460,47,,0,1,2,Bernd,1,0,3.2,191,12,32.0,100,12,180,50,0,0,,0,4.5,0,4,Leno,9.0,0.0,435,47,9,Leno,0,Arsenal,2,20-21,0.0,0.0,1.9,90.0,1.4,1.9,1.8,1.8,1.8,1.8,0.2,0.2,0.2,0.2,1.9,1.4,1.1,1.1,1.1,1.1,1.5,1.5,1.5,1.5,1.0,,,,,,,,,,,,,,,,2.0,,,,,0.0,0.0,1.9,90.0,,,,,2.0,,,,,0.0,0.0,1.9,90.0,,,,,2.0,,,,,0.0,0.0,1.9,90.0,,,,,2.0,,,,,0.0,0.0,1.9,90.0,,,,,2.0,,,,,0.0,0.0,1.9,90.0,,,,,2.0,,,,,0.0,0.0,1.9,90.0,,
4,0,0,21,1,,23.9,74,49,2.0,0,3,2,Granit,1,0,6.8,88,44,23.2,133,47,167,55,0,0,,0,2.5,0,0,Xhaka,0.8,21.0,92,49,5,Xhaka,0,Arsenal,2,20-21,0.0,0.0,1.9,90.0,1.4,1.9,1.8,1.8,1.8,1.8,0.2,0.2,0.2,0.2,1.9,1.4,1.1,1.1,1.1,1.1,1.5,1.5,1.5,1.5,1.0,,,,,,,,,,,,,,,,2.0,,,,,0.0,0.0,1.9,90.0,,,,,2.0,,,,,0.0,0.0,1.9,90.0,,,,,2.0,,,,,0.0,0.0,1.9,90.0,,,,,2.0,,,,,0.0,0.0,1.9,90.0,,,,,2.0,,,,,0.0,0.0,1.9,90.0,,,,,2.0,,,,,0.0,0.0,1.9,90.0,,


(21223, 157)

In [8]:
# Fetch the current season's FPL data via the project helper and preview it with its shape.
fpl_df_new = fetch_latest_fpl_data()
display(fpl_df_new.head(), fpl_df_new.shape)

Unnamed: 0,chance_of_playing_next_round,chance_of_playing_this_round,code,cost_change_event,cost_change_event_fall,cost_change_start,cost_change_start_fall,dreamteam_count,element_type,ep_next,ep_this,event_points,first_name,form,id,in_dreamteam,news,news_added,now_cost,photo,points_per_game,second_name,selected_by_percent,special,squad_number,status,team,team_code,total_points,transfers_in,transfers_in_event,transfers_out,transfers_out_event,value_form,value_season,web_name,minutes,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,saves,bonus,bps,influence,creativity,threat,ict_index,starts,expected_goals,expected_assists,expected_goal_involvements,expected_goals_conceded,influence_rank,influence_rank_type,creativity_rank,creativity_rank_type,threat_rank,threat_rank_type,ict_index_rank,ict_index_rank_type,corners_and_indirect_freekicks_order,corners_and_indirect_freekicks_text,direct_freekicks_order,direct_freekicks_text,penalties_order,penalties_text,expected_goals_per_90,saves_per_90,expected_assists_per_90,expected_goal_involvements_per_90,expected_goals_conceded_per_90,goals_conceded_per_90,now_cost_rank,now_cost_rank_type,form_rank,form_rank_type,points_per_game_rank,points_per_game_rank_type,selected_rank,selected_rank_type,starts_per_90,clean_sheets_per_90,team_name,name,gameweek,season,data_retrieved_datetime
0,,,226597,0,0,0,0,0,2,2.8,2.8,1,Gabriel,1.0,5,False,,,50,226597.jpg,1.0,dos Santos Magalhães,31.6,False,,a,1,3,1,26550,26550,115952,115952,0.2,0.2,Gabriel,4,0,0,0,0,0,0,0,0,0,0,0,2,0.2,0.0,0.0,0.0,0,0.0,0.0,0.0,0.02,196,72,554,210,546,209,559,210,,,,,,,0.0,0.0,0.0,0.0,0.45,0.0,255,31,218,77,218,77,7,3,0.0,0.0,Arsenal,Gabriel dos Santos Magalhães,1,23-24,2023-08-14 21:41:02.445217
1,,,219847,0,0,0,0,0,3,3.1,3.1,2,Kai,2.0,6,False,,,75,219847.jpg,2.0,Havertz,4.8,False,,a,1,3,2,4273,4273,25704,25704,0.3,0.3,Havertz,90,0,0,0,1,0,0,0,0,0,0,0,11,9.4,30.3,15.0,5.5,1,0.05,0.24,0.29,1.18,130,51,22,16,58,34,54,29,,,,,,,0.05,0.0,0.24,0.29,1.18,1.0,24,14,156,73,156,73,76,25,1.0,0.0,Arsenal,Kai Havertz,1,23-24,2023-08-14 21:41:02.445217
2,,,444145,0,0,0,0,0,3,3.3,3.3,5,Gabriel,5.0,12,False,,,80,444145.jpg,5.0,Martinelli Silva,15.1,False,,a,1,3,5,41226,41226,17905,17905,0.6,0.6,Martinelli,85,0,1,0,1,0,0,0,0,0,0,0,23,25.4,42.5,8.0,7.6,1,0.0,0.22,0.22,1.16,46,16,11,7,82,48,27,15,4.0,,3.0,,3.0,,0.0,0.0,0.23,0.23,1.23,1.06,16,9,51,16,51,16,29,8,1.06,0.0,Arsenal,Gabriel Martinelli Silva,1,23-24,2023-08-14 21:41:02.445217
3,,,205533,0,0,0,0,0,4,2.4,2.4,8,Eddie,8.0,13,False,,,55,205533.jpg,8.0,Nketiah,4.0,False,,a,1,3,8,68097,68097,8998,8998,1.5,1.5,Nketiah,72,1,0,1,0,0,0,0,0,0,0,2,26,30.2,5.9,36.0,7.2,1,0.21,0.01,0.22,0.3,35,11,110,10,16,9,31,11,,,,,,,0.26,0.0,0.01,0.27,0.38,0.0,121,33,17,5,17,5,88,16,1.25,1.25,Arsenal,Eddie Nketiah,1,23-24,2023-08-14 21:41:02.445217
4,,,184029,0,0,0,0,0,3,3.4,3.4,2,Martin,2.0,14,False,,,85,184029.jpg,2.0,Ødegaard,23.6,False,,a,1,3,2,31704,31704,64463,64463,0.2,0.2,Ødegaard,90,0,0,0,1,0,0,0,0,0,0,0,11,12.0,23.4,14.0,4.9,1,0.11,0.09,0.2,1.18,110,40,39,27,60,36,68,38,3.0,,1.0,,,,0.11,0.0,0.09,0.2,1.18,1.0,8,6,158,74,158,74,14,6,1.0,0.0,Arsenal,Martin Ødegaard,1,23-24,2023-08-14 21:41:02.445217


(302, 93)

In [9]:
# Append the new-season rows to the historical rows; the outer join keeps the columns
# of both frames and the row index is renumbered from 0.
fpl_df = pd.concat([fpl_df, fpl_df_new], join='outer', ignore_index=True)
display(fpl_df.head(), fpl_df.shape)

Unnamed: 0,assists,bonus,bps,clean_sheets,corners_and_indirect_freekicks_order,creativity,creativity_rank,creativity_rank_type,direct_freekicks_order,dreamteam_count,element_type,event_points,first_name,goals_conceded,goals_scored,ict_index,ict_index_rank,ict_index_rank_type,influence,influence_rank,influence_rank_type,minutes,now_cost,own_goals,penalties_missed,penalties_order,penalties_saved,points_per_game,red_cards,saves,second_name,selected_by_percent,threat,threat_rank,threat_rank_type,total_points,web_name,yellow_cards,team_name,gameweek,season,gameweek_xG,gameweek_xA,gameweek_xGA,gameweek_minutes,team_xG,team_xGA,team_xG_ewm_5,team_xG_ewm_10,team_xG_ewm_20,team_xG_ewm_40,team_xGA_ewm_5,team_xGA_ewm_10,team_xGA_ewm_20,team_xGA_ewm_40,opponent_xG,opponent_xGA,opponent_xG_ewm_5,opponent_xG_ewm_10,opponent_xG_ewm_20,opponent_xG_ewm_40,opponent_xGA_ewm_5,opponent_xGA_ewm_10,opponent_xGA_ewm_20,opponent_xGA_ewm_40,home,gameweek_assists,gameweek_bps,gameweek_creativity,gameweek_goals_scored,gameweek_goals_conceded,gameweek_own_goals,gameweek_penalties_saved,gameweek_red_cards,gameweek_saves,gameweek_threat,gameweek_yellow_cards,gameweek_xPoints,gameweek_assists_ewm_5,gameweek_bps_ewm_5,gameweek_creativity_ewm_5,event_points_ewm_5,gameweek_goals_scored_ewm_5,gameweek_goals_conceded_ewm_5,gameweek_saves_ewm_5,gameweek_threat_ewm_5,gameweek_xG_ewm_5,gameweek_xA_ewm_5,gameweek_xGA_ewm_5,gameweek_minutes_ewm_5,gameweek_xPoints_ewm_5,gameweek_assists_ewm_10,gameweek_bps_ewm_10,gameweek_creativity_ewm_10,event_points_ewm_10,gameweek_goals_scored_ewm_10,gameweek_goals_conceded_ewm_10,gameweek_saves_ewm_10,gameweek_threat_ewm_10,gameweek_xG_ewm_10,gameweek_xA_ewm_10,gameweek_xGA_ewm_10,gameweek_minutes_ewm_10,gameweek_xPoints_ewm_10,gameweek_assists_ewm_20,gameweek_bps_ewm_20,gameweek_creativity_ewm_20,event_points_ewm_20,gameweek_goals_scored_ewm_20,gameweek_goals_conceded_ewm_20,gameweek_saves_ewm_20,gameweek_threat_ewm_20,gameweek_xG_ewm_20,gameweek_xA_ewm_20,gameweek_xG
A_ewm_20,gameweek_minutes_ewm_20,gameweek_xPoints_ewm_20,gameweek_assists_ewm_40,gameweek_bps_ewm_40,gameweek_creativity_ewm_40,event_points_ewm_40,gameweek_goals_scored_ewm_40,gameweek_goals_conceded_ewm_40,gameweek_saves_ewm_40,gameweek_threat_ewm_40,gameweek_xG_ewm_40,gameweek_xA_ewm_40,gameweek_xGA_ewm_40,gameweek_minutes_ewm_40,gameweek_xPoints_ewm_40,gameweek_assists_expanding,gameweek_bps_expanding,gameweek_creativity_expanding,event_points_expanding,gameweek_goals_scored_expanding,gameweek_goals_conceded_expanding,gameweek_saves_expanding,gameweek_threat_expanding,gameweek_xG_expanding,gameweek_xA_expanding,gameweek_xGA_expanding,gameweek_minutes_expanding,gameweek_xPoints_expanding,gameweek_assists_expanding_per90,gameweek_bps_expanding_per90,gameweek_creativity_expanding_per90,event_points_expanding_per90,gameweek_goals_scored_expanding_per90,gameweek_goals_conceded_expanding_per90,gameweek_saves_expanding_per90,gameweek_threat_expanding_per90,gameweek_xG_expanding_per90,gameweek_xA_expanding_per90,gameweek_xGA_expanding_per90,gameweek_minutes_expanding_per90,gameweek_xPoints_expanding_per90,xG_overperformance,chance_of_playing_next_round,chance_of_playing_this_round,code,cost_change_event,cost_change_event_fall,cost_change_start,cost_change_start_fall,ep_next,ep_this,form,id,in_dreamteam,news,news_added,photo,special,squad_number,status,team,team_code,transfers_in,transfers_in_event,transfers_out,transfers_out_event,value_form,value_season,starts,expected_goals,expected_assists,expected_goal_involvements,expected_goals_conceded,corners_and_indirect_freekicks_text,direct_freekicks_text,penalties_text,expected_goals_per_90,saves_per_90,expected_assists_per_90,expected_goal_involvements_per_90,expected_goals_conceded_per_90,goals_conceded_per_90,now_cost_rank,now_cost_rank_type,form_rank,form_rank_type,points_per_game_rank,points_per_game_rank_type,selected_rank,selected_rank_type,starts_per_90,clean_sheets_per_90,name,data_retrieved_datetime
0,0,0,3,0,,0.0,493,188,4.0,0,2,1,David,0,0,0.0,497,188,0.0,490,188,1,55,0,0,,0,1.0,0,0,Luiz Moreira Marinho,0.9,0.0,479,186,1,David Luiz,0,Arsenal,2,20-21,0.0,0.0,1.9,1.0,1.4,1.9,1.8,1.8,1.8,1.8,0.2,0.2,0.2,0.2,1.9,1.4,1.1,1.1,1.1,1.1,1.5,1.5,1.5,1.5,1.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.993836,0.0,3.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.9,1.0,0.993836,0.0,3.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.9,1.0,0.993836,0.0,3.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.9,1.0,0.993836,0.0,3.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.9,1.0,0.993836,0.0,3.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.9,1.0,0.993836,0.0,270.0,0.0,90.0,0.0,0.0,0.0,0.0,0.0,0.0,171.0,90.0,89.445204,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,1,0,39,1,,51.7,19,15,6.0,0,3,5,Pierre-Emerick,1,1,18.5,13,9,64.6,31,15,180,119,0,0,1.0,0,6.0,0,0,Aubameyang,32.9,69.0,16,6,12,Aubameyang,1,Arsenal,2,20-21,0.1,0.5,1.9,90.0,1.4,1.9,1.8,1.8,1.8,1.8,0.2,0.2,0.2,0.2,1.9,1.4,1.1,1.1,1.1,1.1,1.5,1.5,1.5,1.5,1.0,,,,,,,,,,,,,,,,5.0,,,,,0.1,0.5,1.9,90.0,,,,,5.0,,,,,0.1,0.5,1.9,90.0,,,,,5.0,,,,,0.1,0.5,1.9,90.0,,,,,5.0,,,,,0.1,0.5,1.9,90.0,,,,,5.0,,,,,0.1,0.5,1.9,90.0,,,,,5.0,,,,,0.1,0.5,1.9,90.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,0,2,58,1,,18.4,95,11,5.0,0,4,7,Alexandre,1,2,16.9,20,8,71.6,25,9,162,85,0,0,3.0,0,7.0,0,0,Lacazette,5.1,79.0,14,10,14,Lacazette,0,Arsenal,2,20-21,0.1,0.0,1.9,76.0,1.4,1.9,1.8,1.8,1.8,1.8,0.2,0.2,0.2,0.2,1.9,1.4,1.1,1.1,1.1,1.1,1.5,1.5,1.5,1.5,1.0,,,,,,,,,,,,,,,,7.0,,,,,0.1,0.0,1.9,76.0,,,,,7.0,,,,,0.1,0.0,1.9,76.0,,,,,7.0,,,,,0.1,0.0,1.9,76.0,,,,,7.0,,,,,0.1,0.0,1.9,76.0,,,,,7.0,,,,,0.1,0.0,1.9,76.0,,,,,8.289474,,,,,0.118421,0.0,2.25,90.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,0,1,47,1,,0.0,460,47,,0,1,2,Bernd,1,0,3.2,191,12,32.0,100,12,180,50,0,0,,0,4.5,0,4,Leno,9.0,0.0,435,47,9,Leno,0,Arsenal,2,20-21,0.0,0.0,1.9,90.0,1.4,1.9,1.8,1.8,1.8,1.8,0.2,0.2,0.2,0.2,1.9,1.4,1.1,1.1,1.1,1.1,1.5,1.5,1.5,1.5,1.0,,,,,,,,,,,,,,,,2.0,,,,,0.0,0.0,1.9,90.0,,,,,2.0,,,,,0.0,0.0,1.9,90.0,,,,,2.0,,,,,0.0,0.0,1.9,90.0,,,,,2.0,,,,,0.0,0.0,1.9,90.0,,,,,2.0,,,,,0.0,0.0,1.9,90.0,,,,,2.0,,,,,0.0,0.0,1.9,90.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,0,0,21,1,,23.9,74,49,2.0,0,3,2,Granit,1,0,6.8,88,44,23.2,133,47,167,55,0,0,,0,2.5,0,0,Xhaka,0.8,21.0,92,49,5,Xhaka,0,Arsenal,2,20-21,0.0,0.0,1.9,90.0,1.4,1.9,1.8,1.8,1.8,1.8,0.2,0.2,0.2,0.2,1.9,1.4,1.1,1.1,1.1,1.1,1.5,1.5,1.5,1.5,1.0,,,,,,,,,,,,,,,,2.0,,,,,0.0,0.0,1.9,90.0,,,,,2.0,,,,,0.0,0.0,1.9,90.0,,,,,2.0,,,,,0.0,0.0,1.9,90.0,,,,,2.0,,,,,0.0,0.0,1.9,90.0,,,,,2.0,,,,,0.0,0.0,1.9,90.0,,,,,2.0,,,,,0.0,0.0,1.9,90.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


(21525, 209)

In [10]:
# Load the rolling team statistics of past seasons; preview both ends and the shape.
filepath = Path('../data/modeling/team_data.csv')
team_data = pd.read_csv(filepath, index_col=0)
display(team_data.head(), team_data.tail(), team_data.shape)

Unnamed: 0,xg_home,xg_away,team_h_score,team_a_score,event,kickoff_time,id,variable,value,season,xG,xGA,xG_ewm_5,xGA_ewm_5,xG_ewm_10,xGA_ewm_10,xG_ewm_20,xGA_ewm_20,xG_ewm_40,xGA_ewm_40
0,0.2,1.8,0,3,1,2020-09-12 11:30:00+00:00,2,home_team,Fulham,20-21,0.2,1.8,,,,,,,,
1,0.2,1.8,0,3,1,2020-09-12 11:30:00+00:00,2,away_team,Arsenal,20-21,1.8,0.2,,,,,,,,
2,0.7,0.8,1,0,1,2020-09-12 14:00:00+00:00,1,home_team,Crystal Palace,20-21,0.7,0.8,,,,,,,,
3,0.7,0.8,1,0,1,2020-09-12 14:00:00+00:00,1,away_team,Southampton,20-21,0.8,0.7,,,,,,,,
4,3.3,0.6,4,3,1,2020-09-12 16:30:00+00:00,3,home_team,Liverpool,20-21,3.3,0.6,,,,,,,,


Unnamed: 0,xg_home,xg_away,team_h_score,team_a_score,event,kickoff_time,id,variable,value,season,xG,xGA,xG_ewm_5,xGA_ewm_5,xG_ewm_10,xGA_ewm_10,xG_ewm_20,xGA_ewm_20,xG_ewm_40,xGA_ewm_40
2275,1.3,1.6,1,0,38,2023-05-28 15:30:00+00:00,373,home_team,Brentford,22-23,1.3,1.6,1.654129,1.315968,1.59556,1.325587,1.524553,1.323285,1.459707,1.318193
2276,2.8,1.4,2,1,38,2023-05-28 15:30:00+00:00,372,home_team,Aston Villa,22-23,2.8,1.4,1.281753,1.06216,1.312362,1.222565,1.29623,1.31772,1.272177,1.346428
2277,2.8,0.5,5,0,38,2023-05-28 15:30:00+00:00,371,home_team,Arsenal,22-23,2.8,0.5,1.39976,1.341984,1.672355,1.320939,1.76582,1.234457,1.738285,1.186636
2278,1.8,4.0,4,4,38,2023-05-28 15:30:00+00:00,380,home_team,Southampton,22-23,1.8,4.0,1.012371,2.062474,0.997118,1.867827,1.008089,1.700506,1.049643,1.602657
2279,1.8,4.0,4,4,38,2023-05-28 15:30:00+00:00,380,away_team,Liverpool,22-23,4.0,1.8,1.837224,1.026413,1.90141,1.166376,1.929213,1.204443,1.979823,1.175962


(2280, 20)

In [11]:
# Load this season's FPL fixtures and keep only the rows flagged as finished.
filepath = Path('../data/fixtures/fpl_fixtures.csv')
fixtures_fpl = pd.read_csv(filepath, index_col=0).loc[lambda fixtures: fixtures['finished']]
display(fixtures_fpl.head(), fixtures_fpl.shape)

Unnamed: 0,code,event,finished,finished_provisional,id,kickoff_time,minutes,provisional_start_time,started,team_a,team_a_score,team_h,team_h_score,stats,team_h_difficulty,team_a_difficulty,pulse_id,home_team,away_team
2,2367538,1.0,True,True,1,2023-08-11T19:00:00Z,90,False,True,13,3.0,6,0.0,"[{'identifier': 'goals_scored', 'a': [{'value'...",5,2,93321,Burnley,Manchester City
3,2367540,1.0,True,True,2,2023-08-12T12:00:00Z,90,False,True,16,1.0,1,2.0,"[{'identifier': 'goals_scored', 'a': [{'value'...",2,4,93322,Arsenal,Nottingham Forest
4,2367539,1.0,True,True,3,2023-08-12T14:00:00Z,90,False,True,19,1.0,3,1.0,"[{'identifier': 'goals_scored', 'a': [{'value'...",2,2,93323,Bournemouth,West Ham
5,2367541,1.0,True,True,4,2023-08-12T14:00:00Z,90,False,True,12,1.0,5,4.0,"[{'identifier': 'goals_scored', 'a': [{'value'...",2,3,93324,Brighton,Luton
6,2367542,1.0,True,True,5,2023-08-12T14:00:00Z,90,False,True,10,1.0,9,0.0,"[{'identifier': 'goals_scored', 'a': [{'value'...",2,2,93325,Everton,Fulham


(10, 19)

In [12]:
# Load this season's fbref fixtures (the source of the per-match xG columns) and preview them.
filepath = Path('../data/fixtures/fbref_fixtures.csv')
fixtures_fbref = pd.read_csv(filepath, index_col=0)
display(fixtures_fbref.head(), fixtures_fbref.shape)

Unnamed: 0,Wk,Day,Date,Time,Home,xG_home,Score,xG_away,Away,Attendance,Venue,Referee,Match Report,Notes
0,1.0,Fri,2023-08-11,20:00,Burnley,0.3,0–3,1.9,Manchester City,21572.0,Turf Moor,Craig Pawson,Match Report,
1,1.0,Sat,2023-08-12,12:30,Arsenal,0.8,2–1,1.2,Nott'ham Forest,59984.0,Emirates Stadium,Michael Oliver,Match Report,
2,1.0,Sat,2023-08-12,15:00,Everton,2.7,0–1,1.5,Fulham,39940.0,Goodison Park,Stuart Attwell,Match Report,
3,1.0,Sat,2023-08-12,15:00,Sheffield Utd,0.5,0–1,1.9,Crystal Palace,31194.0,Bramall Lane,John Brooks,Match Report,
4,1.0,Sat,2023-08-12,15:00,Brighton,4.0,4–1,1.5,Luton Town,31872.0,The American Express Community Stadium,David Coote,Match Report,


(16, 14)

# Process FPL data

In [13]:
# find how many minutes a player played on a given gameweek (difference of cumulative minutes)
# NOTE(review): diff() assumes each player's rows are already in gameweek order within a season — confirm upstream sorting
fpl_df['gameweek_minutes'] = fpl_df.groupby(['first_name', 'second_name', 'season'])['minutes'].diff()
# fill na caused at the start of each season by taking diff (but don't fill for season 22-23 where early season data is missing)
# vectorised form of the my_fill_na rule: a nan is replaced by 'minutes' only when 'minutes' <= 90
fpl_df['gameweek_minutes'] = fpl_df['gameweek_minutes'].mask(
    fpl_df['gameweek_minutes'].isna() & fpl_df['minutes'].le(90),
    fpl_df['minutes'],
)
print('Number of rows with zero minutes played in a gameweek:')
display(fpl_df[fpl_df.gameweek_minutes==0].shape[0])
print('Number of rows with over 90 minutes played in a gameweek:')
display(fpl_df[fpl_df.gameweek_minutes>90].shape[0])

Number of rows with zero minutes played in a gameweek:


199

Number of rows with over 90 minutes played in a gameweek:


1595

In [14]:
# Seasons that contain problem rows: first those with more than 90 gameweek minutes,
# then those with zero gameweek minutes (fine as long as '23-24' is in neither list).
display(
    fpl_df.loc[fpl_df['gameweek_minutes'] > 90, 'season'].unique(),
    fpl_df.loc[fpl_df['gameweek_minutes'] == 0, 'season'].unique(),
)

array(['20-21', '21-22', '22-23'], dtype=object)

array(['20-21', '21-22', '22-23'], dtype=object)

In [15]:
# Keep only rows with more than 0 and at most 90 gameweek minutes (nan rows are dropped too),
# then renumber the index.
fpl_df = fpl_df.loc[
    fpl_df['gameweek_minutes'].gt(0) & fpl_df['gameweek_minutes'].le(90)
].reset_index(drop=True)
display(fpl_df.head(), fpl_df.shape)

Unnamed: 0,assists,bonus,bps,clean_sheets,corners_and_indirect_freekicks_order,creativity,creativity_rank,creativity_rank_type,direct_freekicks_order,dreamteam_count,element_type,event_points,first_name,goals_conceded,goals_scored,ict_index,ict_index_rank,ict_index_rank_type,influence,influence_rank,influence_rank_type,minutes,now_cost,own_goals,penalties_missed,penalties_order,penalties_saved,points_per_game,red_cards,saves,second_name,selected_by_percent,threat,threat_rank,threat_rank_type,total_points,web_name,yellow_cards,team_name,gameweek,season,gameweek_xG,gameweek_xA,gameweek_xGA,gameweek_minutes,team_xG,team_xGA,team_xG_ewm_5,team_xG_ewm_10,team_xG_ewm_20,team_xG_ewm_40,team_xGA_ewm_5,team_xGA_ewm_10,team_xGA_ewm_20,team_xGA_ewm_40,opponent_xG,opponent_xGA,opponent_xG_ewm_5,opponent_xG_ewm_10,opponent_xG_ewm_20,opponent_xG_ewm_40,opponent_xGA_ewm_5,opponent_xGA_ewm_10,opponent_xGA_ewm_20,opponent_xGA_ewm_40,home,gameweek_assists,gameweek_bps,gameweek_creativity,gameweek_goals_scored,gameweek_goals_conceded,gameweek_own_goals,gameweek_penalties_saved,gameweek_red_cards,gameweek_saves,gameweek_threat,gameweek_yellow_cards,gameweek_xPoints,gameweek_assists_ewm_5,gameweek_bps_ewm_5,gameweek_creativity_ewm_5,event_points_ewm_5,gameweek_goals_scored_ewm_5,gameweek_goals_conceded_ewm_5,gameweek_saves_ewm_5,gameweek_threat_ewm_5,gameweek_xG_ewm_5,gameweek_xA_ewm_5,gameweek_xGA_ewm_5,gameweek_minutes_ewm_5,gameweek_xPoints_ewm_5,gameweek_assists_ewm_10,gameweek_bps_ewm_10,gameweek_creativity_ewm_10,event_points_ewm_10,gameweek_goals_scored_ewm_10,gameweek_goals_conceded_ewm_10,gameweek_saves_ewm_10,gameweek_threat_ewm_10,gameweek_xG_ewm_10,gameweek_xA_ewm_10,gameweek_xGA_ewm_10,gameweek_minutes_ewm_10,gameweek_xPoints_ewm_10,gameweek_assists_ewm_20,gameweek_bps_ewm_20,gameweek_creativity_ewm_20,event_points_ewm_20,gameweek_goals_scored_ewm_20,gameweek_goals_conceded_ewm_20,gameweek_saves_ewm_20,gameweek_threat_ewm_20,gameweek_xG_ewm_20,gameweek_xA_ewm_20,gameweek_xG
A_ewm_20,gameweek_minutes_ewm_20,gameweek_xPoints_ewm_20,gameweek_assists_ewm_40,gameweek_bps_ewm_40,gameweek_creativity_ewm_40,event_points_ewm_40,gameweek_goals_scored_ewm_40,gameweek_goals_conceded_ewm_40,gameweek_saves_ewm_40,gameweek_threat_ewm_40,gameweek_xG_ewm_40,gameweek_xA_ewm_40,gameweek_xGA_ewm_40,gameweek_minutes_ewm_40,gameweek_xPoints_ewm_40,gameweek_assists_expanding,gameweek_bps_expanding,gameweek_creativity_expanding,event_points_expanding,gameweek_goals_scored_expanding,gameweek_goals_conceded_expanding,gameweek_saves_expanding,gameweek_threat_expanding,gameweek_xG_expanding,gameweek_xA_expanding,gameweek_xGA_expanding,gameweek_minutes_expanding,gameweek_xPoints_expanding,gameweek_assists_expanding_per90,gameweek_bps_expanding_per90,gameweek_creativity_expanding_per90,event_points_expanding_per90,gameweek_goals_scored_expanding_per90,gameweek_goals_conceded_expanding_per90,gameweek_saves_expanding_per90,gameweek_threat_expanding_per90,gameweek_xG_expanding_per90,gameweek_xA_expanding_per90,gameweek_xGA_expanding_per90,gameweek_minutes_expanding_per90,gameweek_xPoints_expanding_per90,xG_overperformance,chance_of_playing_next_round,chance_of_playing_this_round,code,cost_change_event,cost_change_event_fall,cost_change_start,cost_change_start_fall,ep_next,ep_this,form,id,in_dreamteam,news,news_added,photo,special,squad_number,status,team,team_code,transfers_in,transfers_in_event,transfers_out,transfers_out_event,value_form,value_season,starts,expected_goals,expected_assists,expected_goal_involvements,expected_goals_conceded,corners_and_indirect_freekicks_text,direct_freekicks_text,penalties_text,expected_goals_per_90,saves_per_90,expected_assists_per_90,expected_goal_involvements_per_90,expected_goals_conceded_per_90,goals_conceded_per_90,now_cost_rank,now_cost_rank_type,form_rank,form_rank_type,points_per_game_rank,points_per_game_rank_type,selected_rank,selected_rank_type,starts_per_90,clean_sheets_per_90,name,data_retrieved_datetime
0,0,0,3,0,,0.0,493,188,4.0,0,2,1,David,0,0,0.0,497,188,0.0,490,188,1,55,0,0,,0,1.0,0,0,Luiz Moreira Marinho,0.9,0.0,479,186,1,David Luiz,0,Arsenal,2,20-21,0.0,0.0,1.9,1.0,1.4,1.9,1.8,1.8,1.8,1.8,0.2,0.2,0.2,0.2,1.9,1.4,1.1,1.1,1.1,1.1,1.5,1.5,1.5,1.5,1.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.993836,0.0,3.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.9,1.0,0.993836,0.0,3.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.9,1.0,0.993836,0.0,3.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.9,1.0,0.993836,0.0,3.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.9,1.0,0.993836,0.0,3.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.9,1.0,0.993836,0.0,270.0,0.0,90.0,0.0,0.0,0.0,0.0,0.0,0.0,171.0,90.0,89.445204,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,0,0,9,0,,5.4,176,43,,0,2,2,Sead,1,0,1.3,259,91,3.6,252,92,90,49,0,0,,0,2.0,0,0,Kolasinac,0.3,4.0,172,48,2,Kolasinac,0,Arsenal,2,20-21,0.0,0.0,1.9,90.0,1.4,1.9,1.8,1.8,1.8,1.8,0.2,0.2,0.2,0.2,1.9,1.4,1.1,1.1,1.1,1.1,1.5,1.5,1.5,1.5,1.0,0.0,9.0,5.4,0.0,1.0,0.0,0.0,0.0,0.0,4.0,0.0,2.033159,0.0,9.0,5.4,2.0,0.0,1.0,0.0,4.0,0.0,0.0,1.9,90.0,2.033159,0.0,9.0,5.4,2.0,0.0,1.0,0.0,4.0,0.0,0.0,1.9,90.0,2.033159,0.0,9.0,5.4,2.0,0.0,1.0,0.0,4.0,0.0,0.0,1.9,90.0,2.033159,0.0,9.0,5.4,2.0,0.0,1.0,0.0,4.0,0.0,0.0,1.9,90.0,2.033159,0.0,9.0,5.4,2.0,0.0,1.0,0.0,4.0,0.0,0.0,1.9,90.0,2.033159,0.0,9.0,5.4,2.0,0.0,1.0,0.0,4.0,0.0,0.0,1.9,90.0,2.033159,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,0,0,6,0,1.0,17.6,97,64,3.0,0,3,1,Nicolas,0,0,3.4,184,86,3.0,257,115,41,78,0,0,2.0,0,1.0,0,0,Pépé,1.1,14.0,123,69,2,Pépé,0,Arsenal,2,20-21,0.1,0.0,1.9,41.0,1.4,1.9,1.8,1.8,1.8,1.8,0.2,0.2,0.2,0.2,1.9,1.4,1.1,1.1,1.1,1.1,1.5,1.5,1.5,1.5,1.0,0.0,6.0,17.6,0.0,0.0,0.0,0.0,0.0,0.0,14.0,0.0,1.500366,0.0,6.0,17.6,1.0,0.0,0.0,0.0,14.0,0.1,0.0,1.9,26.0,1.500366,0.0,6.0,17.6,1.0,0.0,0.0,0.0,14.0,0.1,0.0,1.9,26.0,1.500366,0.0,6.0,17.6,1.0,0.0,0.0,0.0,14.0,0.1,0.0,1.9,26.0,1.500366,0.0,6.0,17.6,1.0,0.0,0.0,0.0,14.0,0.1,0.0,1.9,26.0,1.500366,0.0,6.0,17.6,1.0,0.0,0.0,0.0,14.0,0.1,0.0,1.9,26.0,1.500366,0.0,20.769231,60.923077,3.461538,0.0,0.0,0.0,48.461538,0.346154,0.0,6.576923,90.0,5.193575,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,0,3,34,0,,0.1,276,37,,0,4,8,Edward,0,1,5.5,118,23,34.0,95,16,16,59,0,0,,0,4.5,0,0,Nketiah,1.2,21.0,93,27,9,Nketiah,0,Arsenal,2,20-21,0.9,0.0,1.9,16.0,1.4,1.9,1.8,1.8,1.8,1.8,0.2,0.2,0.2,0.2,1.9,1.4,1.1,1.1,1.1,1.1,1.5,1.5,1.5,1.5,1.0,0.0,34.0,0.1,1.0,0.0,0.0,0.0,0.0,0.0,21.0,0.0,6.641532,0.0,34.0,0.1,8.0,1.0,0.0,0.0,21.0,0.9,0.0,1.9,13.0,6.641532,0.0,34.0,0.1,8.0,1.0,0.0,0.0,21.0,0.9,0.0,1.9,13.0,6.641532,0.0,34.0,0.1,8.0,1.0,0.0,0.0,21.0,0.9,0.0,1.9,13.0,6.641532,0.0,34.0,0.1,8.0,1.0,0.0,0.0,21.0,0.9,0.0,1.9,13.0,6.641532,0.0,34.0,0.1,8.0,1.0,0.0,0.0,21.0,0.9,0.0,1.9,13.0,6.641532,0.0,235.384615,0.692308,55.384615,6.923077,0.0,0.0,145.384615,6.230769,0.0,13.153846,90.0,45.979837,1.111111,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,0,0,8,0,3.0,20.3,89,58,,0,3,2,Bukayo,1,0,5.8,110,58,7.4,222,88,88,54,0,0,,0,2.0,0,0,Saka,4.4,30.0,64,27,2,Saka,0,Arsenal,2,20-21,0.1,0.0,1.9,88.0,1.4,1.9,1.8,1.8,1.8,1.8,0.2,0.2,0.2,0.2,1.9,1.4,1.1,1.1,1.1,1.1,1.5,1.5,1.5,1.5,1.0,0.0,8.0,20.3,0.0,1.0,0.0,0.0,0.0,0.0,30.0,0.0,2.650318,0.0,8.0,20.3,2.0,0.0,1.0,0.0,30.0,0.1,0.0,1.9,88.0,2.650318,0.0,8.0,20.3,2.0,0.0,1.0,0.0,30.0,0.1,0.0,1.9,88.0,2.650318,0.0,8.0,20.3,2.0,0.0,1.0,0.0,30.0,0.1,0.0,1.9,88.0,2.650318,0.0,8.0,20.3,2.0,0.0,1.0,0.0,30.0,0.1,0.0,1.9,88.0,2.650318,0.0,8.0,20.3,2.0,0.0,1.0,0.0,30.0,0.1,0.0,1.9,88.0,2.650318,0.0,8.181818,20.761364,2.045455,0.0,1.022727,0.0,30.681818,0.102273,0.0,1.943182,90.0,2.710552,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


(19141, 209)

### Add xG data to FPL fixtures data

In [16]:
# alphabetically sorted array of every team name that occurs as a home or away side in the FPL fixtures
fpl_team_columns = [fixtures_fpl['home_team'], fixtures_fpl['away_team']]
np.sort(pd.concat(fpl_team_columns).unique())

array(['Arsenal', 'Aston Villa', 'Bournemouth', 'Brentford', 'Brighton',
       'Burnley', 'Chelsea', 'Crystal Palace', 'Everton', 'Fulham',
       'Liverpool', 'Luton', 'Manchester City', 'Manchester Utd',
       'Newcastle Utd', 'Nottingham Forest', 'Sheffield Utd', 'Tottenham',
       'West Ham', 'Wolves'], dtype=object)

In [17]:
# map fbref team names to fpl team names
# The two sources spell some clubs differently. The mapping pairs the two alphabetically sorted
# name lists position by position, so it is only correct when both spellings sort into the same
# order -- always inspect the displayed dictionary below before trusting it.
fbref_teams = np.sort(pd.concat([fixtures_fbref.Home, fixtures_fbref.Away]).unique())
fpl_teams = np.sort(pd.concat([fixtures_fpl.home_team, fixtures_fpl.away_team]).unique())

# zip() silently truncates to the shorter input, which would leave some teams unmapped
assert len(fbref_teams) == len(fpl_teams), (
    f'fbref lists {len(fbref_teams)} teams but FPL lists {len(fpl_teams)}; cannot pair them by position'
)
team_name_dict = dict(zip(fbref_teams, fpl_teams))
display(team_name_dict)

# every Home/Away value is a key of team_name_dict by construction, so map() leaves no gaps
fixtures_fbref['Home'] = fixtures_fbref['Home'].map(team_name_dict)
fixtures_fbref['Away'] = fixtures_fbref['Away'].map(team_name_dict)
display(fixtures_fbref.head())

{'Arsenal': 'Arsenal',
 'Aston Villa': 'Aston Villa',
 'Bournemouth': 'Bournemouth',
 'Brentford': 'Brentford',
 'Brighton': 'Brighton',
 'Burnley': 'Burnley',
 'Chelsea': 'Chelsea',
 'Crystal Palace': 'Crystal Palace',
 'Everton': 'Everton',
 'Fulham': 'Fulham',
 'Liverpool': 'Liverpool',
 'Luton Town': 'Luton',
 'Manchester City': 'Manchester City',
 'Manchester Utd': 'Manchester Utd',
 'Newcastle Utd': 'Newcastle Utd',
 "Nott'ham Forest": 'Nottingham Forest',
 'Sheffield Utd': 'Sheffield Utd',
 'Tottenham': 'Tottenham',
 'West Ham': 'West Ham',
 'Wolves': 'Wolves'}

Unnamed: 0,Wk,Day,Date,Time,Home,xG_home,Score,xG_away,Away,Attendance,Venue,Referee,Match Report,Notes
0,1.0,Fri,2023-08-11,20:00,Burnley,0.3,0–3,1.9,Manchester City,21572.0,Turf Moor,Craig Pawson,Match Report,
1,1.0,Sat,2023-08-12,12:30,Arsenal,0.8,2–1,1.2,Nottingham Forest,59984.0,Emirates Stadium,Michael Oliver,Match Report,
2,1.0,Sat,2023-08-12,15:00,Everton,2.7,0–1,1.5,Fulham,39940.0,Goodison Park,Stuart Attwell,Match Report,
3,1.0,Sat,2023-08-12,15:00,Sheffield Utd,0.5,0–1,1.9,Crystal Palace,31194.0,Bramall Lane,John Brooks,Match Report,
4,1.0,Sat,2023-08-12,15:00,Brighton,4.0,4–1,1.5,Luton,31872.0,The American Express Community Stadium,David Coote,Match Report,


In [18]:
# Look up the fbref xG figures of every FPL fixture by its (home team, away team) pair.
home_xg = []
away_xg = []
for ix, row in fixtures_fpl.iterrows():
    home_team = row.home_team
    away_team = row.away_team
    # evaluate the fixture filter once and read both xG values from the same match row
    same_fixture = (fixtures_fbref['Home']==home_team) & (fixtures_fbref['Away']==away_team)
    fbref_match = fixtures_fbref.loc[same_fixture, ['xG_home', 'xG_away']]
    if fbref_match.empty:
        # fail with the offending fixture instead of an IndexError from .values[0]
        raise ValueError(f'No fbref fixture found for {home_team} (home) vs {away_team} (away)')
    # if several rows match, the first one is used
    home_xg.append( fbref_match['xG_home'].values[0] )
    away_xg.append( fbref_match['xG_away'].values[0] )

fixtures_fpl['xg_home'] = home_xg
fixtures_fpl['xg_away'] = away_xg

display(fixtures_fpl.head())
print('Nulls:')
display(fixtures_fpl[['xg_home', 'xg_away']].isnull().sum())

Unnamed: 0,code,event,finished,finished_provisional,id,kickoff_time,minutes,provisional_start_time,started,team_a,team_a_score,team_h,team_h_score,stats,team_h_difficulty,team_a_difficulty,pulse_id,home_team,away_team,xg_home,xg_away
2,2367538,1.0,True,True,1,2023-08-11T19:00:00Z,90,False,True,13,3.0,6,0.0,"[{'identifier': 'goals_scored', 'a': [{'value'...",5,2,93321,Burnley,Manchester City,0.3,1.9
3,2367540,1.0,True,True,2,2023-08-12T12:00:00Z,90,False,True,16,1.0,1,2.0,"[{'identifier': 'goals_scored', 'a': [{'value'...",2,4,93322,Arsenal,Nottingham Forest,0.8,1.2
4,2367539,1.0,True,True,3,2023-08-12T14:00:00Z,90,False,True,19,1.0,3,1.0,"[{'identifier': 'goals_scored', 'a': [{'value'...",2,2,93323,Bournemouth,West Ham,1.3,1.1
5,2367541,1.0,True,True,4,2023-08-12T14:00:00Z,90,False,True,12,1.0,5,4.0,"[{'identifier': 'goals_scored', 'a': [{'value'...",2,3,93324,Brighton,Luton,4.0,1.5
6,2367542,1.0,True,True,5,2023-08-12T14:00:00Z,90,False,True,10,1.0,9,0.0,"[{'identifier': 'goals_scored', 'a': [{'value'...",2,2,93325,Everton,Fulham,2.7,1.5


Nulls:


xg_home    0
xg_away    0
dtype: int64

### Calculate exponentially weighted moving averages for each team's xG data

In [19]:
# reshape to one row per (fixture, team): the home and the away side of a fixture become two rows,
# with the side recorded in 'variable' and the team name in 'value'
fixture_level_cols = ['xg_home', 'xg_away', 'team_h_score', 'team_a_score', 'event', 'kickoff_time', 'id']
fixtures_melt = pd.melt(fixtures_fpl, id_vars=fixture_level_cols, value_vars=['home_team', 'away_team'])
fixtures_melt = fixtures_melt.assign(season='23-24')
display(fixtures_melt)

Unnamed: 0,xg_home,xg_away,team_h_score,team_a_score,event,kickoff_time,id,variable,value,season
0,0.3,1.9,0.0,3.0,1.0,2023-08-11T19:00:00Z,1,home_team,Burnley,23-24
1,0.8,1.2,2.0,1.0,1.0,2023-08-12T12:00:00Z,2,home_team,Arsenal,23-24
2,1.3,1.1,1.0,1.0,1.0,2023-08-12T14:00:00Z,3,home_team,Bournemouth,23-24
3,4.0,1.5,4.0,1.0,1.0,2023-08-12T14:00:00Z,4,home_team,Brighton,23-24
4,2.7,1.5,0.0,1.0,1.0,2023-08-12T14:00:00Z,5,home_team,Everton,23-24
5,0.5,1.9,0.0,1.0,1.0,2023-08-12T14:00:00Z,7,home_team,Sheffield Utd,23-24
6,3.3,1.8,5.0,1.0,1.0,2023-08-12T16:30:00Z,6,home_team,Newcastle Utd,23-24
7,2.2,1.3,2.0,2.0,1.0,2023-08-13T13:00:00Z,8,home_team,Brentford,23-24
8,1.4,1.3,1.0,1.0,1.0,2023-08-13T15:30:00Z,9,home_team,Chelsea,23-24
9,2.2,2.2,1.0,0.0,1.0,2023-08-14T19:00:00Z,10,home_team,Manchester Utd,23-24


In [20]:
# put the previous seasons' team rows (team_data) in front of the current season's rows
# NOTE: fixtures_melt is overwritten with the combined frame, so running this cell twice
# prepends team_data twice
frames_to_stack = [team_data, fixtures_melt]
fixtures_melt = pd.concat(frames_to_stack, ignore_index=True)
display(fixtures_melt)

Unnamed: 0,xg_home,xg_away,team_h_score,team_a_score,event,kickoff_time,id,variable,value,season,xG,xGA,xG_ewm_5,xGA_ewm_5,xG_ewm_10,xGA_ewm_10,xG_ewm_20,xGA_ewm_20,xG_ewm_40,xGA_ewm_40
0,0.2,1.8,0.0,3.0,1.0,2020-09-12 11:30:00+00:00,2,home_team,Fulham,20-21,0.2,1.8,,,,,,,,
1,0.2,1.8,0.0,3.0,1.0,2020-09-12 11:30:00+00:00,2,away_team,Arsenal,20-21,1.8,0.2,,,,,,,,
2,0.7,0.8,1.0,0.0,1.0,2020-09-12 14:00:00+00:00,1,home_team,Crystal Palace,20-21,0.7,0.8,,,,,,,,
3,0.7,0.8,1.0,0.0,1.0,2020-09-12 14:00:00+00:00,1,away_team,Southampton,20-21,0.8,0.7,,,,,,,,
4,3.3,0.6,4.0,3.0,1.0,2020-09-12 16:30:00+00:00,3,home_team,Liverpool,20-21,3.3,0.6,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2295,0.5,1.9,0.0,1.0,1.0,2023-08-12T14:00:00Z,7,away_team,Crystal Palace,23-24,,,,,,,,,,
2296,3.3,1.8,5.0,1.0,1.0,2023-08-12T16:30:00Z,6,away_team,Aston Villa,23-24,,,,,,,,,,
2297,2.2,1.3,2.0,2.0,1.0,2023-08-13T13:00:00Z,8,away_team,Tottenham,23-24,,,,,,,,,,
2298,1.4,1.3,1.0,1.0,1.0,2023-08-13T15:30:00Z,9,away_team,Liverpool,23-24,,,,,,,,,,


In [21]:
# xG is the row team's own expected goals, xGA what it conceded:
# a home row reads (xg_home, xg_away), an away row the reverse
is_home_row = fixtures_melt['variable'] == 'home_team'
fixtures_melt['xG'] = np.where(is_home_row, fixtures_melt['xg_home'], fixtures_melt['xg_away'])
fixtures_melt['xGA'] = np.where(is_home_row, fixtures_melt['xg_away'], fixtures_melt['xg_home'])

# order chronologically; the fresh 0..n-1 index is what the ewm results are aligned on below
fixtures_melt = fixtures_melt.sort_values(by='kickoff_time').reset_index(drop=True)

# exponentially weighted means per team, one pair of columns per smoothing window
rolling_windows = [5,10,20,40]

for window in rolling_windows:
    team_ewm = fixtures_melt.groupby('value')[['xG', 'xGA']].ewm(alpha=1/window).mean()
    # the result is indexed by (team, original row label): drop the team level and restore row order
    team_ewm = team_ewm.droplevel(0).sort_index()
    fixtures_melt[f'xG_ewm_{window}'] = team_ewm['xG']
    fixtures_melt[f'xGA_ewm_{window}'] = team_ewm['xGA']

display(fixtures_melt)

Unnamed: 0,xg_home,xg_away,team_h_score,team_a_score,event,kickoff_time,id,variable,value,season,xG,xGA,xG_ewm_5,xGA_ewm_5,xG_ewm_10,xGA_ewm_10,xG_ewm_20,xGA_ewm_20,xG_ewm_40,xGA_ewm_40
0,0.2,1.8,0.0,3.0,1.0,2020-09-12 11:30:00+00:00,2,home_team,Fulham,20-21,0.2,1.8,0.200000,1.800000,0.200000,1.800000,0.200000,1.800000,0.200000,1.800000
1,0.2,1.8,0.0,3.0,1.0,2020-09-12 11:30:00+00:00,2,away_team,Arsenal,20-21,1.8,0.2,1.800000,0.200000,1.800000,0.200000,1.800000,0.200000,1.800000,0.200000
2,0.7,0.8,1.0,0.0,1.0,2020-09-12 14:00:00+00:00,1,home_team,Crystal Palace,20-21,0.7,0.8,0.700000,0.800000,0.700000,0.800000,0.700000,0.800000,0.700000,0.800000
3,0.7,0.8,1.0,0.0,1.0,2020-09-12 14:00:00+00:00,1,away_team,Southampton,20-21,0.8,0.7,0.800000,0.700000,0.800000,0.700000,0.800000,0.700000,0.800000,0.700000
4,3.3,0.6,4.0,3.0,1.0,2020-09-12 16:30:00+00:00,3,home_team,Liverpool,20-21,3.3,0.6,3.300000,0.600000,3.300000,0.600000,3.300000,0.600000,3.300000,0.600000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2295,2.2,1.3,2.0,2.0,1.0,2023-08-13T13:00:00Z,8,away_team,Tottenham,23-24,1.3,2.2,1.622074,1.621318,1.573028,1.519011,1.563669,1.379550,1.573434,1.283144
2296,1.4,1.3,1.0,1.0,1.0,2023-08-13T15:30:00Z,9,away_team,Liverpool,23-24,1.3,1.4,2.075823,1.224904,2.030143,1.246765,1.996299,1.242614,2.013924,1.197971
2297,1.4,1.3,1.0,1.0,1.0,2023-08-13T15:30:00Z,9,home_team,Chelsea,23-24,1.4,1.3,1.510291,1.863070,1.436814,1.623333,1.427765,1.415216,1.481851,1.253515
2298,2.2,2.2,1.0,0.0,1.0,2023-08-14T19:00:00Z,10,home_team,Manchester Utd,23-24,2.2,2.2,2.529005,1.598337,2.222463,1.497346,1.939903,1.420614,1.754520,1.367280


In [22]:
# lag each team's ewm columns by one fixture so a row only carries information from earlier games
# (a team's first fixture therefore ends up with NaN in these columns)
cols_to_shift = [col for col in fixtures_melt.columns if 'ewm' in col]
lagged_ewm = fixtures_melt.groupby('value')[cols_to_shift].shift(periods=1)
fixtures_melt[cols_to_shift] = lagged_ewm

In [23]:
# eyeball one team's rows to verify the shifted ewm columns
is_fulham = fixtures_melt['value'] == 'Fulham'
fixtures_melt.loc[is_fulham]

Unnamed: 0,xg_home,xg_away,team_h_score,team_a_score,event,kickoff_time,id,variable,value,season,xG,xGA,xG_ewm_5,xGA_ewm_5,xG_ewm_10,xGA_ewm_10,xG_ewm_20,xGA_ewm_20,xG_ewm_40,xGA_ewm_40
0,0.2,1.8,0.0,3.0,1.0,2020-09-12 11:30:00+00:00,2,home_team,Fulham,20-21,0.2,1.8,,,,,,,,
18,1.7,1.8,4.0,3.0,2.0,2020-09-19 14:00:00+00:00,13,away_team,Fulham,20-21,1.8,1.7,0.2,1.8,0.2,1.8,0.2,1.8,0.2,1.8
53,0.7,1.6,0.0,3.0,3.0,2020-09-28 16:45:00+00:00,22,home_team,Fulham,20-21,0.7,1.6,1.088889,1.744444,1.042105,1.747368,1.020513,1.748718,1.010127,1.749367
69,1.2,0.7,1.0,0.0,4.0,2020-10-04 13:00:00+00:00,38,away_team,Fulham,20-21,0.7,1.2,0.929508,1.685246,0.915867,1.692989,0.908151,1.696582,0.904123,1.698312
84,1.5,1.7,1.0,1.0,5.0,2020-10-18 11:00:00+00:00,46,away_team,Fulham,20-21,1.7,1.5,0.851762,1.520867,0.853097,1.549637,0.852044,1.562728,0.851138,1.568964
100,1.1,2.6,1.0,2.0,6.0,2020-10-24 14:00:00+00:00,53,home_team,Fulham,20-21,1.1,2.6,1.104093,1.51466,1.059906,1.537516,1.039463,1.548863,1.029614,1.554464
132,1.5,0.5,2.0,0.0,7.0,2020-11-02 17:30:00+00:00,61,home_team,Fulham,20-21,1.5,0.5,1.102984,1.808847,1.068463,1.764271,1.050889,1.74726,1.0421,1.739933
147,1.3,1.6,1.0,0.0,8.0,2020-11-07 20:00:00+00:00,78,away_team,Fulham,20-21,1.6,1.3,1.203458,1.477613,1.15118,1.521936,1.125328,1.540529,1.112586,1.549066
165,2.0,1.6,2.0,3.0,9.0,2020-11-22 12:00:00+00:00,81,home_team,Fulham,20-21,2.0,1.6,1.298754,1.434929,1.229985,1.482968,1.195842,1.504798,1.179046,1.515105
192,0.9,1.8,1.0,2.0,10.0,2020-11-30 17:30:00+00:00,94,away_team,Fulham,20-21,1.8,0.9,1.460746,1.473061,1.355685,1.502073,1.304586,1.517672,1.279769,1.525521


In [24]:
# persist the combined team table (all seasons, including the ewm columns) as CSV
filepath = Path('..') / 'data' / 'team_data.csv'
fixtures_melt.to_csv(path_or_buf=filepath)

### Add team data to FPL data

In [25]:
# columns to be fetched from team data
col_names = ['xG', 'xGA']
col_names += [f'xG_ewm_{i}' for i in rolling_windows]
col_names += [f'xGA_ewm_{i}' for i in rolling_windows]
nr_cols = len(col_names)

# The current-season slice of fpl_df drives the loop and every write-back below: build it once.
is_current_season = fpl_df.season=='23-24'
current_season_index = fpl_df[is_current_season].index

# Per-player rows are collected in `team_feature_rows` rather than `team_data`: `team_data` is the
# previous-seasons DataFrame that is concatenated into fixtures_melt earlier in the notebook, and
# overwriting it with a list would break a re-run of that cell.
team_feature_rows = []
opponent_data = []
home_indicator = []
count_non_one_games = 0
missing_row = np.full(nr_cols, np.nan)
for ix, row in fpl_df[is_current_season].iterrows():
    gameweek = row.gameweek
    team = row.team_name
    season = row.season
    games = fixtures_melt[(fixtures_melt.value==team) & (fixtures_melt.event==gameweek) & (fixtures_melt.season==season)]
    if games.shape[0]!=1:
        # zero or several fixtures in this gameweek: not handled yet, leave the row empty
        team_feature_rows.append( missing_row )
        opponent_data.append( missing_row )
        home_indicator.append( np.nan )
        count_non_one_games += 1
        continue

    # add team data
    team_feature_rows.append( games[col_names].values.flatten() )
    home_indicator.append( 1 if games.variable.values[0]=='home_team' else 0 )

    # the opponent is the other team's row of the same fixture
    game_id = games.id.values[0]
    opponent_games = fixtures_melt[(fixtures_melt.id==game_id) & (fixtures_melt.event==gameweek)
                                   & (fixtures_melt.season==season) & (fixtures_melt.value!=team)]
    if opponent_games.shape[0]!=1:
        # anything but one opponent row would produce a ragged row and an obscure DataFrame error below
        raise ValueError(f'Expected exactly one opponent row for {team} in gameweek {gameweek} '
                         f'({season}, fixture id {game_id}), found {opponent_games.shape[0]} (ix {ix})')
    # add opponent data
    opponent_data.append( opponent_games[col_names].values.flatten() )


new_col_names = ['team_'+col for col in col_names]
team_data_df = pd.DataFrame(team_feature_rows, columns=new_col_names, index=current_season_index)
new_oppo_col_names = ['opponent_'+col for col in col_names]
opponent_data_df = pd.DataFrame(opponent_data, columns=new_oppo_col_names, index=current_season_index)
home_indicator_df = pd.DataFrame(home_indicator, columns=['home'], index=current_season_index)

# write back into the current-season rows only (values are aligned on the row index)
fpl_df.loc[is_current_season, new_col_names] = team_data_df
fpl_df.loc[is_current_season, new_oppo_col_names] = opponent_data_df
fpl_df.loc[is_current_season, 'home'] = home_indicator_df['home']

display(fpl_df.head())
display(fpl_df.tail())
display(fpl_df.shape)
print(f'Number of non-one-games: {count_non_one_games}')


Unnamed: 0,assists,bonus,bps,clean_sheets,corners_and_indirect_freekicks_order,creativity,creativity_rank,creativity_rank_type,direct_freekicks_order,dreamteam_count,element_type,event_points,first_name,goals_conceded,goals_scored,ict_index,ict_index_rank,ict_index_rank_type,influence,influence_rank,influence_rank_type,minutes,now_cost,own_goals,penalties_missed,penalties_order,penalties_saved,points_per_game,red_cards,saves,second_name,selected_by_percent,threat,threat_rank,threat_rank_type,total_points,web_name,yellow_cards,team_name,gameweek,season,gameweek_xG,gameweek_xA,gameweek_xGA,gameweek_minutes,team_xG,team_xGA,team_xG_ewm_5,team_xG_ewm_10,team_xG_ewm_20,team_xG_ewm_40,team_xGA_ewm_5,team_xGA_ewm_10,team_xGA_ewm_20,team_xGA_ewm_40,opponent_xG,opponent_xGA,opponent_xG_ewm_5,opponent_xG_ewm_10,opponent_xG_ewm_20,opponent_xG_ewm_40,opponent_xGA_ewm_5,opponent_xGA_ewm_10,opponent_xGA_ewm_20,opponent_xGA_ewm_40,home,gameweek_assists,gameweek_bps,gameweek_creativity,gameweek_goals_scored,gameweek_goals_conceded,gameweek_own_goals,gameweek_penalties_saved,gameweek_red_cards,gameweek_saves,gameweek_threat,gameweek_yellow_cards,gameweek_xPoints,gameweek_assists_ewm_5,gameweek_bps_ewm_5,gameweek_creativity_ewm_5,event_points_ewm_5,gameweek_goals_scored_ewm_5,gameweek_goals_conceded_ewm_5,gameweek_saves_ewm_5,gameweek_threat_ewm_5,gameweek_xG_ewm_5,gameweek_xA_ewm_5,gameweek_xGA_ewm_5,gameweek_minutes_ewm_5,gameweek_xPoints_ewm_5,gameweek_assists_ewm_10,gameweek_bps_ewm_10,gameweek_creativity_ewm_10,event_points_ewm_10,gameweek_goals_scored_ewm_10,gameweek_goals_conceded_ewm_10,gameweek_saves_ewm_10,gameweek_threat_ewm_10,gameweek_xG_ewm_10,gameweek_xA_ewm_10,gameweek_xGA_ewm_10,gameweek_minutes_ewm_10,gameweek_xPoints_ewm_10,gameweek_assists_ewm_20,gameweek_bps_ewm_20,gameweek_creativity_ewm_20,event_points_ewm_20,gameweek_goals_scored_ewm_20,gameweek_goals_conceded_ewm_20,gameweek_saves_ewm_20,gameweek_threat_ewm_20,gameweek_xG_ewm_20,gameweek_xA_ewm_20,gameweek_xG
A_ewm_20,gameweek_minutes_ewm_20,gameweek_xPoints_ewm_20,gameweek_assists_ewm_40,gameweek_bps_ewm_40,gameweek_creativity_ewm_40,event_points_ewm_40,gameweek_goals_scored_ewm_40,gameweek_goals_conceded_ewm_40,gameweek_saves_ewm_40,gameweek_threat_ewm_40,gameweek_xG_ewm_40,gameweek_xA_ewm_40,gameweek_xGA_ewm_40,gameweek_minutes_ewm_40,gameweek_xPoints_ewm_40,gameweek_assists_expanding,gameweek_bps_expanding,gameweek_creativity_expanding,event_points_expanding,gameweek_goals_scored_expanding,gameweek_goals_conceded_expanding,gameweek_saves_expanding,gameweek_threat_expanding,gameweek_xG_expanding,gameweek_xA_expanding,gameweek_xGA_expanding,gameweek_minutes_expanding,gameweek_xPoints_expanding,gameweek_assists_expanding_per90,gameweek_bps_expanding_per90,gameweek_creativity_expanding_per90,event_points_expanding_per90,gameweek_goals_scored_expanding_per90,gameweek_goals_conceded_expanding_per90,gameweek_saves_expanding_per90,gameweek_threat_expanding_per90,gameweek_xG_expanding_per90,gameweek_xA_expanding_per90,gameweek_xGA_expanding_per90,gameweek_minutes_expanding_per90,gameweek_xPoints_expanding_per90,xG_overperformance,chance_of_playing_next_round,chance_of_playing_this_round,code,cost_change_event,cost_change_event_fall,cost_change_start,cost_change_start_fall,ep_next,ep_this,form,id,in_dreamteam,news,news_added,photo,special,squad_number,status,team,team_code,transfers_in,transfers_in_event,transfers_out,transfers_out_event,value_form,value_season,starts,expected_goals,expected_assists,expected_goal_involvements,expected_goals_conceded,corners_and_indirect_freekicks_text,direct_freekicks_text,penalties_text,expected_goals_per_90,saves_per_90,expected_assists_per_90,expected_goal_involvements_per_90,expected_goals_conceded_per_90,goals_conceded_per_90,now_cost_rank,now_cost_rank_type,form_rank,form_rank_type,points_per_game_rank,points_per_game_rank_type,selected_rank,selected_rank_type,starts_per_90,clean_sheets_per_90,name,data_retrieved_datetime
0,0,0,3,0,,0.0,493,188,4.0,0,2,1,David,0,0,0.0,497,188,0.0,490,188,1,55,0,0,,0,1.0,0,0,Luiz Moreira Marinho,0.9,0.0,479,186,1,David Luiz,0,Arsenal,2,20-21,0.0,0.0,1.9,1.0,1.4,1.9,1.8,1.8,1.8,1.8,0.2,0.2,0.2,0.2,1.9,1.4,1.1,1.1,1.1,1.1,1.5,1.5,1.5,1.5,1.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.993836,0.0,3.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.9,1.0,0.993836,0.0,3.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.9,1.0,0.993836,0.0,3.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.9,1.0,0.993836,0.0,3.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.9,1.0,0.993836,0.0,3.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.9,1.0,0.993836,0.0,270.0,0.0,90.0,0.0,0.0,0.0,0.0,0.0,0.0,171.0,90.0,89.445204,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,0,0,9,0,,5.4,176,43,,0,2,2,Sead,1,0,1.3,259,91,3.6,252,92,90,49,0,0,,0,2.0,0,0,Kolasinac,0.3,4.0,172,48,2,Kolasinac,0,Arsenal,2,20-21,0.0,0.0,1.9,90.0,1.4,1.9,1.8,1.8,1.8,1.8,0.2,0.2,0.2,0.2,1.9,1.4,1.1,1.1,1.1,1.1,1.5,1.5,1.5,1.5,1.0,0.0,9.0,5.4,0.0,1.0,0.0,0.0,0.0,0.0,4.0,0.0,2.033159,0.0,9.0,5.4,2.0,0.0,1.0,0.0,4.0,0.0,0.0,1.9,90.0,2.033159,0.0,9.0,5.4,2.0,0.0,1.0,0.0,4.0,0.0,0.0,1.9,90.0,2.033159,0.0,9.0,5.4,2.0,0.0,1.0,0.0,4.0,0.0,0.0,1.9,90.0,2.033159,0.0,9.0,5.4,2.0,0.0,1.0,0.0,4.0,0.0,0.0,1.9,90.0,2.033159,0.0,9.0,5.4,2.0,0.0,1.0,0.0,4.0,0.0,0.0,1.9,90.0,2.033159,0.0,9.0,5.4,2.0,0.0,1.0,0.0,4.0,0.0,0.0,1.9,90.0,2.033159,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,0,0,6,0,1.0,17.6,97,64,3.0,0,3,1,Nicolas,0,0,3.4,184,86,3.0,257,115,41,78,0,0,2.0,0,1.0,0,0,Pépé,1.1,14.0,123,69,2,Pépé,0,Arsenal,2,20-21,0.1,0.0,1.9,41.0,1.4,1.9,1.8,1.8,1.8,1.8,0.2,0.2,0.2,0.2,1.9,1.4,1.1,1.1,1.1,1.1,1.5,1.5,1.5,1.5,1.0,0.0,6.0,17.6,0.0,0.0,0.0,0.0,0.0,0.0,14.0,0.0,1.500366,0.0,6.0,17.6,1.0,0.0,0.0,0.0,14.0,0.1,0.0,1.9,26.0,1.500366,0.0,6.0,17.6,1.0,0.0,0.0,0.0,14.0,0.1,0.0,1.9,26.0,1.500366,0.0,6.0,17.6,1.0,0.0,0.0,0.0,14.0,0.1,0.0,1.9,26.0,1.500366,0.0,6.0,17.6,1.0,0.0,0.0,0.0,14.0,0.1,0.0,1.9,26.0,1.500366,0.0,6.0,17.6,1.0,0.0,0.0,0.0,14.0,0.1,0.0,1.9,26.0,1.500366,0.0,20.769231,60.923077,3.461538,0.0,0.0,0.0,48.461538,0.346154,0.0,6.576923,90.0,5.193575,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,0,3,34,0,,0.1,276,37,,0,4,8,Edward,0,1,5.5,118,23,34.0,95,16,16,59,0,0,,0,4.5,0,0,Nketiah,1.2,21.0,93,27,9,Nketiah,0,Arsenal,2,20-21,0.9,0.0,1.9,16.0,1.4,1.9,1.8,1.8,1.8,1.8,0.2,0.2,0.2,0.2,1.9,1.4,1.1,1.1,1.1,1.1,1.5,1.5,1.5,1.5,1.0,0.0,34.0,0.1,1.0,0.0,0.0,0.0,0.0,0.0,21.0,0.0,6.641532,0.0,34.0,0.1,8.0,1.0,0.0,0.0,21.0,0.9,0.0,1.9,13.0,6.641532,0.0,34.0,0.1,8.0,1.0,0.0,0.0,21.0,0.9,0.0,1.9,13.0,6.641532,0.0,34.0,0.1,8.0,1.0,0.0,0.0,21.0,0.9,0.0,1.9,13.0,6.641532,0.0,34.0,0.1,8.0,1.0,0.0,0.0,21.0,0.9,0.0,1.9,13.0,6.641532,0.0,34.0,0.1,8.0,1.0,0.0,0.0,21.0,0.9,0.0,1.9,13.0,6.641532,0.0,235.384615,0.692308,55.384615,6.923077,0.0,0.0,145.384615,6.230769,0.0,13.153846,90.0,45.979837,1.111111,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,0,0,8,0,3.0,20.3,89,58,,0,3,2,Bukayo,1,0,5.8,110,58,7.4,222,88,88,54,0,0,,0,2.0,0,0,Saka,4.4,30.0,64,27,2,Saka,0,Arsenal,2,20-21,0.1,0.0,1.9,88.0,1.4,1.9,1.8,1.8,1.8,1.8,0.2,0.2,0.2,0.2,1.9,1.4,1.1,1.1,1.1,1.1,1.5,1.5,1.5,1.5,1.0,0.0,8.0,20.3,0.0,1.0,0.0,0.0,0.0,0.0,30.0,0.0,2.650318,0.0,8.0,20.3,2.0,0.0,1.0,0.0,30.0,0.1,0.0,1.9,88.0,2.650318,0.0,8.0,20.3,2.0,0.0,1.0,0.0,30.0,0.1,0.0,1.9,88.0,2.650318,0.0,8.0,20.3,2.0,0.0,1.0,0.0,30.0,0.1,0.0,1.9,88.0,2.650318,0.0,8.0,20.3,2.0,0.0,1.0,0.0,30.0,0.1,0.0,1.9,88.0,2.650318,0.0,8.0,20.3,2.0,0.0,1.0,0.0,30.0,0.1,0.0,1.9,88.0,2.650318,0.0,8.181818,20.761364,2.045455,0.0,1.022727,0.0,30.681818,0.102273,0.0,1.943182,90.0,2.710552,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


Unnamed: 0,assists,bonus,bps,clean_sheets,corners_and_indirect_freekicks_order,creativity,creativity_rank,creativity_rank_type,direct_freekicks_order,dreamteam_count,element_type,event_points,first_name,goals_conceded,goals_scored,ict_index,ict_index_rank,ict_index_rank_type,influence,influence_rank,influence_rank_type,minutes,now_cost,own_goals,penalties_missed,penalties_order,penalties_saved,points_per_game,red_cards,saves,second_name,selected_by_percent,threat,threat_rank,threat_rank_type,total_points,web_name,yellow_cards,team_name,gameweek,season,gameweek_xG,gameweek_xA,gameweek_xGA,gameweek_minutes,team_xG,team_xGA,team_xG_ewm_5,team_xG_ewm_10,team_xG_ewm_20,team_xG_ewm_40,team_xGA_ewm_5,team_xGA_ewm_10,team_xGA_ewm_20,team_xGA_ewm_40,opponent_xG,opponent_xGA,opponent_xG_ewm_5,opponent_xG_ewm_10,opponent_xG_ewm_20,opponent_xG_ewm_40,opponent_xGA_ewm_5,opponent_xGA_ewm_10,opponent_xGA_ewm_20,opponent_xGA_ewm_40,home,gameweek_assists,gameweek_bps,gameweek_creativity,gameweek_goals_scored,gameweek_goals_conceded,gameweek_own_goals,gameweek_penalties_saved,gameweek_red_cards,gameweek_saves,gameweek_threat,gameweek_yellow_cards,gameweek_xPoints,gameweek_assists_ewm_5,gameweek_bps_ewm_5,gameweek_creativity_ewm_5,event_points_ewm_5,gameweek_goals_scored_ewm_5,gameweek_goals_conceded_ewm_5,gameweek_saves_ewm_5,gameweek_threat_ewm_5,gameweek_xG_ewm_5,gameweek_xA_ewm_5,gameweek_xGA_ewm_5,gameweek_minutes_ewm_5,gameweek_xPoints_ewm_5,gameweek_assists_ewm_10,gameweek_bps_ewm_10,gameweek_creativity_ewm_10,event_points_ewm_10,gameweek_goals_scored_ewm_10,gameweek_goals_conceded_ewm_10,gameweek_saves_ewm_10,gameweek_threat_ewm_10,gameweek_xG_ewm_10,gameweek_xA_ewm_10,gameweek_xGA_ewm_10,gameweek_minutes_ewm_10,gameweek_xPoints_ewm_10,gameweek_assists_ewm_20,gameweek_bps_ewm_20,gameweek_creativity_ewm_20,event_points_ewm_20,gameweek_goals_scored_ewm_20,gameweek_goals_conceded_ewm_20,gameweek_saves_ewm_20,gameweek_threat_ewm_20,gameweek_xG_ewm_20,gameweek_xA_ewm_20,gameweek_xG
A_ewm_20,gameweek_minutes_ewm_20,gameweek_xPoints_ewm_20,gameweek_assists_ewm_40,gameweek_bps_ewm_40,gameweek_creativity_ewm_40,event_points_ewm_40,gameweek_goals_scored_ewm_40,gameweek_goals_conceded_ewm_40,gameweek_saves_ewm_40,gameweek_threat_ewm_40,gameweek_xG_ewm_40,gameweek_xA_ewm_40,gameweek_xGA_ewm_40,gameweek_minutes_ewm_40,gameweek_xPoints_ewm_40,gameweek_assists_expanding,gameweek_bps_expanding,gameweek_creativity_expanding,event_points_expanding,gameweek_goals_scored_expanding,gameweek_goals_conceded_expanding,gameweek_saves_expanding,gameweek_threat_expanding,gameweek_xG_expanding,gameweek_xA_expanding,gameweek_xGA_expanding,gameweek_minutes_expanding,gameweek_xPoints_expanding,gameweek_assists_expanding_per90,gameweek_bps_expanding_per90,gameweek_creativity_expanding_per90,event_points_expanding_per90,gameweek_goals_scored_expanding_per90,gameweek_goals_conceded_expanding_per90,gameweek_saves_expanding_per90,gameweek_threat_expanding_per90,gameweek_xG_expanding_per90,gameweek_xA_expanding_per90,gameweek_xGA_expanding_per90,gameweek_minutes_expanding_per90,gameweek_xPoints_expanding_per90,xG_overperformance,chance_of_playing_next_round,chance_of_playing_this_round,code,cost_change_event,cost_change_event_fall,cost_change_start,cost_change_start_fall,ep_next,ep_this,form,id,in_dreamteam,news,news_added,photo,special,squad_number,status,team,team_code,transfers_in,transfers_in_event,transfers_out,transfers_out_event,value_form,value_season,starts,expected_goals,expected_assists,expected_goal_involvements,expected_goals_conceded,corners_and_indirect_freekicks_text,direct_freekicks_text,penalties_text,expected_goals_per_90,saves_per_90,expected_assists_per_90,expected_goal_involvements_per_90,expected_goals_conceded_per_90,goals_conceded_per_90,now_cost_rank,now_cost_rank_type,form_rank,form_rank_type,points_per_game_rank,points_per_game_rank_type,selected_rank,selected_rank_type,starts_per_90,clean_sheets_per_90,name,data_retrieved_datetime
19136,0,0,19,0,,65.4,6,4,3.0,0,3,1,Pedro,1,0,13.2,11,6,20.2,73,24,90,55,0,0,,0,1.0,0,0,Lomba Neto,0.2,46.0,12,5,1,Neto,1,Wolves,1,23-24,,,,90.0,2.2,2.2,0.886099,0.937099,0.956701,0.965691,2.178985,1.917649,1.749276,1.632264,2.2,2.2,2.611257,2.224959,1.926174,1.742422,1.447922,1.419273,1.379475,1.344666,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,247632.0,0.0,0.0,0.0,0.0,2.1,1.6,1.0,567.0,False,,,247632.jpg,False,,a,20.0,39.0,1999.0,1999.0,886.0,886.0,0.2,0.2,1.0,0.79,0.35,1.14,1.46,,,,0.79,0.0,0.35,1.14,1.46,1.0,132.0,85.0,261.0,122.0,261.0,122.0,339.0,103.0,1.0,0.0,Pedro Lomba Neto,2023-08-15 10:04:04.755472
19137,0,0,12,0,,0.0,530,69,,0,1,2,José,1,0,1.9,162,9,19.2,79,8,90,50,0,0,,0,2.0,0,2,Malheiro de Sá,1.1,0.0,514,69,2,José Sá,0,Wolves,1,23-24,,,,90.0,2.2,2.2,0.886099,0.937099,0.956701,0.965691,2.178985,1.917649,1.749276,1.632264,2.2,2.2,2.611257,2.224959,1.926174,1.742422,1.447922,1.419273,1.379475,1.344666,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,149065.0,0.0,0.0,0.0,0.0,3.2,2.7,2.0,569.0,False,,,149065.jpg,False,,a,20.0,39.0,2160.0,2160.0,3423.0,3423.0,0.4,0.4,1.0,0.0,0.0,0.0,1.46,,,,0.0,2.0,0.0,0.0,1.46,1.0,197.0,7.0,126.0,17.0,126.0,17.0,162.0,20.0,1.0,0.0,José Malheiro de Sá,2023-08-15 10:04:04.755472
19138,0,0,6,1,2.0,34.9,20,13,2.0,0,3,3,Pablo,0,0,5.5,64,34,10.8,131,50,62,50,0,0,,0,3.0,0,0,Sarabia,0.1,9.0,83,48,3,Sarabia,0,Wolves,1,23-24,,,,62.0,2.2,2.2,0.886099,0.937099,0.956701,0.965691,2.178985,1.917649,1.749276,1.632264,2.2,2.2,2.611257,2.224959,1.926174,1.742422,1.447922,1.419273,1.379475,1.344666,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,88484.0,0.0,0.0,0.0,0.0,1.6,1.1,3.0,570.0,False,,,88484.jpg,False,,a,20.0,39.0,1074.0,1074.0,555.0,555.0,0.6,0.6,1.0,0.06,0.3,0.36,0.74,,,,0.09,0.0,0.44,0.53,1.07,0.0,173.0,110.0,71.0,26.0,71.0,26.0,428.0,141.0,1.45,1.45,Pablo Sarabia,2023-08-15 10:04:04.755472
19139,0,0,17,0,,17.3,65,13,,0,2,2,Nélson,1,0,4.7,84,23,27.4,45,14,90,45,0,0,,0,2.0,0,0,Cabral Semedo,0.3,2.0,140,42,2,N.Semedo,0,Wolves,1,23-24,,,,90.0,2.2,2.2,0.886099,0.937099,0.956701,0.965691,2.178985,1.917649,1.749276,1.632264,2.2,2.2,2.611257,2.224959,1.926174,1.742422,1.447922,1.419273,1.379475,1.344666,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,200402.0,0.0,0.0,0.0,0.0,2.0,1.5,2.0,572.0,False,,,200402.jpg,False,,a,20.0,39.0,1903.0,1903.0,973.0,973.0,0.4,0.4,1.0,0.0,0.13,0.13,1.46,,,,0.0,0.0,0.13,0.13,1.46,1.0,398.0,98.0,108.0,37.0,108.0,37.0,268.0,99.0,1.0,0.0,Nélson Cabral Semedo,2023-08-15 10:04:04.755472
19140,0,0,3,0,,11.7,97,8,,0,4,2,Matheus,1,0,4.2,91,14,6.2,171,17,76,55,0,0,1.0,0,2.0,0,0,Santos Carneiro Da Cunha,0.3,24.0,43,16,2,Cunha,0,Wolves,1,23-24,,,,76.0,2.2,2.2,0.886099,0.937099,0.956701,0.965691,2.178985,1.917649,1.749276,1.632264,2.2,2.2,2.611257,2.224959,1.926174,1.742422,1.447922,1.419273,1.379475,1.344666,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,75.0,75.0,430871.0,0.0,0.0,0.0,0.0,1.4,1.0,2.0,590.0,False,Knock - 75% chance of playing,2023-08-11T13:30:07.246810Z,430871.jpg,False,,d,20.0,39.0,3388.0,3388.0,3603.0,3603.0,0.4,0.4,1.0,0.49,0.03,0.52,1.18,,,,0.58,0.0,0.04,0.62,1.4,1.18,134.0,34.0,156.0,19.0,156.0,19.0,288.0,51.0,1.18,0.0,Matheus Santos Carneiro Da Cunha,2023-08-15 10:04:04.755472


(19141, 209)

Number of non-one-games: 0


### FPL gameweek stats

In [26]:
# calculate gameweek stats by looking at differences in cumulative stats

diff_columns = ['assists', 'bps', 'creativity', 'goals_scored', 'goals_conceded', 'own_goals', 'penalties_saved', 
                'red_cards', 'saves', 'threat', 'yellow_cards']

# The first row of every (web_name, season) group has no predecessor, so its diff is NaN.
# Such a row is filled with the cumulative value itself, but only when the cumulative minutes
# are at most 90; otherwise the NaN is kept. This is the rule implemented by my_fill_na,
# applied here column-wise instead of row by row.
at_most_90_minutes = fpl_df['minutes'] <= 90

for col in diff_columns:
    gameweek_diff = fpl_df.groupby(['web_name', 'season'])[col].diff()
    use_cumulative_value = gameweek_diff.isna() & at_most_90_minutes
    fpl_df[f'gameweek_{col}'] = gameweek_diff.mask(use_cumulative_value, fpl_df[col])

In [27]:
# sanity check: cumulative goals next to the derived per-gameweek goals for one player
is_aubameyang = fpl_df['web_name'] == 'Aubameyang'
fpl_df.loc[is_aubameyang, ['goals_scored', 'gameweek_goals_scored']]

Unnamed: 0,goals_scored,gameweek_goals_scored
94,1,
305,1,0.0
549,1,0.0
793,1,0.0
1043,2,1.0
1295,2,0.0
1542,2,0.0
1795,2,0.0
2044,2,0.0
2268,2,0.0


### FPL expected points

In [28]:
# expected points per row: calculate_xPoints receives the row and the bonus-point model clf
fpl_df['gameweek_xPoints'] = fpl_df.apply(calculate_xPoints, axis=1, args=(clf,))

In [29]:
# share of rows for which no expected points could be computed
n_missing_xpoints = fpl_df['gameweek_xPoints'].isnull().sum()
n_missing_xpoints / len(fpl_df)

0.044877488114518575

In [30]:
# distribution of the per-gameweek expected points
fig = px.histogram(data_frame=fpl_df, x='gameweek_xPoints', nbins=40)
fig.show()

In [31]:
# sanity check: expected points of one player, row by row
is_aubameyang = fpl_df['web_name'] == 'Aubameyang'
fpl_df.loc[is_aubameyang, ['gameweek_xPoints']]

Unnamed: 0,gameweek_xPoints
94,
305,2.820277
549,3.723653
793,4.104416
1043,8.16875
1295,2.783207
1542,3.582265
1795,4.246686
2044,3.606897
2268,1.706601


### FPL moving averages

In [32]:
# calculate moving averages based on gameweek stats

ewm_columns = ['gameweek_assists', 'gameweek_bps', 'gameweek_creativity', 'event_points', 'gameweek_goals_scored', 'gameweek_goals_conceded', 'gameweek_saves', 
               'gameweek_threat', 'gameweek_xG', 'gameweek_xA', 'gameweek_xGA', 'gameweek_minutes', 'gameweek_xPoints']

# Players are grouped by web_name only, so a player's average runs across seasons.
for i in rolling_windows:
    new_columns = [col+f'_ewm_{i}' for col in ewm_columns]
    player_ewm = fpl_df.groupby('web_name')[ewm_columns].ewm(alpha=1/i).mean()
    # The result is indexed by (web_name, original row label). Drop the group level and realign on
    # fpl_df's own index, so the assignment does not rely on that index being unnamed and ascending.
    # Requires unique row labels in fpl_df.
    player_ewm = player_ewm.droplevel(0).reindex(fpl_df.index)
    fpl_df[new_columns] = player_ewm.to_numpy()

display(fpl_df.head())
display(fpl_df.shape)

Unnamed: 0,assists,bonus,bps,clean_sheets,corners_and_indirect_freekicks_order,creativity,creativity_rank,creativity_rank_type,direct_freekicks_order,dreamteam_count,element_type,event_points,first_name,goals_conceded,goals_scored,ict_index,ict_index_rank,ict_index_rank_type,influence,influence_rank,influence_rank_type,minutes,now_cost,own_goals,penalties_missed,penalties_order,penalties_saved,points_per_game,red_cards,saves,second_name,selected_by_percent,threat,threat_rank,threat_rank_type,total_points,web_name,yellow_cards,team_name,gameweek,season,gameweek_xG,gameweek_xA,gameweek_xGA,gameweek_minutes,team_xG,team_xGA,team_xG_ewm_5,team_xG_ewm_10,team_xG_ewm_20,team_xG_ewm_40,team_xGA_ewm_5,team_xGA_ewm_10,team_xGA_ewm_20,team_xGA_ewm_40,opponent_xG,opponent_xGA,opponent_xG_ewm_5,opponent_xG_ewm_10,opponent_xG_ewm_20,opponent_xG_ewm_40,opponent_xGA_ewm_5,opponent_xGA_ewm_10,opponent_xGA_ewm_20,opponent_xGA_ewm_40,home,gameweek_assists,gameweek_bps,gameweek_creativity,gameweek_goals_scored,gameweek_goals_conceded,gameweek_own_goals,gameweek_penalties_saved,gameweek_red_cards,gameweek_saves,gameweek_threat,gameweek_yellow_cards,gameweek_xPoints,gameweek_assists_ewm_5,gameweek_bps_ewm_5,gameweek_creativity_ewm_5,event_points_ewm_5,gameweek_goals_scored_ewm_5,gameweek_goals_conceded_ewm_5,gameweek_saves_ewm_5,gameweek_threat_ewm_5,gameweek_xG_ewm_5,gameweek_xA_ewm_5,gameweek_xGA_ewm_5,gameweek_minutes_ewm_5,gameweek_xPoints_ewm_5,gameweek_assists_ewm_10,gameweek_bps_ewm_10,gameweek_creativity_ewm_10,event_points_ewm_10,gameweek_goals_scored_ewm_10,gameweek_goals_conceded_ewm_10,gameweek_saves_ewm_10,gameweek_threat_ewm_10,gameweek_xG_ewm_10,gameweek_xA_ewm_10,gameweek_xGA_ewm_10,gameweek_minutes_ewm_10,gameweek_xPoints_ewm_10,gameweek_assists_ewm_20,gameweek_bps_ewm_20,gameweek_creativity_ewm_20,event_points_ewm_20,gameweek_goals_scored_ewm_20,gameweek_goals_conceded_ewm_20,gameweek_saves_ewm_20,gameweek_threat_ewm_20,gameweek_xG_ewm_20,gameweek_xA_ewm_20,gameweek_xG
A_ewm_20,gameweek_minutes_ewm_20,gameweek_xPoints_ewm_20,gameweek_assists_ewm_40,gameweek_bps_ewm_40,gameweek_creativity_ewm_40,event_points_ewm_40,gameweek_goals_scored_ewm_40,gameweek_goals_conceded_ewm_40,gameweek_saves_ewm_40,gameweek_threat_ewm_40,gameweek_xG_ewm_40,gameweek_xA_ewm_40,gameweek_xGA_ewm_40,gameweek_minutes_ewm_40,gameweek_xPoints_ewm_40,gameweek_assists_expanding,gameweek_bps_expanding,gameweek_creativity_expanding,event_points_expanding,gameweek_goals_scored_expanding,gameweek_goals_conceded_expanding,gameweek_saves_expanding,gameweek_threat_expanding,gameweek_xG_expanding,gameweek_xA_expanding,gameweek_xGA_expanding,gameweek_minutes_expanding,gameweek_xPoints_expanding,gameweek_assists_expanding_per90,gameweek_bps_expanding_per90,gameweek_creativity_expanding_per90,event_points_expanding_per90,gameweek_goals_scored_expanding_per90,gameweek_goals_conceded_expanding_per90,gameweek_saves_expanding_per90,gameweek_threat_expanding_per90,gameweek_xG_expanding_per90,gameweek_xA_expanding_per90,gameweek_xGA_expanding_per90,gameweek_minutes_expanding_per90,gameweek_xPoints_expanding_per90,xG_overperformance,chance_of_playing_next_round,chance_of_playing_this_round,code,cost_change_event,cost_change_event_fall,cost_change_start,cost_change_start_fall,ep_next,ep_this,form,id,in_dreamteam,news,news_added,photo,special,squad_number,status,team,team_code,transfers_in,transfers_in_event,transfers_out,transfers_out_event,value_form,value_season,starts,expected_goals,expected_assists,expected_goal_involvements,expected_goals_conceded,corners_and_indirect_freekicks_text,direct_freekicks_text,penalties_text,expected_goals_per_90,saves_per_90,expected_assists_per_90,expected_goal_involvements_per_90,expected_goals_conceded_per_90,goals_conceded_per_90,now_cost_rank,now_cost_rank_type,form_rank,form_rank_type,points_per_game_rank,points_per_game_rank_type,selected_rank,selected_rank_type,starts_per_90,clean_sheets_per_90,name,data_retrieved_datetime
0,0,0,3,0,,0.0,493,188,4.0,0,2,1,David,0,0,0.0,497,188,0.0,490,188,1,55,0,0,,0,1.0,0,0,Luiz Moreira Marinho,0.9,0.0,479,186,1,David Luiz,0,Arsenal,2,20-21,0.0,0.0,1.9,1.0,1.4,1.9,1.8,1.8,1.8,1.8,0.2,0.2,0.2,0.2,1.9,1.4,1.1,1.1,1.1,1.1,1.5,1.5,1.5,1.5,1.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.993836,0.0,3.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.9,1.0,0.993836,0.0,3.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.9,1.0,0.993836,0.0,3.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.9,1.0,0.993836,0.0,3.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.9,1.0,0.993836,0.0,3.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.9,1.0,0.993836,0.0,270.0,0.0,90.0,0.0,0.0,0.0,0.0,0.0,0.0,171.0,90.0,89.445204,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,0,0,9,0,,5.4,176,43,,0,2,2,Sead,1,0,1.3,259,91,3.6,252,92,90,49,0,0,,0,2.0,0,0,Kolasinac,0.3,4.0,172,48,2,Kolasinac,0,Arsenal,2,20-21,0.0,0.0,1.9,90.0,1.4,1.9,1.8,1.8,1.8,1.8,0.2,0.2,0.2,0.2,1.9,1.4,1.1,1.1,1.1,1.1,1.5,1.5,1.5,1.5,1.0,0.0,9.0,5.4,0.0,1.0,0.0,0.0,0.0,0.0,4.0,0.0,2.033159,0.0,9.0,5.4,2.0,0.0,1.0,0.0,4.0,0.0,0.0,1.9,90.0,2.033159,0.0,9.0,5.4,2.0,0.0,1.0,0.0,4.0,0.0,0.0,1.9,90.0,2.033159,0.0,9.0,5.4,2.0,0.0,1.0,0.0,4.0,0.0,0.0,1.9,90.0,2.033159,0.0,9.0,5.4,2.0,0.0,1.0,0.0,4.0,0.0,0.0,1.9,90.0,2.033159,0.0,9.0,5.4,2.0,0.0,1.0,0.0,4.0,0.0,0.0,1.9,90.0,2.033159,0.0,9.0,5.4,2.0,0.0,1.0,0.0,4.0,0.0,0.0,1.9,90.0,2.033159,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,0,0,6,0,1.0,17.6,97,64,3.0,0,3,1,Nicolas,0,0,3.4,184,86,3.0,257,115,41,78,0,0,2.0,0,1.0,0,0,Pépé,1.1,14.0,123,69,2,Pépé,0,Arsenal,2,20-21,0.1,0.0,1.9,41.0,1.4,1.9,1.8,1.8,1.8,1.8,0.2,0.2,0.2,0.2,1.9,1.4,1.1,1.1,1.1,1.1,1.5,1.5,1.5,1.5,1.0,0.0,6.0,17.6,0.0,0.0,0.0,0.0,0.0,0.0,14.0,0.0,1.500366,0.0,6.0,17.6,1.0,0.0,0.0,0.0,14.0,0.1,0.0,1.9,41.0,1.500366,0.0,6.0,17.6,1.0,0.0,0.0,0.0,14.0,0.1,0.0,1.9,41.0,1.500366,0.0,6.0,17.6,1.0,0.0,0.0,0.0,14.0,0.1,0.0,1.9,41.0,1.500366,0.0,6.0,17.6,1.0,0.0,0.0,0.0,14.0,0.1,0.0,1.9,41.0,1.500366,0.0,6.0,17.6,1.0,0.0,0.0,0.0,14.0,0.1,0.0,1.9,26.0,1.500366,0.0,20.769231,60.923077,3.461538,0.0,0.0,0.0,48.461538,0.346154,0.0,6.576923,90.0,5.193575,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,0,3,34,0,,0.1,276,37,,0,4,8,Edward,0,1,5.5,118,23,34.0,95,16,16,59,0,0,,0,4.5,0,0,Nketiah,1.2,21.0,93,27,9,Nketiah,0,Arsenal,2,20-21,0.9,0.0,1.9,16.0,1.4,1.9,1.8,1.8,1.8,1.8,0.2,0.2,0.2,0.2,1.9,1.4,1.1,1.1,1.1,1.1,1.5,1.5,1.5,1.5,1.0,0.0,34.0,0.1,1.0,0.0,0.0,0.0,0.0,0.0,21.0,0.0,6.641532,0.0,34.0,0.1,8.0,1.0,0.0,0.0,21.0,0.9,0.0,1.9,16.0,6.641532,0.0,34.0,0.1,8.0,1.0,0.0,0.0,21.0,0.9,0.0,1.9,16.0,6.641532,0.0,34.0,0.1,8.0,1.0,0.0,0.0,21.0,0.9,0.0,1.9,16.0,6.641532,0.0,34.0,0.1,8.0,1.0,0.0,0.0,21.0,0.9,0.0,1.9,16.0,6.641532,0.0,34.0,0.1,8.0,1.0,0.0,0.0,21.0,0.9,0.0,1.9,13.0,6.641532,0.0,235.384615,0.692308,55.384615,6.923077,0.0,0.0,145.384615,6.230769,0.0,13.153846,90.0,45.979837,1.111111,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,0,0,8,0,3.0,20.3,89,58,,0,3,2,Bukayo,1,0,5.8,110,58,7.4,222,88,88,54,0,0,,0,2.0,0,0,Saka,4.4,30.0,64,27,2,Saka,0,Arsenal,2,20-21,0.1,0.0,1.9,88.0,1.4,1.9,1.8,1.8,1.8,1.8,0.2,0.2,0.2,0.2,1.9,1.4,1.1,1.1,1.1,1.1,1.5,1.5,1.5,1.5,1.0,0.0,8.0,20.3,0.0,1.0,0.0,0.0,0.0,0.0,30.0,0.0,2.650318,0.0,8.0,20.3,2.0,0.0,1.0,0.0,30.0,0.1,0.0,1.9,88.0,2.650318,0.0,8.0,20.3,2.0,0.0,1.0,0.0,30.0,0.1,0.0,1.9,88.0,2.650318,0.0,8.0,20.3,2.0,0.0,1.0,0.0,30.0,0.1,0.0,1.9,88.0,2.650318,0.0,8.0,20.3,2.0,0.0,1.0,0.0,30.0,0.1,0.0,1.9,88.0,2.650318,0.0,8.0,20.3,2.0,0.0,1.0,0.0,30.0,0.1,0.0,1.9,88.0,2.650318,0.0,8.181818,20.761364,2.045455,0.0,1.022727,0.0,30.681818,0.102273,0.0,1.943182,90.0,2.710552,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


(19141, 209)

In [33]:
# sanity check: show one player's per-gameweek goals next to the
# gameweek_goals_scored_ewm_5/10/20/40 columns
is_aubameyang = fpl_df['web_name'] == 'Aubameyang'
goal_ewm_check_cols = [
    'gameweek_goals_scored',
    'gameweek_goals_scored_ewm_5',
    'gameweek_goals_scored_ewm_10',
    'gameweek_goals_scored_ewm_20',
    'gameweek_goals_scored_ewm_40',
]
fpl_df.loc[is_aubameyang, goal_ewm_check_cols]

Unnamed: 0,gameweek_goals_scored,gameweek_goals_scored_ewm_5,gameweek_goals_scored_ewm_10,gameweek_goals_scored_ewm_20,gameweek_goals_scored_ewm_40
94,,,,,
305,0.0,0.0,0.0,0.0,0.0
549,0.0,0.0,0.0,0.0,0.0
793,0.0,0.0,0.0,0.0,0.0
1043,1.0,0.338753,0.290782,0.269551,0.259573
1295,0.0,0.237982,0.219775,0.209973,0.204997
1542,0.0,0.173476,0.17287,0.170342,0.168632
1795,0.0,0.129574,0.139735,0.142108,0.142674
2044,0.0,0.098435,0.1152,0.120998,0.12322
2268,0.0,0.075696,0.096394,0.104636,0.108102


# FPL expanding stats

In [34]:
# Per-gameweek stats for which a running total per player is added to fpl_df.
expanding_columns = ['gameweek_assists', 'gameweek_bps', 'gameweek_creativity', 'event_points', 'gameweek_goals_scored', 'gameweek_goals_conceded', 'gameweek_saves', 
               'gameweek_threat', 'gameweek_xG', 'gameweek_xA', 'gameweek_xGA', 'gameweek_minutes', 'gameweek_xPoints']
# Output column names: '<stat>_expanding', in the same order as expanding_columns.
expanding_col_names = [col+'_expanding' for col in expanding_columns]

# A player is identified by (first_name, second_name). Season is NOT part of the
# key, so the running totals accumulate over every row of that player in the
# frame's current row order.
player_keys = ['first_name', 'second_name']

# Group on a copy with a fresh positional index: after the key levels are dropped,
# the remaining index level is then exactly each row's position in fpl_df. This
# avoids relying on the auto-generated 'level_2' label and on fpl_df's own index
# being sorted and unique.
expanding_sums = (
    fpl_df
    .reset_index(drop=True)
    .groupby(player_keys)[expanding_columns]
    .expanding()
    .sum()
    .droplevel(player_keys)
    # Restore fpl_df's row order; rows dropped by the groupby (missing name key)
    # come back as NaN instead of causing a length mismatch on assignment.
    .reindex(pd.RangeIndex(len(fpl_df)))
)

# Positional write-back: row i of expanding_sums belongs to row i of fpl_df.
fpl_df[expanding_col_names] = expanding_sums.to_numpy()

display(fpl_df.head())
display(fpl_df.shape)

Unnamed: 0,assists,bonus,bps,clean_sheets,corners_and_indirect_freekicks_order,creativity,creativity_rank,creativity_rank_type,direct_freekicks_order,dreamteam_count,element_type,event_points,first_name,goals_conceded,goals_scored,ict_index,ict_index_rank,ict_index_rank_type,influence,influence_rank,influence_rank_type,minutes,now_cost,own_goals,penalties_missed,penalties_order,penalties_saved,points_per_game,red_cards,saves,second_name,selected_by_percent,threat,threat_rank,threat_rank_type,total_points,web_name,yellow_cards,team_name,gameweek,season,gameweek_xG,gameweek_xA,gameweek_xGA,gameweek_minutes,team_xG,team_xGA,team_xG_ewm_5,team_xG_ewm_10,team_xG_ewm_20,team_xG_ewm_40,team_xGA_ewm_5,team_xGA_ewm_10,team_xGA_ewm_20,team_xGA_ewm_40,opponent_xG,opponent_xGA,opponent_xG_ewm_5,opponent_xG_ewm_10,opponent_xG_ewm_20,opponent_xG_ewm_40,opponent_xGA_ewm_5,opponent_xGA_ewm_10,opponent_xGA_ewm_20,opponent_xGA_ewm_40,home,gameweek_assists,gameweek_bps,gameweek_creativity,gameweek_goals_scored,gameweek_goals_conceded,gameweek_own_goals,gameweek_penalties_saved,gameweek_red_cards,gameweek_saves,gameweek_threat,gameweek_yellow_cards,gameweek_xPoints,gameweek_assists_ewm_5,gameweek_bps_ewm_5,gameweek_creativity_ewm_5,event_points_ewm_5,gameweek_goals_scored_ewm_5,gameweek_goals_conceded_ewm_5,gameweek_saves_ewm_5,gameweek_threat_ewm_5,gameweek_xG_ewm_5,gameweek_xA_ewm_5,gameweek_xGA_ewm_5,gameweek_minutes_ewm_5,gameweek_xPoints_ewm_5,gameweek_assists_ewm_10,gameweek_bps_ewm_10,gameweek_creativity_ewm_10,event_points_ewm_10,gameweek_goals_scored_ewm_10,gameweek_goals_conceded_ewm_10,gameweek_saves_ewm_10,gameweek_threat_ewm_10,gameweek_xG_ewm_10,gameweek_xA_ewm_10,gameweek_xGA_ewm_10,gameweek_minutes_ewm_10,gameweek_xPoints_ewm_10,gameweek_assists_ewm_20,gameweek_bps_ewm_20,gameweek_creativity_ewm_20,event_points_ewm_20,gameweek_goals_scored_ewm_20,gameweek_goals_conceded_ewm_20,gameweek_saves_ewm_20,gameweek_threat_ewm_20,gameweek_xG_ewm_20,gameweek_xA_ewm_20,gameweek_xG
A_ewm_20,gameweek_minutes_ewm_20,gameweek_xPoints_ewm_20,gameweek_assists_ewm_40,gameweek_bps_ewm_40,gameweek_creativity_ewm_40,event_points_ewm_40,gameweek_goals_scored_ewm_40,gameweek_goals_conceded_ewm_40,gameweek_saves_ewm_40,gameweek_threat_ewm_40,gameweek_xG_ewm_40,gameweek_xA_ewm_40,gameweek_xGA_ewm_40,gameweek_minutes_ewm_40,gameweek_xPoints_ewm_40,gameweek_assists_expanding,gameweek_bps_expanding,gameweek_creativity_expanding,event_points_expanding,gameweek_goals_scored_expanding,gameweek_goals_conceded_expanding,gameweek_saves_expanding,gameweek_threat_expanding,gameweek_xG_expanding,gameweek_xA_expanding,gameweek_xGA_expanding,gameweek_minutes_expanding,gameweek_xPoints_expanding,gameweek_assists_expanding_per90,gameweek_bps_expanding_per90,gameweek_creativity_expanding_per90,event_points_expanding_per90,gameweek_goals_scored_expanding_per90,gameweek_goals_conceded_expanding_per90,gameweek_saves_expanding_per90,gameweek_threat_expanding_per90,gameweek_xG_expanding_per90,gameweek_xA_expanding_per90,gameweek_xGA_expanding_per90,gameweek_minutes_expanding_per90,gameweek_xPoints_expanding_per90,xG_overperformance,chance_of_playing_next_round,chance_of_playing_this_round,code,cost_change_event,cost_change_event_fall,cost_change_start,cost_change_start_fall,ep_next,ep_this,form,id,in_dreamteam,news,news_added,photo,special,squad_number,status,team,team_code,transfers_in,transfers_in_event,transfers_out,transfers_out_event,value_form,value_season,starts,expected_goals,expected_assists,expected_goal_involvements,expected_goals_conceded,corners_and_indirect_freekicks_text,direct_freekicks_text,penalties_text,expected_goals_per_90,saves_per_90,expected_assists_per_90,expected_goal_involvements_per_90,expected_goals_conceded_per_90,goals_conceded_per_90,now_cost_rank,now_cost_rank_type,form_rank,form_rank_type,points_per_game_rank,points_per_game_rank_type,selected_rank,selected_rank_type,starts_per_90,clean_sheets_per_90,name,data_retrieved_datetime
0,0,0,3,0,,0.0,493,188,4.0,0,2,1,David,0,0,0.0,497,188,0.0,490,188,1,55,0,0,,0,1.0,0,0,Luiz Moreira Marinho,0.9,0.0,479,186,1,David Luiz,0,Arsenal,2,20-21,0.0,0.0,1.9,1.0,1.4,1.9,1.8,1.8,1.8,1.8,0.2,0.2,0.2,0.2,1.9,1.4,1.1,1.1,1.1,1.1,1.5,1.5,1.5,1.5,1.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.993836,0.0,3.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.9,1.0,0.993836,0.0,3.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.9,1.0,0.993836,0.0,3.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.9,1.0,0.993836,0.0,3.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.9,1.0,0.993836,0.0,3.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.9,1.0,0.993836,0.0,270.0,0.0,90.0,0.0,0.0,0.0,0.0,0.0,0.0,171.0,90.0,89.445204,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,0,0,9,0,,5.4,176,43,,0,2,2,Sead,1,0,1.3,259,91,3.6,252,92,90,49,0,0,,0,2.0,0,0,Kolasinac,0.3,4.0,172,48,2,Kolasinac,0,Arsenal,2,20-21,0.0,0.0,1.9,90.0,1.4,1.9,1.8,1.8,1.8,1.8,0.2,0.2,0.2,0.2,1.9,1.4,1.1,1.1,1.1,1.1,1.5,1.5,1.5,1.5,1.0,0.0,9.0,5.4,0.0,1.0,0.0,0.0,0.0,0.0,4.0,0.0,2.033159,0.0,9.0,5.4,2.0,0.0,1.0,0.0,4.0,0.0,0.0,1.9,90.0,2.033159,0.0,9.0,5.4,2.0,0.0,1.0,0.0,4.0,0.0,0.0,1.9,90.0,2.033159,0.0,9.0,5.4,2.0,0.0,1.0,0.0,4.0,0.0,0.0,1.9,90.0,2.033159,0.0,9.0,5.4,2.0,0.0,1.0,0.0,4.0,0.0,0.0,1.9,90.0,2.033159,0.0,9.0,5.4,2.0,0.0,1.0,0.0,4.0,0.0,0.0,1.9,90.0,2.033159,0.0,9.0,5.4,2.0,0.0,1.0,0.0,4.0,0.0,0.0,1.9,90.0,2.033159,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,0,0,6,0,1.0,17.6,97,64,3.0,0,3,1,Nicolas,0,0,3.4,184,86,3.0,257,115,41,78,0,0,2.0,0,1.0,0,0,Pépé,1.1,14.0,123,69,2,Pépé,0,Arsenal,2,20-21,0.1,0.0,1.9,41.0,1.4,1.9,1.8,1.8,1.8,1.8,0.2,0.2,0.2,0.2,1.9,1.4,1.1,1.1,1.1,1.1,1.5,1.5,1.5,1.5,1.0,0.0,6.0,17.6,0.0,0.0,0.0,0.0,0.0,0.0,14.0,0.0,1.500366,0.0,6.0,17.6,1.0,0.0,0.0,0.0,14.0,0.1,0.0,1.9,41.0,1.500366,0.0,6.0,17.6,1.0,0.0,0.0,0.0,14.0,0.1,0.0,1.9,41.0,1.500366,0.0,6.0,17.6,1.0,0.0,0.0,0.0,14.0,0.1,0.0,1.9,41.0,1.500366,0.0,6.0,17.6,1.0,0.0,0.0,0.0,14.0,0.1,0.0,1.9,41.0,1.500366,0.0,6.0,17.6,1.0,0.0,0.0,0.0,14.0,0.1,0.0,1.9,41.0,1.500366,0.0,20.769231,60.923077,3.461538,0.0,0.0,0.0,48.461538,0.346154,0.0,6.576923,90.0,5.193575,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,0,3,34,0,,0.1,276,37,,0,4,8,Edward,0,1,5.5,118,23,34.0,95,16,16,59,0,0,,0,4.5,0,0,Nketiah,1.2,21.0,93,27,9,Nketiah,0,Arsenal,2,20-21,0.9,0.0,1.9,16.0,1.4,1.9,1.8,1.8,1.8,1.8,0.2,0.2,0.2,0.2,1.9,1.4,1.1,1.1,1.1,1.1,1.5,1.5,1.5,1.5,1.0,0.0,34.0,0.1,1.0,0.0,0.0,0.0,0.0,0.0,21.0,0.0,6.641532,0.0,34.0,0.1,8.0,1.0,0.0,0.0,21.0,0.9,0.0,1.9,16.0,6.641532,0.0,34.0,0.1,8.0,1.0,0.0,0.0,21.0,0.9,0.0,1.9,16.0,6.641532,0.0,34.0,0.1,8.0,1.0,0.0,0.0,21.0,0.9,0.0,1.9,16.0,6.641532,0.0,34.0,0.1,8.0,1.0,0.0,0.0,21.0,0.9,0.0,1.9,16.0,6.641532,0.0,34.0,0.1,8.0,1.0,0.0,0.0,21.0,0.9,0.0,1.9,16.0,6.641532,0.0,235.384615,0.692308,55.384615,6.923077,0.0,0.0,145.384615,6.230769,0.0,13.153846,90.0,45.979837,1.111111,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,0,0,8,0,3.0,20.3,89,58,,0,3,2,Bukayo,1,0,5.8,110,58,7.4,222,88,88,54,0,0,,0,2.0,0,0,Saka,4.4,30.0,64,27,2,Saka,0,Arsenal,2,20-21,0.1,0.0,1.9,88.0,1.4,1.9,1.8,1.8,1.8,1.8,0.2,0.2,0.2,0.2,1.9,1.4,1.1,1.1,1.1,1.1,1.5,1.5,1.5,1.5,1.0,0.0,8.0,20.3,0.0,1.0,0.0,0.0,0.0,0.0,30.0,0.0,2.650318,0.0,8.0,20.3,2.0,0.0,1.0,0.0,30.0,0.1,0.0,1.9,88.0,2.650318,0.0,8.0,20.3,2.0,0.0,1.0,0.0,30.0,0.1,0.0,1.9,88.0,2.650318,0.0,8.0,20.3,2.0,0.0,1.0,0.0,30.0,0.1,0.0,1.9,88.0,2.650318,0.0,8.0,20.3,2.0,0.0,1.0,0.0,30.0,0.1,0.0,1.9,88.0,2.650318,0.0,8.0,20.3,2.0,0.0,1.0,0.0,30.0,0.1,0.0,1.9,88.0,2.650318,0.0,8.181818,20.761364,2.045455,0.0,1.022727,0.0,30.681818,0.102273,0.0,1.943182,90.0,2.710552,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


(19141, 209)

In [35]:
# sanity check: one player's per-gameweek inputs side by side with their running totals
aubameyang_rows = fpl_df['web_name'] == 'Aubameyang'
expanding_check_cols = expanding_columns + expanding_col_names + ['season']
fpl_df.loc[aubameyang_rows, expanding_check_cols]

Unnamed: 0,gameweek_assists,gameweek_bps,gameweek_creativity,event_points,gameweek_goals_scored,gameweek_goals_conceded,gameweek_saves,gameweek_threat,gameweek_xG,gameweek_xA,gameweek_xGA,gameweek_minutes,gameweek_xPoints,gameweek_assists_expanding,gameweek_bps_expanding,gameweek_creativity_expanding,event_points_expanding,gameweek_goals_scored_expanding,gameweek_goals_conceded_expanding,gameweek_saves_expanding,gameweek_threat_expanding,gameweek_xG_expanding,gameweek_xA_expanding,gameweek_xGA_expanding,gameweek_minutes_expanding,gameweek_xPoints_expanding,season
94,,,,2,,,,,0.0,0.0,3.0,90.0,,,,,2.0,,,,,0.0,0.0,3.0,90.0,,20-21
305,0.0,10.0,4.6,2,0.0,1.0,0.0,30.0,0.0,0.0,0.2,90.0,2.820277,0.0,10.0,4.6,4.0,0.0,1.0,0.0,30.0,0.0,0.0,3.2,180.0,2.820277,20-21
549,0.0,7.0,29.9,2,0.0,1.0,0.0,2.0,0.0,0.5,1.4,90.0,3.723653,0.0,17.0,34.5,6.0,0.0,2.0,0.0,32.0,0.0,0.5,4.6,270.0,6.54393,20-21
793,0.0,12.0,16.0,2,0.0,1.0,0.0,12.0,0.3,0.1,1.2,90.0,4.104416,0.0,29.0,50.5,8.0,0.0,3.0,0.0,44.0,0.3,0.6,5.8,360.0,10.648346,20-21
1043,0.0,31.0,16.4,8,1.0,0.0,0.0,26.0,0.9,0.1,0.3,86.0,8.16875,0.0,60.0,66.9,16.0,1.0,3.0,0.0,70.0,1.2,0.7,6.1,446.0,18.817096,20-21
1295,0.0,7.0,28.3,2,0.0,3.0,0.0,8.0,0.0,0.2,1.8,90.0,2.783207,0.0,67.0,95.2,18.0,1.0,6.0,0.0,78.0,1.2,0.9,7.9,536.0,21.600303,20-21
1542,0.0,4.0,14.4,3,0.0,0.0,0.0,39.0,0.3,0.0,2.5,90.0,3.582265,0.0,71.0,109.6,21.0,1.0,6.0,0.0,117.0,1.5,0.9,10.4,626.0,25.182568,20-21
1795,0.0,2.0,1.3,2,0.0,2.0,0.0,61.0,0.4,0.0,1.4,90.0,4.246686,0.0,73.0,110.9,23.0,1.0,8.0,0.0,178.0,1.9,0.9,11.8,716.0,29.429254,20-21
2044,0.0,6.0,7.3,2,0.0,2.0,0.0,26.0,0.2,0.0,0.5,90.0,3.606897,0.0,79.0,118.2,25.0,1.0,10.0,0.0,204.0,2.1,0.9,12.3,806.0,33.036151,20-21
2268,0.0,-1.0,18.5,0,0.0,1.0,0.0,68.0,0.2,0.1,0.9,90.0,1.706601,0.0,78.0,136.7,25.0,1.0,11.0,0.0,272.0,2.3,1.0,13.2,896.0,34.742752,20-21


# FPL per 90 stats

In [36]:
# Per-90 column names: each running-total column name with '_per90' appended.
per_90_columns = [f'{total_col}_per90' for total_col in expanding_col_names]

# Scale every running total by the player's running minutes, expressed per 90 minutes.
# Zero running minutes are not special-cased, so those rows end up as inf or NaN.
cumulative_minutes = fpl_df['gameweek_minutes_expanding']
for per90_col, total_col in zip(per_90_columns, expanding_col_names):
    fpl_df[per90_col] = fpl_df[total_col] / cumulative_minutes * 90

In [37]:
# sanity check: one player's per-90 values next to the running totals they are derived from
aubameyang_rows = fpl_df['web_name'] == 'Aubameyang'
per_90_check_cols = per_90_columns + expanding_col_names + ['season']
fpl_df.loc[aubameyang_rows, per_90_check_cols]

Unnamed: 0,gameweek_assists_expanding_per90,gameweek_bps_expanding_per90,gameweek_creativity_expanding_per90,event_points_expanding_per90,gameweek_goals_scored_expanding_per90,gameweek_goals_conceded_expanding_per90,gameweek_saves_expanding_per90,gameweek_threat_expanding_per90,gameweek_xG_expanding_per90,gameweek_xA_expanding_per90,gameweek_xGA_expanding_per90,gameweek_minutes_expanding_per90,gameweek_xPoints_expanding_per90,gameweek_assists_expanding,gameweek_bps_expanding,gameweek_creativity_expanding,event_points_expanding,gameweek_goals_scored_expanding,gameweek_goals_conceded_expanding,gameweek_saves_expanding,gameweek_threat_expanding,gameweek_xG_expanding,gameweek_xA_expanding,gameweek_xGA_expanding,gameweek_minutes_expanding,gameweek_xPoints_expanding,season
94,,,,2.0,,,,,0.0,0.0,3.0,90.0,,,,,2.0,,,,,0.0,0.0,3.0,90.0,,20-21
305,0.0,5.0,2.3,2.0,0.0,0.5,0.0,15.0,0.0,0.0,1.6,90.0,1.410138,0.0,10.0,4.6,4.0,0.0,1.0,0.0,30.0,0.0,0.0,3.2,180.0,2.820277,20-21
549,0.0,5.666667,11.5,2.0,0.0,0.666667,0.0,10.666667,0.0,0.166667,1.533333,90.0,2.18131,0.0,17.0,34.5,6.0,0.0,2.0,0.0,32.0,0.0,0.5,4.6,270.0,6.54393,20-21
793,0.0,7.25,12.625,2.0,0.0,0.75,0.0,11.0,0.075,0.15,1.45,90.0,2.662087,0.0,29.0,50.5,8.0,0.0,3.0,0.0,44.0,0.3,0.6,5.8,360.0,10.648346,20-21
1043,0.0,12.107623,13.5,3.2287,0.201794,0.605381,0.0,14.125561,0.242152,0.141256,1.230942,90.0,3.797172,0.0,60.0,66.9,16.0,1.0,3.0,0.0,70.0,1.2,0.7,6.1,446.0,18.817096,20-21
1295,0.0,11.25,15.985075,3.022388,0.16791,1.007463,0.0,13.097015,0.201493,0.151119,1.326493,90.0,3.626917,0.0,67.0,95.2,18.0,1.0,6.0,0.0,78.0,1.2,0.9,7.9,536.0,21.600303,20-21
1542,0.0,10.207668,15.757188,3.019169,0.14377,0.86262,0.0,16.821086,0.215655,0.129393,1.495208,90.0,3.620497,0.0,71.0,109.6,21.0,1.0,6.0,0.0,117.0,1.5,0.9,10.4,626.0,25.182568,20-21
1795,0.0,9.175978,13.939944,2.891061,0.125698,1.005587,0.0,22.374302,0.238827,0.113128,1.48324,90.0,3.699208,0.0,73.0,110.9,23.0,1.0,8.0,0.0,178.0,1.9,0.9,11.8,716.0,29.429254,20-21
2044,0.0,8.82134,13.198511,2.791563,0.111663,1.116625,0.0,22.779156,0.234491,0.100496,1.373449,90.0,3.6889,0.0,79.0,118.2,25.0,1.0,10.0,0.0,204.0,2.1,0.9,12.3,806.0,33.036151,20-21
2268,0.0,7.834821,13.731027,2.511161,0.100446,1.104911,0.0,27.321429,0.231027,0.100446,1.325893,90.0,3.489785,0.0,78.0,136.7,25.0,1.0,11.0,0.0,272.0,2.3,1.0,13.2,896.0,34.742752,20-21


# Add xG overperformance

In [38]:
# xG overperformance = running goals scored / running xG for each player row.
goals_total = fpl_df['gameweek_goals_scored_expanding']
xg_total = fpl_df['gameweek_xG_expanding']
fpl_df['xG_overperformance'] = goals_total / xg_total

# Fix division by zero: with zero running xG the ratio is undefined
# (goals / 0 -> inf, 0 / 0 -> NaN). Both cases are set to the neutral value 1.
# Checking the denominator catches the 0 / 0 rows that an isinf() test misses.
# Rows whose running goal total is itself missing stay NaN.
zero_xg_rows = xg_total.eq(0) & goals_total.notna()
fpl_df.loc[zero_xg_rows, 'xG_overperformance'] = 1

In [39]:
# sanity check: one player's per-gameweek and running goals/xG together with the resulting ratio
son_rows = fpl_df['web_name'] == 'Son'
overperformance_check_cols = [
    'gameweek_goals_scored',
    'gameweek_xG',
    'gameweek_goals_scored_expanding',
    'gameweek_xG_expanding',
    'gameweek_minutes_expanding',
    'xG_overperformance',
    'season',
]
fpl_df.loc[son_rows, overperformance_check_cols]

Unnamed: 0,gameweek_goals_scored,gameweek_xG,gameweek_goals_scored_expanding,gameweek_xG_expanding,gameweek_minutes_expanding,xG_overperformance,season
265,,0.1,,0.1,45.0,,20-21
504,2.0,0.5,2.0,0.6,117.0,3.333333,20-21
746,1.0,0.5,3.0,1.1,196.0,2.727273,20-21
997,1.0,0.3,4.0,1.4,286.0,2.857143,20-21
1249,0.0,0.2,4.0,1.6,370.0,2.5,20-21
1493,0.0,0.5,4.0,2.1,460.0,1.904762,20-21
1748,1.0,0.4,5.0,2.5,550.0,2.0,20-21
1997,0.0,0.0,5.0,2.5,640.0,2.0,20-21
2222,1.0,0.0,6.0,2.5,727.0,2.4,20-21
2471,0.0,0.1,6.0,2.6,817.0,2.307692,20-21


In [40]:
# Inspect the name fields (used as the per-player grouping key) for rows with web_name 'Son'
son_rows = fpl_df['web_name'] == 'Son'
name_check_cols = ['first_name', 'second_name', 'web_name', 'season']
fpl_df.loc[son_rows, name_check_cols]

Unnamed: 0,first_name,second_name,web_name,season
265,Heung-Min,Son,Son,20-21
504,Heung-Min,Son,Son,20-21
746,Heung-Min,Son,Son,20-21
997,Heung-Min,Son,Son,20-21
1249,Heung-Min,Son,Son,20-21
1493,Heung-Min,Son,Son,20-21
1748,Heung-Min,Son,Son,20-21
1997,Heung-Min,Son,Son,20-21
2222,Heung-Min,Son,Son,20-21
2471,Heung-Min,Son,Son,20-21


# Save data

In [41]:
# Write the processed frame to CSV; the DataFrame index is written as the first column.
filepath = Path('../data/fpl_df.csv')
fpl_df.to_csv(path_or_buf=filepath, index=True)