# DATA PROCESSING AND CALCULATION OF xG-ADJUSTED FPL POINTS

## Setup and pre-processing

In [1]:
# give the number of the latest FPL round (gameweek); it is used below to build
# the file names of the current and previous week's data files and the names of
# the per-week columns
latest_gameweek = 32

In [2]:
import sys

In [3]:
# add the parent directory to the module search path; presumably this is what
# makes the project-local `py_files` package (imported below) importable -- TODO confirm
sys.path.append('..')

In [4]:
# import basic libraries
import pandas as pd
import numpy as np
import json
import requests
from scipy.stats import poisson
from py_files.fbref_data import fetch_player_data, fetch_team_data
from pathlib import Path

# allow more data columns to be shown than by default.
# The fully qualified option name is used on purpose: the short pattern
# 'max_columns' also matches 'styler.render.max_columns' in newer pandas
# versions, which makes the lookup ambiguous and raises an OptionError.
pd.set_option('display.max_columns', 500)

In [5]:
# download player data from fbref.com (via the project's fetch_player_data helper)
playerStats = fetch_player_data()
# save player stats as a CSV snapshot whose file name carries the gameweek number
path = Path(f'../data/fbref/player_stats_week{latest_gameweek}.csv')
playerStats.to_csv(path)
# preview the first rows of the downloaded table
playerStats.head()

Unnamed: 0_level_0,General,General,General,General,General,General,General,Playing Time,Playing Time,Playing Time,Playing Time,Performance,Performance,Performance,Performance,Performance,Performance,Performance,Per 90 Minutes,Per 90 Minutes,Per 90 Minutes,Per 90 Minutes,Per 90 Minutes,Expected,Expected,Expected,Expected,Per 90 Minutes,Per 90 Minutes,Per 90 Minutes,Per 90 Minutes,Per 90 Minutes,General
Unnamed: 0_level_1,Rk,Player,Nation,Pos,Squad,Age,Born,MP,Starts,Min,90s,Gls,Ast,G-PK,PK,PKatt,CrdY,CrdR,Gls,Ast,G+A,G-PK,G+A-PK,xG,npxG,xA,npxG+xA,xG,xA,xG+xA,npxG,npxG+xA,Matches
0,1,Max Aarons,eng ENG,DF,Norwich City,22-101,2000,28,27,2398,26.6,0,1,0,0,0,6,0,0.0,0.04,0.04,0.0,0.04,0.7,0.7,0.9,1.6,0.02,0.04,0.06,0.02,0.06,Matches
1,2,Che Adams,sct SCO,FW,Southampton,25-276,1996,25,21,1823,20.3,7,3,7,0,0,0,0,0.35,0.15,0.49,0.35,0.49,6.3,6.3,2.5,8.8,0.31,0.13,0.44,0.31,0.44,Matches
2,3,Rayan Aït Nouri,fr FRA,DF,Wolves,20-313,2001,18,15,1378,15.3,0,2,0,0,0,4,0,0.0,0.13,0.13,0.0,0.13,0.4,0.4,1.2,1.6,0.03,0.08,0.11,0.03,0.11,Matches
3,4,Kristoffer Ajer,no NOR,DF,Brentford,23-363,1998,19,18,1577,17.5,0,2,0,0,0,4,0,0.0,0.11,0.11,0.0,0.11,0.4,0.4,1.0,1.4,0.02,0.05,0.08,0.02,0.08,Matches
4,5,Nathan Aké,nl NED,DF,Manchester City,27-056,1995,10,8,761,8.5,1,0,1,0,0,0,0,0.12,0.0,0.12,0.12,0.12,0.7,0.7,0.5,1.2,0.09,0.06,0.14,0.09,0.14,Matches


In [6]:
# download team data from fbref.com (via the project's fetch_team_data helper)
teamStats = fetch_team_data()
# index the table by team name so that rows can be looked up by 'Squad'
teamStats.set_index('Squad', inplace=True)
# save team stats
# NOTE(review): saving is disabled in this cell. The previous week's team file
# that is read further below already contains the derived per-game columns, so
# the file is presumably written only after those columns are added -- confirm.
#path = Path(f'../data/fbref/team_stats_week{latest_gameweek}.csv')
#teamStats.to_csv(path)
teamStats

Unnamed: 0_level_0,Rk,MP,W,D,L,GF,GA,GD,Pts,xG,xGA,xGD,xGD/90,Last 5,Attendance,Top Team Scorer,Goalkeeper,Notes
Squad,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
Manchester City,1,31,23,5,3,72,20,52,74,70.6,22.4,48.2,1.55,W W D W D,52715,Kevin De Bruyne - 11,Ederson,
Liverpool,2,31,22,7,2,79,22,57,73,75.2,30.3,44.9,1.45,W W W W D,53435,Mohamed Salah - 20,Alisson,
Chelsea,3,30,18,8,4,64,23,41,62,53.0,27.5,25.4,0.85,W W W L W,38013,Mason Mount - 10,Edouard Mendy,
Tottenham,4,31,18,3,10,56,37,19,57,52.6,34.7,17.8,0.58,L W W W W,55468,Son Heung-min - 17,Hugo Lloris,
Arsenal,5,30,17,3,10,45,36,9,54,45.9,36.4,9.5,0.32,W L W L L,59569,"Emile Smith Rowe, Bukayo Saka - 9",Aaron Ramsdale,
West Ham,6,32,15,6,11,51,42,9,51,42.3,40.9,1.3,0.04,L W L W L,58071,Jarrod Bowen - 9,Łukasz Fabiański,
Manchester Utd,7,31,14,9,8,49,42,7,51,46.9,42.4,4.6,0.15,D L W D L,73089,Cristiano Ronaldo - 12,David de Gea,
Wolves,8,32,15,4,13,33,28,5,49,30.3,46.5,-16.2,-0.51,W W L W L,30720,Raúl Jiménez - 6,José Sá,
Leicester City,9,29,11,7,11,45,48,-3,40,39.7,49.0,-9.3,-0.32,W L W D W,32206,Jamie Vardy - 10,Kasper Schmeichel,
Crystal Palace,10,31,8,13,10,43,40,3,37,37.8,36.0,1.8,0.06,D W D W L,24484,Wilfried Zaha - 11,Vicente Guaita,


In [7]:
# fetch FPL data online
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() reports HTTP errors (rate limiting, maintenance pages, ...)
# right here instead of failing later with a confusing JSON or KeyError.
response = requests.get('https://fantasy.premierleague.com/api/bootstrap-static/', timeout=30)
response.raise_for_status()
data = response.json()
# one row per player, indexed by the player's FPL id
df = pd.DataFrame(data['elements'])
df.set_index('id',inplace=True)
# flag recording that df came from the live API and not from a local file;
# this is used later
df_online=1

# fetch data locally
#df = pd.read_csv('../data/fpl/data_week' + str(latest_gameweek) + '.csv', index_col=0)
#df_online=0

df.head()

Unnamed: 0_level_0,chance_of_playing_next_round,chance_of_playing_this_round,code,cost_change_event,cost_change_event_fall,cost_change_start,cost_change_start_fall,dreamteam_count,element_type,ep_next,ep_this,event_points,first_name,form,in_dreamteam,news,news_added,now_cost,photo,points_per_game,second_name,selected_by_percent,special,squad_number,status,team,team_code,total_points,transfers_in,transfers_in_event,transfers_out,transfers_out_event,value_form,value_season,web_name,minutes,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,saves,bonus,bps,influence,creativity,threat,ict_index,influence_rank,influence_rank_type,creativity_rank,creativity_rank_type,threat_rank,threat_rank_type,ict_index_rank,ict_index_rank_type,corners_and_indirect_freekicks_order,corners_and_indirect_freekicks_text,direct_freekicks_order,direct_freekicks_text,penalties_order,penalties_text
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1
1,100.0,100.0,80201,0,0,-5,5,1,1,3.5,2.0,0,Bernd,1.5,False,,2022-02-11T08:00:15.144286Z,45,80201.jpg,2.5,Leno,0.9,False,,a,1,3,10,79746,226,199265,2544,0.3,2.2,Leno,360,0,0,1,9,0,0,0,0,0,10,0,69,85.0,0.0,0.0,8.5,377,29,606,69,588,60,433,30,,,,,,
2,0.0,0.0,115918,0,0,0,0,0,1,0.0,0.0,0,Rúnar Alex,0.0,False,Joined OH Leuven on a season-long loan - Expec...,2021-08-31T22:00:09.069158Z,40,115918.jpg,0.0,Rúnarsson,0.5,False,,u,1,3,0,19017,0,79879,179,0.0,0.0,Rúnarsson,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,533,54,518,33,483,19,537,54,,,,,,
3,0.0,0.0,47431,0,0,-2,2,0,3,0.0,0.0,0,Willian,0.0,False,Transferred to Corinthians,2021-08-20T09:30:14.065783Z,63,47431.jpg,0.0,Borges Da Silva,0.1,False,,u,1,3,0,914,0,20505,8,0.0,0.0,Willian,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,718,299,718,299,718,299,718,299,,,,,,
4,0.0,0.0,54694,0,0,-4,4,0,4,0.0,0.0,0,Pierre-Emerick,0.0,False,Left club by mutual consent,2022-02-02T08:21:28.428217Z,96,54694.jpg,3.1,Aubameyang,1.3,False,,u,1,3,44,742898,0,897547,439,0.0,4.6,Aubameyang,1036,4,1,6,16,0,0,2,3,0,0,7,131,217.6,132.4,582.0,92.9,264,33,221,33,46,19,133,26,,,,,,
5,100.0,100.0,58822,0,0,-3,3,0,2,4.5,2.5,1,Cédric,2.0,False,,2022-01-13T23:00:13.573747Z,42,58822.jpg,2.0,Soares,0.5,False,,a,1,3,26,50482,7561,33841,1539,0.5,6.2,Cédric,984,0,0,3,18,0,0,0,3,0,0,0,193,182.2,199.3,64.0,44.7,289,108,163,39,318,106,279,93,2.0,,,,,


In [8]:
# all team names, ordered so that the value in df['team'] minus one is the
# position of the matching name in this list
team_names = [
    'Arsenal', 'Aston Villa', 'Brentford', 'Brighton', 'Burnley',
    'Chelsea', 'Crystal Palace', 'Everton', 'Leicester City', 'Leeds United',
    'Liverpool', 'Manchester City', 'Manchester Utd', 'Newcastle Utd', 'Norwich City',
    'Southampton', 'Tottenham', 'Watford', 'West Ham', 'Wolves',
]
# look up the proper team name of every player from the player's team number
df['team_name'] = [team_names[team_number - 1] for team_number in df['team']]

## Probability to keep a clean sheet

Here, we estimate for each team the probability that the team keeps a clean sheet (against average opposition). We do this by first calculating the expected goals allowed per game for each team. Then, we assume that conceding goals follows a Poisson distribution, from which we then get the desired probability.

In [9]:
# season totals divided by matches played give the per-game expected goals
# scored (xG) and allowed (xGA)
teamStats['xG per game'] = teamStats['xG'].div(teamStats['MP'])
teamStats['xGA per game'] = teamStats['xGA'].div(teamStats['MP'])
# probability of zero goals under a Poisson distribution whose mean is the
# expected goals allowed per game
teamStats['probability no goals allowed'] = poisson.pmf(k=0, mu=teamStats['xGA per game'])
teamStats

Unnamed: 0_level_0,Rk,MP,W,D,L,GF,GA,GD,Pts,xG,xGA,xGD,xGD/90,Last 5,Attendance,Top Team Scorer,Goalkeeper,Notes,xG per game,xGA per game,probability no goals allowed
Squad,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Manchester City,1,31,23,5,3,72,20,52,74,70.6,22.4,48.2,1.55,W W D W D,52715,Kevin De Bruyne - 11,Ederson,,2.277419,0.722581,0.485498
Liverpool,2,31,22,7,2,79,22,57,73,75.2,30.3,44.9,1.45,W W W W D,53435,Mohamed Salah - 20,Alisson,,2.425806,0.977419,0.376281
Chelsea,3,30,18,8,4,64,23,41,62,53.0,27.5,25.4,0.85,W W W L W,38013,Mason Mount - 10,Edouard Mendy,,1.766667,0.916667,0.39985
Tottenham,4,31,18,3,10,56,37,19,57,52.6,34.7,17.8,0.58,L W W W W,55468,Son Heung-min - 17,Hugo Lloris,,1.696774,1.119355,0.32649
Arsenal,5,30,17,3,10,45,36,9,54,45.9,36.4,9.5,0.32,W L W L L,59569,"Emile Smith Rowe, Bukayo Saka - 9",Aaron Ramsdale,,1.53,1.213333,0.297205
West Ham,6,32,15,6,11,51,42,9,51,42.3,40.9,1.3,0.04,L W L W L,58071,Jarrod Bowen - 9,Łukasz Fabiański,,1.321875,1.278125,0.278559
Manchester Utd,7,31,14,9,8,49,42,7,51,46.9,42.4,4.6,0.15,D L W D L,73089,Cristiano Ronaldo - 12,David de Gea,,1.512903,1.367742,0.254681
Wolves,8,32,15,4,13,33,28,5,49,30.3,46.5,-16.2,-0.51,W W L W L,30720,Raúl Jiménez - 6,José Sá,,0.946875,1.453125,0.233838
Leicester City,9,29,11,7,11,45,48,-3,40,39.7,49.0,-9.3,-0.32,W L W D W,32206,Jamie Vardy - 10,Kasper Schmeichel,,1.368966,1.689655,0.184583
Crystal Palace,10,31,8,13,10,43,40,3,37,37.8,36.0,1.8,0.06,D W D W L,24484,Wilfried Zaha - 11,Vicente Guaita,,1.219355,1.16129,0.313082


## xG-adjusted points

Next, we determine for each player their 'adjusted points'. To do this, we first subtract for each player all the points they have accumulated through goals, assists and clean sheets. Then, we add points for each player based on their expected goals, assists and clean sheets. This gives a much improved estimate of each player's true point generating capability. 

A player who makes a pass that directly leads to a shot is assigned the xG value of that shot as xA (expected assists), i.e. xA is a measure of 'goal assists'. In FPL, however, the definition of an assist is somewhat more relaxed: e.g. a goal resulting from the rebound of a parried shot awards an assist to the player who took the initial shot. For this reason, we compare the total number of assists awarded in FPL with the total xA of all players to estimate the proportion of FPL assists that xA covers. Then, we adjust only that proportion of each player's assists based on their xA. (In the code below, this proportion is currently fixed to the value obtained from last season rather than recomputed.)

In [10]:
# auxiliary information and variables
# cast points_per_game to float so that it can be used in arithmetic
df['points_per_game'] = df['points_per_game'].astype(float)
#df['games played'] = df['total_points']/df['points_per_game']
# disabled alternative: estimate the proportion from the current data
#xA_proportion = playerStats['xA'].sum()/df['assists'].sum()
# proportion of FPL assists that is covered by xA; fixed value taken
# from last season
xA_proportion = 0.743

In [11]:
# load the FPL data saved for the previous gameweek; for the very first round
# use the pre-season file instead and add the columns it does not have yet
if latest_gameweek == 1:
    df_previous_week = (
        pd.read_csv('../data/fpl/pre_season.csv', index_col=0)
        .assign(minutes=0, xG=np.nan, xA=np.nan)
    )
elif latest_gameweek > 1:
    df_previous_week = pd.read_csv(f'../data/fpl/data_week{latest_gameweek - 1}.csv', index_col=0)
df_previous_week.head()

Unnamed: 0_level_0,adjusted points,adjusted points per game,assists,assists_week1,assists_week10,assists_week11,assists_week12,assists_week13,assists_week14,assists_week15,assists_week16,assists_week17,assists_week18,assists_week19,assists_week2,assists_week20,assists_week21,assists_week22,assists_week23,assists_week24,assists_week25,assists_week26,assists_week27,assists_week28,assists_week29,assists_week3,assists_week30,assists_week31,assists_week4,assists_week5,assists_week6,assists_week7,assists_week8,assists_week9,bonus,bonus_week1,bonus_week10,bonus_week11,bonus_week12,bonus_week13,bonus_week14,bonus_week15,bonus_week16,bonus_week17,bonus_week18,bonus_week19,bonus_week2,bonus_week20,bonus_week21,bonus_week22,bonus_week23,bonus_week24,bonus_week25,bonus_week26,bonus_week27,bonus_week28,bonus_week29,bonus_week3,bonus_week30,bonus_week31,bonus_week4,bonus_week5,bonus_week6,bonus_week7,bonus_week8,bonus_week9,bps,chance_of_playing_next_round,chance_of_playing_this_round,clean_sheet_points,clean_sheets,cleansheet_week1,cleansheet_week10,cleansheet_week11,cleansheet_week12,cleansheet_week13,cleansheet_week14,cleansheet_week15,cleansheet_week16,cleansheet_week17,cleansheet_week18,cleansheet_week19,cleansheet_week2,cleansheet_week20,cleansheet_week21,cleansheet_week22,cleansheet_week23,cleansheet_week24,cleansheet_week25,cleansheet_week26,cleansheet_week27,cleansheet_week28,cleansheet_week29,cleansheet_week3,cleansheet_week30,cleansheet_week31,cleansheet_week4,cleansheet_week5,cleansheet_week6,cleansheet_week7,cleansheet_week8,cleansheet_week9,code,corners_and_indirect_freekicks_order,corners_and_indirect_freekicks_text,cost_change_event,cost_change_event_fall,cost_change_start,cost_change_start_fall,creativity,creativity_rank,creativity_rank_type,direct_freekicks_order,direct_freekicks_text,dreamteam_count,element_type,ep_next,ep_this,event_points,first_name,form,form 10,form 15,form 20,form 5,games played,gameweek,gameweek 10 prediction,gameweek 11 
prediction,gameweek 12 prediction,gameweek 13 prediction,gameweek 14 prediction,gameweek 15 prediction,gameweek 16 prediction,gameweek 17 prediction,gameweek 18 prediction,gameweek 19 prediction,gameweek 2 prediction,gameweek 20 prediction,gameweek 21 prediction,gameweek 22 prediction,gameweek 23 prediction,gameweek 24 prediction,gameweek 25 prediction,gameweek 26 prediction,gameweek 27 prediction,gameweek 28 prediction,gameweek 29 prediction,gameweek 3 prediction,gameweek 30 prediction,gameweek 31 prediction,gameweek 32 prediction,gameweek 33 prediction,gameweek 34 prediction,gameweek 35 prediction,gameweek 36 prediction,gameweek 37 prediction,gameweek 38 prediction,gameweek 39 prediction,gameweek 4 prediction,gameweek 40 prediction,gameweek 5 prediction,gameweek 6 prediction,gameweek 7 prediction,gameweek 8 prediction,gameweek 9 prediction,goals_conceded,goals_scored,goals_week1,goals_week10,goals_week11,goals_week12,goals_week13,goals_week14,goals_week15,goals_week16,goals_week17,goals_week18,goals_week19,goals_week2,goals_week20,goals_week21,goals_week22,goals_week23,goals_week24,goals_week25,goals_week26,goals_week27,goals_week28,goals_week29,goals_week3,goals_week30,goals_week31,goals_week4,goals_week5,goals_week6,goals_week7,goals_week8,goals_week9,ict_index,ict_index_rank,ict_index_rank_type,in_dreamteam,influence,influence_rank,influence_rank_type,minutes,news,news_added,now_cost,own_goals,penalties_missed,penalties_order,penalties_saved,penalties_text,photo,points_per_game,red_cards,saves,second_name,selected_by_percent,special,squad_number,status,team,team_code,team_name,threat,threat_rank,threat_rank_type,total_points,transfers_in,transfers_in_event,transfers_out,transfers_out_event,value,value form 10,value form 5,valuePoints 
metric,value_form,value_next10,value_season,web_name,xA,xA_points,xA_week1,xA_week10,xA_week11,xA_week12,xA_week13,xA_week14,xA_week15,xA_week16,xA_week17,xA_week18,xA_week19,xA_week2,xA_week20,xA_week21,xA_week22,xA_week23,xA_week24,xA_week25,xA_week26,xA_week27,xA_week28,xA_week29,xA_week3,xA_week30,xA_week31,xA_week4,xA_week5,xA_week6,xA_week7,xA_week8,xA_week9,xG,xG+xA,xG+xA per game,xGA_week1,xGA_week10,xGA_week11,xGA_week12,xGA_week13,xGA_week14,xGA_week15,xGA_week16,xGA_week17,xGA_week18,xGA_week19,xGA_week2,xGA_week20,xGA_week21,xGA_week22,xGA_week23,xGA_week24,xGA_week25,xGA_week26,xGA_week27,xGA_week28,xGA_week29,xGA_week3,xGA_week30,xGA_week31,xGA_week4,xGA_week5,xGA_week6,xGA_week7,xGA_week8,xGA_week9,xG_points,xG_week1,xG_week10,xG_week11,xG_week12,xG_week13,xG_week14,xG_week15,xG_week16,xG_week17,xG_week18,xG_week19,xG_week2,xG_week20,xG_week21,xG_week22,xG_week23,xG_week24,xG_week25,xG_week26,xG_week27,xG_week28,xG_week29,xG_week3,xG_week30,xG_week31,xG_week4,xG_week5,xG_week6,xG_week7,xG_week8,xG_week9,xPoints,xPoints week 1,xPoints week 10,xPoints week 11,xPoints week 12,xPoints week 13,xPoints week 14,xPoints week 15,xPoints week 16,xPoints week 17,xPoints week 18,xPoints week 19,xPoints week 2,xPoints week 20,xPoints week 21,xPoints week 22,xPoints week 23,xPoints week 24,xPoints week 25,xPoints week 26,xPoints week 27,xPoints week 28,xPoints week 29,xPoints week 3,xPoints week 30,xPoints week 31,xPoints week 4,xPoints week 5,xPoints week 6,xPoints week 7,xPoints week 8,xPoints week 9,xPoints_next10,yellow_cards,gameweek 41 prediction
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1,Unnamed: 189_level_1,Unnamed: 190_level_1,Unnamed: 191_level_1,Unnamed: 192_level_1,Unnamed: 193_level_1,Unnamed: 194_level_1,Unnamed: 
195_level_1,Unnamed: 196_level_1,Unnamed: 197_level_1,Unnamed: 198_level_1,Unnamed: 199_level_1,Unnamed: 200_level_1,Unnamed: 201_level_1,Unnamed: 202_level_1,Unnamed: 203_level_1,Unnamed: 204_level_1,Unnamed: 205_level_1,Unnamed: 206_level_1,Unnamed: 207_level_1,Unnamed: 208_level_1,Unnamed: 209_level_1,Unnamed: 210_level_1,Unnamed: 211_level_1,Unnamed: 212_level_1,Unnamed: 213_level_1,Unnamed: 214_level_1,Unnamed: 215_level_1,Unnamed: 216_level_1,Unnamed: 217_level_1,Unnamed: 218_level_1,Unnamed: 219_level_1,Unnamed: 220_level_1,Unnamed: 221_level_1,Unnamed: 222_level_1,Unnamed: 223_level_1,Unnamed: 224_level_1,Unnamed: 225_level_1,Unnamed: 226_level_1,Unnamed: 227_level_1,Unnamed: 228_level_1,Unnamed: 229_level_1,Unnamed: 230_level_1,Unnamed: 231_level_1,Unnamed: 232_level_1,Unnamed: 233_level_1,Unnamed: 234_level_1,Unnamed: 235_level_1,Unnamed: 236_level_1,Unnamed: 237_level_1,Unnamed: 238_level_1,Unnamed: 239_level_1,Unnamed: 240_level_1,Unnamed: 241_level_1,Unnamed: 242_level_1,Unnamed: 243_level_1,Unnamed: 244_level_1,Unnamed: 245_level_1,Unnamed: 246_level_1,Unnamed: 247_level_1,Unnamed: 248_level_1,Unnamed: 249_level_1,Unnamed: 250_level_1,Unnamed: 251_level_1,Unnamed: 252_level_1,Unnamed: 253_level_1,Unnamed: 254_level_1,Unnamed: 255_level_1,Unnamed: 256_level_1,Unnamed: 257_level_1,Unnamed: 258_level_1,Unnamed: 259_level_1,Unnamed: 260_level_1,Unnamed: 261_level_1,Unnamed: 262_level_1,Unnamed: 263_level_1,Unnamed: 264_level_1,Unnamed: 265_level_1,Unnamed: 266_level_1,Unnamed: 267_level_1,Unnamed: 268_level_1,Unnamed: 269_level_1,Unnamed: 270_level_1,Unnamed: 271_level_1,Unnamed: 272_level_1,Unnamed: 273_level_1,Unnamed: 274_level_1,Unnamed: 275_level_1,Unnamed: 276_level_1,Unnamed: 277_level_1,Unnamed: 278_level_1,Unnamed: 279_level_1,Unnamed: 280_level_1,Unnamed: 281_level_1,Unnamed: 282_level_1,Unnamed: 283_level_1,Unnamed: 284_level_1,Unnamed: 285_level_1,Unnamed: 286_level_1,Unnamed: 287_level_1,Unnamed: 288_level_1,Unnamed: 289_level_1,Unnamed: 
290_level_1,Unnamed: 291_level_1,Unnamed: 292_level_1,Unnamed: 293_level_1,Unnamed: 294_level_1,Unnamed: 295_level_1,Unnamed: 296_level_1,Unnamed: 297_level_1,Unnamed: 298_level_1,Unnamed: 299_level_1,Unnamed: 300_level_1,Unnamed: 301_level_1,Unnamed: 302_level_1,Unnamed: 303_level_1,Unnamed: 304_level_1,Unnamed: 305_level_1,Unnamed: 306_level_1,Unnamed: 307_level_1,Unnamed: 308_level_1,Unnamed: 309_level_1,Unnamed: 310_level_1,Unnamed: 311_level_1,Unnamed: 312_level_1,Unnamed: 313_level_1,Unnamed: 314_level_1,Unnamed: 315_level_1,Unnamed: 316_level_1,Unnamed: 317_level_1,Unnamed: 318_level_1,Unnamed: 319_level_1,Unnamed: 320_level_1,Unnamed: 321_level_1,Unnamed: 322_level_1,Unnamed: 323_level_1,Unnamed: 324_level_1,Unnamed: 325_level_1,Unnamed: 326_level_1,Unnamed: 327_level_1,Unnamed: 328_level_1,Unnamed: 329_level_1,Unnamed: 330_level_1,Unnamed: 331_level_1,Unnamed: 332_level_1,Unnamed: 333_level_1,Unnamed: 334_level_1,Unnamed: 335_level_1,Unnamed: 336_level_1,Unnamed: 337_level_1,Unnamed: 338_level_1,Unnamed: 339_level_1,Unnamed: 340_level_1,Unnamed: 341_level_1,Unnamed: 342_level_1,Unnamed: 343_level_1,Unnamed: 344_level_1,Unnamed: 345_level_1,Unnamed: 346_level_1,Unnamed: 347_level_1,Unnamed: 348_level_1,Unnamed: 349_level_1,Unnamed: 350_level_1,Unnamed: 351_level_1,Unnamed: 352_level_1,Unnamed: 353_level_1,Unnamed: 354_level_1,Unnamed: 355_level_1,Unnamed: 356_level_1,Unnamed: 357_level_1,Unnamed: 358_level_1,Unnamed: 359_level_1,Unnamed: 360_level_1,Unnamed: 361_level_1,Unnamed: 362_level_1,Unnamed: 363_level_1,Unnamed: 364_level_1,Unnamed: 365_level_1,Unnamed: 366_level_1,Unnamed: 367_level_1,Unnamed: 368_level_1,Unnamed: 369_level_1,Unnamed: 370_level_1,Unnamed: 371_level_1,Unnamed: 372_level_1,Unnamed: 373_level_1,Unnamed: 374_level_1,Unnamed: 375_level_1,Unnamed: 376_level_1,Unnamed: 377_level_1
1,10.655764,2.663941,0,0.0,,,,,,,,,,,0.0,,,,,,,,,,,0.0,0.0,,,,,,,,0,0.0,,,,,,,,,,,0.0,,,,,,,,,,,0.0,0.0,,,,,,,,69,100.0,100.0,4.655764,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,80201,,,0,0,-5,5,0.0,603,69,,,1,1,2.0,2.0,0,Bernd,1.5,4.68128,4.68128,4.68128,4.68128,4.0,31,,,,,,,,,,,,,,,,,,,,,,,,,4.055686,7.561533,3.680223,3.786288,7.699088,4.266116,4.055686,0.0,,0.0,,,,,,9,0,0.0,,,,,,,,,,,0.0,,,,,,,,,,,0.0,0.0,,,,,,,,8.5,430,30,False,85.0,373,29,360,,2022-02-11T08:00:15.144286Z,45,0,0,,0,,80201.jpg,2.5,0,10,Leno,0.9,False,,a,1,3,Arsenal,0.0,585,60,10,79447,1342,195516,3696,0.591987,1.040284,1.040284,1.255794,0.3,7.801026,2.2,Leno,0.0,0.0,0.0,,,,,,,,,,,0.0,,,,,,,,,,,0.0,0.0,,,,,,,,0.0,0.0,0.0,1.3,1.2,0.6,4.4,0.2,1.8,1.0,0.6,0.4,1.5,0.2,2.9,0.0,1.7,0.0,0.5,0.9,0.0,0.6,1.0,1.4,1.1,3.8,0.4,1.6,0.7,1.2,1.1,1.4,0.8,1.5,0.0,0.0,,,,,,,,,,,0.0,,,,,,,,,,,0.0,0.0,,,,,,,,,2.090127,,,,,,,,,,,2.220093,,,,,,,,,,,1.089483,4.68128,,,,,,,,35.104619,0,0.0
2,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,0.0,0.0,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,115918,,,0,0,0,0,0.0,514,32,,,0,1,0.0,0.0,0,Rúnar Alex,0.0,,,,,,31,,,,,,,,,,,,,,,,,,,,,,,,,3.296703,5.928557,2.958416,3.160272,6.119012,3.704049,3.296703,0.0,,0.0,,,,,,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,534,54,False,0.0,530,54,0,Joined OH Leuven on a season-long loan - Expec...,2021-08-31T22:00:09.069158Z,40,0,0,,0,,115918.jpg,0.0,0,0,Rúnarsson,0.5,False,,u,1,3,Arsenal,0.0,479,19,0,19017,0,79667,130,,,,,0.0,7.115928,0.0,Rúnarsson,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,28.463711,0,0.0
3,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,0.0,0.0,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,47431,,,0,0,-2,2,0.0,716,298,,,0,3,0.0,0.0,0,Willian,0.0,,,,,,31,,,,,,,,,,,,,,,,,,,,,,,,,2.336736,4.498886,2.243584,2.348449,4.696223,2.654848,2.336736,0.0,,0.0,,,,,,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,716,298,False,0.0,716,298,0,Transferred to Corinthians,2021-08-20T09:30:14.065783Z,63,0,0,,0,,47431.jpg,0.0,0,0,Borges Da Silva,0.1,False,,u,1,3,Arsenal,0.0,716,298,0,914,0,20497,4,,,,,0.0,3.351661,0.0,Willian,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,21.115462,0,0.0
4,50.411,3.600786,1,,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,,,,,,,,,,,0.0,,,0.0,0.0,0.0,0.0,0.0,1.0,7,,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,,,,,,,,,,,0.0,,,3.0,0.0,1.0,0.0,3.0,0.0,131,0.0,0.0,0.0,6,,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,54694,,,0,0,-4,4,132.4,218,32,,,0,4,0.0,0.0,0,Pierre-Emerick,0.0,,,3.1,,14.0,31,,,,,,,,,,,,,,,,,,,,,,,,,2.953807,5.168448,2.616637,2.706346,5.581922,3.586074,2.988724,0.0,,0.0,,,,,,16,4,,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,,,,,,,,,,,0.0,,,1.0,0.0,1.0,0.0,1.0,1.0,92.9,128,26,False,217.6,263,33,1036,Left club by mutual consent,2022-02-02T08:21:28.428217Z,96,0,2,,0,,54694.jpg,3.1,0,0,Aubameyang,1.3,False,,u,1,3,Arsenal,582.0,45,18,44,742898,0,897062,387,0.375082,,,1.162149,0.0,2.666871,4.6,Aubameyang,0.8,2.4,,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,,,,,,,,,,,0.0,,,0.1,0.4,0.0,0.0,0.2,0.1,5.8,6.6,0.471429,,1.2,0.6,4.4,0.2,1.8,1.0,0.6,0.4,1.5,0.2,2.9,0.0,1.7,0.0,0.5,0.9,0.0,0.6,1.0,1.4,1.1,3.8,0.4,1.6,0.7,1.2,1.1,1.4,0.8,1.5,22.24,,0.3,0.9,0.2,0.6,0.5,0.3,,,,,0.0,,,,,,,,,,,0.0,,,1.3,0.1,0.2,0.1,0.3,1.0,,,3.2,2.6,2.8,3.4,4.0,2.2,,,,,1.0,,,,,,,,,,,1.0,,,10.5,3.6,3.8,2.4,6.8,4.071,25.601958,3,0.0
5,29.667291,2.472274,0,,,,,,,,,,0.0,,0.0,,,,,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,0,,,,,,,,,,0.0,,0.0,,,,,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,173,100.0,100.0,13.967291,3,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,58822,2.0,,0,0,-3,3,147.6,202,52,,,0,2,3.7,3.7,1,Cédric,3.2,3.214254,2.937473,2.937473,2.695651,12.0,31,,,,,,,,,,,,,,,,,,,,,,,,,3.115302,5.361676,2.818623,2.924688,5.642043,3.427736,3.198048,0.0,,0.0,,,,,,16,0,,,,,,,,,,0.0,,0.0,,,,,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,36.5,303,102,False,160.4,306,117,894,,2022-01-13T23:00:13.573747Z,42,0,0,,0,,58822.jpg,2.1,0,0,Soares,0.4,False,,a,1,3,Arsenal,56.0,324,108,25,39269,5951,31968,818,0.588637,0.765299,0.641822,1.206346,0.8,6.306694,6.0,Cédric,0.5,1.5,,,,,,,,,,0.0,,0.1,,,,,0.0,,0.2,0.0,0.1,0.0,0.0,0.1,0.0,0.0,,,,,,0.2,0.7,0.058333,,1.2,0.6,4.4,0.2,1.8,1.0,0.6,0.4,1.5,0.2,2.9,0.0,1.7,0.0,0.5,0.9,0.0,0.6,1.0,1.4,1.1,3.8,0.4,1.6,0.7,1.2,1.1,1.4,0.8,1.5,1.2,,,,,,,,,,0.0,,0.0,,,,,0.0,,0.1,0.0,0.0,0.1,0.0,0.0,0.0,0.0,,,,,,1.807586,,,,,,,,,,1.0,,1.520093,,,,,3.626279,,5.395247,1.471518,1.286388,4.931484,-0.910517,3.98128,1.807586,1.0,,,,,,26.488117,3,0.0


In [12]:
# team stats of the previous gameweek; for the first round there is no file
# yet, so an all-NaN 'xGA' column indexed by team name is used as a placeholder
if latest_gameweek == 1:
    teamStats_previous_week = pd.DataFrame(np.nan, index=team_names, columns=['xGA'])
    display(teamStats_previous_week)
elif latest_gameweek > 1:
    filepath = f'../data/fbref/team_stats_week{latest_gameweek - 1}.csv'
    teamStats_previous_week = pd.read_csv(filepath, index_col='Squad')
    display(teamStats_previous_week.head())

Unnamed: 0_level_0,Rk,MP,W,D,L,GF,GA,GD,Pts,xG,xGA,xGD,xGD/90,Last 5,Attendance,Top Team Scorer,Goalkeeper,Notes,xG per game,xGA per game,probability no goals allowed
Squad,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Manchester City,1,30,23,4,3,70,18,52,73,68.8,21.3,47.4,1.58,L W W D W,52681,"Riyad Mahrez, Raheem Sterling... - 10",Ederson,,2.293333,0.71,0.491644
Liverpool,2,30,22,6,2,77,20,57,72,74.0,28.1,45.9,1.53,W W W W W,53435,Mohamed Salah - 20,Alisson,,2.466667,0.936667,0.391932
Chelsea,3,29,17,8,4,58,23,35,59,48.6,27.0,21.6,0.75,W W W W L,38013,Mason Mount - 8,Edouard Mendy,,1.675862,0.931034,0.394146
Tottenham,4,30,17,3,10,52,37,15,54,51.7,33.1,18.5,0.62,W L W W W,55468,Son Heung-min - 14,Hugo Lloris,,1.723333,1.103333,0.331763
Arsenal,5,29,17,3,9,44,34,10,54,44.0,35.8,8.1,0.28,W W L W L,59533,"Emile Smith Rowe, Bukayo Saka - 9",Aaron Ramsdale,,1.517241,1.234483,0.290985


In [13]:
def points_for_goal(position):
    """Return the points credited for one goal, given a position code.

    `position` is the value callers read from the FPL 'element_type' column.
    Codes below 3 give 6 points, code 3 gives 5 and code 4 gives 4.  Any
    other value matches no rule and the function returns None.
    """
    if position < 3:
        return 6
    for code, points in ((3, 5), (4, 4)):
        if position == code:
            return points
    return None

def points_for_clean_sheet(position):
    """Return the points credited for one clean sheet, given a position code.

    `position` is the value callers read from the FPL 'element_type' column.
    Codes below 3 give 4 points, code 3 gives 1 and code 4 gives 0.  Any
    other value matches no rule and the function returns None.
    """
    if position < 3:
        return 4
    for code, points in ((3, 1), (4, 0)):
        if position == code:
            return points
    return None

In [14]:
# names of the per-gameweek columns written for the latest round
# (only the xPoints name uses the spaced 'week N' form; the others use '_weekN')
xPoints_column = f'xPoints week {latest_gameweek}'
xG_column = f'xG_week{latest_gameweek}'
goals_column = f'goals_week{latest_gameweek}'
bonus_column = f'bonus_week{latest_gameweek}'
xGA_column = f'xGA_week{latest_gameweek}'
cleansheet_column = f'cleansheet_week{latest_gameweek}'
xA_column = f'xA_week{latest_gameweek}'
assists_column = f'assists_week{latest_gameweek}'

In [15]:
def incorporate_xG(indicator, ix):
    """Swap actual goals for expected goals (xG) in the points of FPL player `ix`.

    `indicator` is a boolean mask over `playerStats`; only the first selected
    row is read.  `ix` is the player's label in `df`.

    Always writes 'adjusted points', 'xG' and 'xG_points' to `df`.  If the
    player has more minutes than in `df_previous_week`, the per-gameweek
    xG, goals, xPoints and bonus columns are written too.  A KeyError raised
    while reading `df_previous_week` is reported by a printed message and
    not propagated.
    """
    xG = playerStats.loc[indicator, ('Expected', 'xG')].values[0]
    penalty_attempts = playerStats.loc[indicator, ('Performance', 'PKatt')].values[0]
    goal_value = points_for_goal(df.loc[ix, 'element_type'])

    # 0.24 is the probability to miss a penalty, which incurs -2 points
    # NOTE(review): points actually lost to missed penalties are not added back
    # before this expected loss is subtracted -- confirm that this is intended.
    expected_penalty_loss = 2 * 0.24 * penalty_attempts

    df.loc[ix, 'adjusted points'] = (df.loc[ix, 'total_points']
                                     - goal_value * (df.loc[ix, 'goals_scored'] - xG)
                                     - expected_penalty_loss)
    df.loc[ix, 'xG'] = xG
    df.loc[ix, 'xG_points'] = goal_value * xG - expected_penalty_loss

    try:
        if df.loc[ix, 'minutes'] > df_previous_week.loc[ix, 'minutes']:
            # without an xG value from last week the season totals are this week's values
            no_baseline = np.isnan(df_previous_week.loc[ix, 'xG'])

            if no_baseline:
                df.loc[ix, xG_column] = xG
                df.loc[ix, goals_column] = df.loc[ix, 'goals_scored']
            else:
                df.loc[ix, xG_column] = df.loc[ix, 'xG'] - df_previous_week.loc[ix, 'xG']
                df.loc[ix, goals_column] = df.loc[ix, 'goals_scored'] \
                                           - df_previous_week.loc[ix, 'goals_scored']

            # this week's points with this week's goals replaced by this week's xG
            df.loc[ix, xPoints_column] = df.loc[ix, 'event_points'] \
                - goal_value * (df.loc[ix, goals_column] - df.loc[ix, xG_column])

            if no_baseline:
                df.loc[ix, bonus_column] = df.loc[ix, 'bonus']
            else:
                df.loc[ix, bonus_column] = df.loc[ix, 'bonus'] - df_previous_week.loc[ix, 'bonus']
    except KeyError:
        print(str(ix) + ' is a new index. (xG)')

In [16]:
# always run 'team_xGA' AFTER 'incorporate_xG'
def team_xGA(indicator, ix):
    """Swap actual clean sheets for expected clean sheets in the points of player `ix`.

    `indicator` is not used here; it is accepted so that this function can be
    called with the same arguments as the other adjustment steps.  `ix` is the
    player's label in `df`.

    Updates 'adjusted points' (which must already exist, hence the ordering
    note above) and writes 'clean_sheet_points'.  It then writes the
    per-gameweek xGA and clean-sheet columns and, if the player gained at
    least 60 minutes compared with `df_previous_week`, adjusts the weekly
    xPoints.  A KeyError raised in that weekly part is reported by a printed
    message and not propagated.
    """
    team = team_names[df.loc[ix, 'team']-1]
    probability_cleanSheet = teamStats.loc[team, 'probability no goals allowed']
    clean_sheet_value = points_for_clean_sheet(df.loc[ix, 'element_type'])

    # season level: remove the actual clean sheets, add the expected number
    df.loc[ix, 'adjusted points'] = df.loc[ix, 'adjusted points'] - clean_sheet_value * \
                                ( df.loc[ix, 'clean_sheets'] - probability_cleanSheet*df.loc[ix, 'games played'] )
    df.loc[ix, 'clean_sheet_points'] = clean_sheet_value * \
                                       df.loc[ix, 'games played'] * probability_cleanSheet
    try:
        if not np.isnan(teamStats_previous_week.loc[team, 'xGA']):
            # weekly values are differences between the current and last week's totals
            df.loc[ix, xGA_column] = teamStats.loc[team, 'xGA'] - teamStats_previous_week.loc[team, 'xGA']
            df.loc[ix, cleansheet_column] = df.loc[ix, 'clean_sheets'] - df_previous_week.loc[ix, 'clean_sheets']
        else:
            # no xGA from last week: the totals are this week's values
            df.loc[ix, xGA_column] = teamStats.loc[team, 'xGA']
            df.loc[ix, cleansheet_column] = df.loc[ix, 'clean_sheets']

        if df.loc[ix,'minutes'] - df_previous_week.loc[ix,'minutes'] >= 60:
            # probability of conceding zero goals under a Poisson model with mean = weekly xGA
            probability_cleansheet_thisweek = poisson.pmf(0,df.loc[ix, xGA_column])
            df.loc[ix, xPoints_column] = df.loc[ix, xPoints_column] - clean_sheet_value * \
                                            (df.loc[ix, cleansheet_column] - probability_cleansheet_thisweek)

    except KeyError:
        print(str(ix) + ' is a new index. (xGA)')

In [17]:
# always run 'xA' AFTER 'incorporate_xG'
def xA(indicator, ix):
    """Swap actual assists for expected assists (xA) in the points of player `ix`.

    `indicator` is a boolean mask over `playerStats`; only the first selected
    row is read.  `ix` is the player's label in `df`.

    Updates 'adjusted points' and writes 'xA' and 'xA_points'.  If the player
    has more minutes than in `df_previous_week`, the per-gameweek xA and
    assists columns are written and the weekly xPoints are adjusted.  Actual
    assists are scaled by the notebook-level `xA_proportion` before being
    compared with xA.  A KeyError raised while reading `df_previous_week` is
    reported by a printed message and not propagated.
    """
    expected_assists = playerStats.loc[indicator, ('Expected', 'xA')].values[0]

    df.loc[ix, 'adjusted points'] = df.loc[ix, 'adjusted points'] \
        - 3 * (xA_proportion * df.loc[ix, 'assists'] - expected_assists)
    df.loc[ix, 'xA'] = expected_assists
    df.loc[ix, 'xA_points'] = 3 * expected_assists

    try:
        if df.loc[ix, 'minutes'] > df_previous_week.loc[ix, 'minutes']:
            if np.isnan(df_previous_week.loc[ix, 'xA']):
                # no xA from last week: the season totals are this week's values
                df.loc[ix, xA_column] = expected_assists
                df.loc[ix, assists_column] = df.loc[ix, 'assists']
            else:
                df.loc[ix, xA_column] = df.loc[ix, 'xA'] - df_previous_week.loc[ix, 'xA']
                df.loc[ix, assists_column] = df.loc[ix, 'assists'] - df_previous_week.loc[ix, 'assists']

            # identical in both cases: weekly assists out, weekly xA in
            df.loc[ix, xPoints_column] = df.loc[ix, xPoints_column] \
                - 3 * (xA_proportion * df.loc[ix, assists_column] - df.loc[ix, xA_column])
    except KeyError:
        print(str(ix) + ' is a new index. (xA)')

In [18]:
def calculateAdjustedPoints(indicator, index):
    """Run the three adjustment steps for one player, in the required order.

    `indicator` and `index` are passed unchanged to every step.
    'incorporate_xG' has to come first because the other two steps update
    values that it creates.
    """
    for adjustment_step in (incorporate_xG, team_xGA, xA):
        adjustment_step(indicator, index)

## Main loop for assigning adjusted points

Below is the main loop where we calculate adjusted points for each player. Calculation of the adjusted points itself is straightforward, but there is some work required to match players in two different data sets. Comparing player names in both data sets gives unique matches in many cases, but some special cases need to be covered through individual solutions.

In [19]:
# players who move from one team to another are given separate entries in the FBREF data
# below we add the relevant stats from the whole season and remove the old team information

# here we identify "duplicate" players: more than one FBREF row contains the FPL
# player's full name and all of those rows carry the same 'Age' value

# the normalised FBREF names do not depend on the FPL player, so build them once
fbref_names_normalised = playerStats[('General','Player')].str.lower().str.replace(' ', '').str.replace('-', '')

duplicates = []
for ix in df[df['minutes']>0].index:
    full_name = df.loc[ix, 'first_name'].lower().replace(' ', '').replace('-', '') \
                + df.loc[ix, 'second_name'].lower().replace(' ', '').replace('-', '')
    full_name_indicator = fbref_names_normalised.str.contains(full_name)
    fbref_matches = playerStats.loc[full_name_indicator]
    if fbref_matches.shape[0] > 1 and len(fbref_matches[('General','Age')].unique()) == 1:
        fbref_ix = fbref_matches.index
        # (FPL id, FBREF row labels, normalised full name)
        duplicates.append((ix, fbref_ix.values, full_name))

duplicates

[(10, array([79, 80]), 'calumchambers'),
 (168, array([127, 128]), 'lucasdigne'),
 (42, array([185, 186]), 'anwarelghazi'),
 (282, array([41, 42]), 'donnyvandebeek'),
 (287, array([234, 235]), 'danieljames'),
 (39, array([486, 487]), 'matttargett'),
 (58, array([60, 61]), 'danburn'),
 (109, array([542, 543]), 'chriswood')]

In [20]:
# give the correct up-to-date team of each "duplicate" player
# (entry i belongs to duplicates[i], so the order must follow the `duplicates` list)
duplicate_correct_teams = ['Aston Villa', 'Aston Villa', 'Everton', 'Everton', 'Leeds United', 
                           'Newcastle Utd', 'Newcastle Utd', 'Newcastle Utd']

# fail early if this hand-maintained list no longer lines up with `duplicates`
assert len(duplicate_correct_teams) == len(duplicates), \
    'duplicate_correct_teams must contain exactly one team per entry in duplicates'

In [21]:
# add the relevant stats from the whole season and remove the old team information for "duplicate" players
summed_columns = [('Playing Time', 'MP'), ('Expected','xG'), ('Performance','PKatt'), ('Expected','xA')]

for i, (ix, _fbref_rows, full_name) in enumerate(duplicates):
    # the mask is rebuilt on every pass because rows are dropped from playerStats below
    full_name_indicator = playerStats[('General','Player')].str.lower().str.replace(' ', '').str.replace('-', '')\
                                                                        .str.contains(full_name)
    team_indicator = playerStats[('General','Squad')] == duplicate_correct_teams[i]
    # the row of the current team receives the totals over all of the player's rows ...
    playerStats.loc[full_name_indicator & team_indicator, summed_columns] = \
                    playerStats.loc[full_name_indicator, summed_columns].sum().values
    # ... and the rows of the other teams are removed
    fbref_index_to_drop = playerStats.loc[full_name_indicator & ~team_indicator].index
    playerStats.drop(fbref_index_to_drop, inplace=True)

In [22]:
# FIX DELE ALLI
# all FBREF rows named 'Dele Alli' are merged into the Everton row: the summed
# columns are written to that row and the remaining rows are dropped

# the id is checked for existence first, so a missing id ends in the message
# below rather than in a KeyError
if 363 in df.index and df.loc[363,'web_name']=='Dele':
    indicator = playerStats[('General','Player')].str.contains('Dele Alli')
    team = 'Everton'
    team_indicator = playerStats[('General','Squad')] == team
    summed_columns = [('Playing Time', 'MP'), ('Expected','xG'), ('Performance','PKatt'), ('Expected','xA')]
    playerStats.loc[indicator & team_indicator, summed_columns] = \
                    playerStats.loc[indicator, summed_columns].sum().values
    fbref_index_to_drop = playerStats.loc[indicator & ~team_indicator].index
    playerStats.drop(fbref_index_to_drop, inplace=True)
else:
    print('Dele is not on index 363!')
    
playerStats[playerStats[('General','Player')].str.contains('Dele Alli')]

Unnamed: 0_level_0,General,General,General,General,General,General,General,Playing Time,Playing Time,Playing Time,Playing Time,Performance,Performance,Performance,Performance,Performance,Performance,Performance,Per 90 Minutes,Per 90 Minutes,Per 90 Minutes,Per 90 Minutes,Per 90 Minutes,Expected,Expected,Expected,Expected,Per 90 Minutes,Per 90 Minutes,Per 90 Minutes,Per 90 Minutes,Per 90 Minutes,General
Unnamed: 0_level_1,Rk,Player,Nation,Pos,Squad,Age,Born,MP,Starts,Min,90s,Gls,Ast,G-PK,PK,PKatt,CrdY,CrdR,Gls,Ast,G+A,G-PK,G+A-PK,xG,npxG,xA,npxG+xA,xG,xA,xG+xA,npxG,npxG+xA,Matches
10,11,Dele Alli,eng ENG,MF,Everton,26-004,1996,16.0,0,175,1.9,0,0,0,0,1.0,1,0,0.0,0.0,0.0,0.0,0.0,1.6,0.1,1.1,0.3,0.04,0.13,0.17,0.04,0.17,Matches


In [23]:
# Match every FPL player who has played to one row of the FBREF data and
# calculate the adjusted points for that player.

def _normalise_fpl_name(raw_name, fold_umlauts=True):
    """Lower-case an FPL name and drop spaces and hyphens; optionally map ü/ö/ä to u/o/a."""
    cleaned = raw_name.lower().replace(' ', '').replace('-', '')
    if fold_umlauts:
        cleaned = cleaned.replace('ü', 'u').replace('ö', 'o').replace('ä', 'a')
    return cleaned


def _count_fbref_matches(fbref_indicator):
    """Return the number of FBREF rows selected by the boolean mask."""
    return playerStats.loc[fbref_indicator].shape[0]


def _use_fbref_match(fbref_indicator, ix):
    """Copy 'games played' from the first selected FBREF row, then calculate the adjusted points."""
    df.loc[ix,'games played'] = playerStats.loc[fbref_indicator,('Playing Time','MP')].values[0]
    calculateAdjustedPoints(fbref_indicator, ix)


def _use_exceptional_match(fbref_indicator, ix):
    """Use a hand-written special-case mask, but only if it selects exactly one FBREF row."""
    if _count_fbref_matches(fbref_indicator) == 1:
        _use_fbref_match(fbref_indicator, ix)
    else:
        print(str(ix) + ': no player found (exceptional_case_indicator fail).')


# FBREF names in the three forms used below; playerStats is not changed inside
# the loop, so they are built once instead of once per player
fbref_names_lower = playerStats[('General','Player')].str.lower()
fbref_names_plain = fbref_names_lower.str.replace(' ', '').str.replace('-', '')
fbref_names_folded = fbref_names_plain.str.replace('ü', 'u').str.replace('ö', 'o').str.replace('ä', 'a')
fbref_squads = playerStats[('General','Squad')]

# special cases for players without any match: normalised FPL web name -> FBREF mask
no_match_special_cases = {
    'gudmundsson': fbref_names_lower.str.contains('hann berg '),
    'gundogan': fbref_names_folded.str.contains('gundo'),
    'fabianski': fbref_names_folded.str.contains('fabia') & fbref_names_folded.str.contains('ski'),
    'soucek': fbref_names_folded.str.contains('tom') & fbref_names_folded.str.contains('sou'),
    'fabiosilva': fbref_names_folded.str.contains('biosilva'),
    'elliott': fbref_names_plain.str.contains('elliot'),
    'milivojevic': fbref_names_plain.str.contains('milivoje'),
    'emersonroyal': fbref_names_plain.str.contains('emerson') & (fbref_squads == 'Tottenham'),
    's.armstrong': fbref_names_plain.str.contains('stuartarmstrong'),
    'placheta': fbref_names_plain.str.contains('przemys'),
    'dubravka': fbref_names_plain.str.contains('bravka'),
    'kral': fbref_names_plain.str.contains('alexkr'),
}

for ix in df[df['minutes']>0].index:
    # player name in FPL data
    name = _normalise_fpl_name(df.loc[ix, 'web_name'])
    # find FBREF data indexes where player name contains 'name'
    indicator = fbref_names_folded.str.contains(name)
    match_count = _count_fbref_matches(indicator)

    # if unique match is found, we can calculate 'adjusted points'
    if match_count == 1:
        _use_fbref_match(indicator, ix)

    # if no match is found
    elif match_count == 0:
        # try matching based on first name, helps in some cases
        # NOTE(review): the FPL first name has its umlauts folded but the FBREF names
        # used here do not; kept as it was -- confirm whether that is intended
        first_name = _normalise_fpl_name(df.loc[ix, 'first_name'])
        first_name_indicator = fbref_names_plain.str.contains(first_name)
        # if unique match is found, we can calculate 'adjusted points'
        if _count_fbref_matches(first_name_indicator) == 1:
            _use_fbref_match(first_name_indicator, ix)
        # here we deal with some special cases individually
        elif name in no_match_special_cases:
            exceptional_case_indicator = no_match_special_cases[name]
            _use_exceptional_match(exceptional_case_indicator, ix)
        else:
            print(str(ix) + ': no player found.')

    # if more than one matches were found in the original comparison
    else:
        # try matching full name
        full_name = _normalise_fpl_name(df.loc[ix, 'first_name'], fold_umlauts=False) \
            + _normalise_fpl_name(df.loc[ix, 'second_name'], fold_umlauts=False)
        full_name_indicator = fbref_names_plain.str.contains(full_name)
        # if unique match is found, we can calculate 'adjusted points'
        if _count_fbref_matches(full_name_indicator) == 1:
            _use_fbref_match(full_name_indicator, ix)
        # here we try to match players based both on their name and team
        else:
            team = team_names[df.loc[ix, 'team']-1]
            team_indicator = fbref_squads == team
            if _count_fbref_matches(indicator & team_indicator) == 1:
                _use_fbref_match(indicator & team_indicator, ix)
            # special cases (the first three take the first selected row, without a uniqueness check)
            elif name == 'son':
                exceptional_case_indicator = fbref_names_plain.str.contains('heung')
                _use_fbref_match(exceptional_case_indicator, ix)
            elif name == 'wood':
                _use_fbref_match(full_name_indicator & team_indicator, ix)
            elif full_name == 'bamidelealli':
                exceptional_case_indicator = fbref_names_plain.str.contains('delealli')
                _use_fbref_match(exceptional_case_indicator, ix)
            elif full_name == 'rodrigohernandez':
                # Manchester City's Rodri: this case used to select the FBREF row of
                # 'rodrigobentancur', i.e. the statistics of a different player.  It now
                # uses the same name/squad mask as the dedicated Rodri fix cell.
                exceptional_case_indicator = fbref_names_plain.str.contains('rodri') & \
                                             (fbref_squads == 'Manchester City')
                _use_exceptional_match(exceptional_case_indicator, ix)
            else:
                print(str(ix) + ': non-unique name.')

In [24]:
# FIX MAN CITY'S RODRI
ix=266
# player name in FPL data; stays None when the id does not exist, so that a
# missing id ends in the message below rather than in a KeyError
name = None
if ix in df.index:
    name = df.loc[ix, 'web_name'].lower().replace(' ', '').replace('-', '').replace('ü', 'u').replace('ö', 'o').\
                        replace('ä', 'a')
if name=='rodrigo':
    # find FBREF data indexes where player name contains 'rodri' and the squad is Manchester City
    name_indicator = playerStats[('General','Player')].str.lower().str.replace(' ', '').str.replace('-', '')\
                .str.replace('ü', 'u').str.replace('ö', 'o').str.replace('ä', 'a').str.contains('rodri')
    team_indicator = playerStats[('General','Squad')]=='Manchester City'
    exceptional_case_indicator = name_indicator & team_indicator
    # the first selected row is used; running the calculation again overwrites the
    # values that the main loop stored for this player
    df.loc[ix,'games played'] = playerStats.loc[exceptional_case_indicator,('Playing Time','MP')].values[0]
    calculateAdjustedPoints(exceptional_case_indicator, ix)
else:
    print('Rodri is not on index 266!')

In [25]:
df['adjusted points per game'] = df['adjusted points'] / df['games played']

# carry over the per-gameweek columns of all earlier rounds from last week's data
if latest_gameweek > 1 and df_online == 1:
    weekly_column_templates = ['xPoints week {}', 'xG_week{}', 'goals_week{}', 'bonus_week{}',
                               'xGA_week{}', 'cleansheet_week{}', 'xA_week{}', 'assists_week{}']
    column_list = [template.format(week)
                   for week in range(1, latest_gameweek)
                   for template in weekly_column_templates]
    # join only what is not in df yet: a second join of the same columns fails
    # because of overlapping column names, which made this cell break on a re-run
    columns_to_join = [column for column in column_list if column not in df.columns]
    if columns_to_join:
        df = df.join(df_previous_week[columns_to_join])

df.head()

Unnamed: 0_level_0,chance_of_playing_next_round,chance_of_playing_this_round,code,cost_change_event,cost_change_event_fall,cost_change_start,cost_change_start_fall,dreamteam_count,element_type,ep_next,ep_this,event_points,first_name,form,in_dreamteam,news,news_added,now_cost,photo,points_per_game,second_name,selected_by_percent,special,squad_number,status,team,team_code,total_points,transfers_in,transfers_in_event,transfers_out,transfers_out_event,value_form,value_season,web_name,minutes,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,saves,bonus,bps,influence,creativity,threat,ict_index,influence_rank,influence_rank_type,creativity_rank,creativity_rank_type,threat_rank,threat_rank_type,ict_index_rank,ict_index_rank_type,corners_and_indirect_freekicks_order,corners_and_indirect_freekicks_text,direct_freekicks_order,direct_freekicks_text,penalties_order,penalties_text,team_name,games played,adjusted points,xG,xG_points,clean_sheet_points,xGA_week32,cleansheet_week32,xA,xA_points,xG_week32,goals_week32,xPoints week 32,bonus_week32,xA_week32,assists_week32,adjusted points per game,xPoints week 1,xG_week1,goals_week1,bonus_week1,xGA_week1,cleansheet_week1,xA_week1,assists_week1,xPoints week 2,xG_week2,goals_week2,bonus_week2,xGA_week2,cleansheet_week2,xA_week2,assists_week2,xPoints week 3,xG_week3,goals_week3,bonus_week3,xGA_week3,cleansheet_week3,xA_week3,assists_week3,xPoints week 4,xG_week4,goals_week4,bonus_week4,xGA_week4,cleansheet_week4,xA_week4,assists_week4,xPoints week 5,xG_week5,goals_week5,bonus_week5,xGA_week5,cleansheet_week5,xA_week5,assists_week5,xPoints week 6,xG_week6,goals_week6,bonus_week6,xGA_week6,cleansheet_week6,xA_week6,assists_week6,xPoints week 7,xG_week7,goals_week7,bonus_week7,xGA_week7,cleansheet_week7,xA_week7,assists_week7,xPoints week 8,xG_week8,goals_week8,bonus_week8,xGA_week8,cleansheet_week8,xA_week8,assists_week8,xPoints week 
9,xG_week9,goals_week9,bonus_week9,xGA_week9,cleansheet_week9,xA_week9,assists_week9,xPoints week 10,xG_week10,goals_week10,bonus_week10,xGA_week10,cleansheet_week10,xA_week10,assists_week10,xPoints week 11,xG_week11,goals_week11,bonus_week11,xGA_week11,cleansheet_week11,xA_week11,assists_week11,xPoints week 12,xG_week12,goals_week12,bonus_week12,xGA_week12,cleansheet_week12,xA_week12,assists_week12,xPoints week 13,xG_week13,goals_week13,bonus_week13,xGA_week13,cleansheet_week13,xA_week13,assists_week13,xPoints week 14,xG_week14,goals_week14,bonus_week14,xGA_week14,cleansheet_week14,xA_week14,assists_week14,xPoints week 15,xG_week15,goals_week15,bonus_week15,xGA_week15,cleansheet_week15,xA_week15,assists_week15,xPoints week 16,xG_week16,goals_week16,bonus_week16,xGA_week16,cleansheet_week16,xA_week16,assists_week16,xPoints week 17,xG_week17,goals_week17,bonus_week17,xGA_week17,cleansheet_week17,xA_week17,assists_week17,xPoints week 18,xG_week18,goals_week18,bonus_week18,xGA_week18,cleansheet_week18,xA_week18,assists_week18,xPoints week 19,xG_week19,goals_week19,bonus_week19,xGA_week19,cleansheet_week19,xA_week19,assists_week19,xPoints week 20,xG_week20,goals_week20,bonus_week20,xGA_week20,cleansheet_week20,xA_week20,assists_week20,xPoints week 21,xG_week21,goals_week21,bonus_week21,xGA_week21,cleansheet_week21,xA_week21,assists_week21,xPoints week 22,xG_week22,goals_week22,bonus_week22,xGA_week22,cleansheet_week22,xA_week22,assists_week22,xPoints week 23,xG_week23,goals_week23,bonus_week23,xGA_week23,cleansheet_week23,xA_week23,assists_week23,xPoints week 24,xG_week24,goals_week24,bonus_week24,xGA_week24,cleansheet_week24,xA_week24,assists_week24,xPoints week 25,xG_week25,goals_week25,bonus_week25,xGA_week25,cleansheet_week25,xA_week25,assists_week25,xPoints week 26,xG_week26,goals_week26,bonus_week26,xGA_week26,cleansheet_week26,xA_week26,assists_week26,xPoints week 
27,xG_week27,goals_week27,bonus_week27,xGA_week27,cleansheet_week27,xA_week27,assists_week27,xPoints week 28,xG_week28,goals_week28,bonus_week28,xGA_week28,cleansheet_week28,xA_week28,assists_week28,xPoints week 29,xG_week29,goals_week29,bonus_week29,xGA_week29,cleansheet_week29,xA_week29,assists_week29,xPoints week 30,xG_week30,goals_week30,bonus_week30,xGA_week30,cleansheet_week30,xA_week30,assists_week30,xPoints week 31,xG_week31,goals_week31,bonus_week31,xGA_week31,cleansheet_week31,xA_week31,assists_week31
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1,Unnamed: 189_level_1,Unnamed: 190_level_1,Unnamed: 191_level_1,Unnamed: 192_level_1,Unnamed: 193_level_1,Unnamed: 194_level_1,Unnamed: 
195_level_1,Unnamed: 196_level_1,Unnamed: 197_level_1,Unnamed: 198_level_1,Unnamed: 199_level_1,Unnamed: 200_level_1,Unnamed: 201_level_1,Unnamed: 202_level_1,Unnamed: 203_level_1,Unnamed: 204_level_1,Unnamed: 205_level_1,Unnamed: 206_level_1,Unnamed: 207_level_1,Unnamed: 208_level_1,Unnamed: 209_level_1,Unnamed: 210_level_1,Unnamed: 211_level_1,Unnamed: 212_level_1,Unnamed: 213_level_1,Unnamed: 214_level_1,Unnamed: 215_level_1,Unnamed: 216_level_1,Unnamed: 217_level_1,Unnamed: 218_level_1,Unnamed: 219_level_1,Unnamed: 220_level_1,Unnamed: 221_level_1,Unnamed: 222_level_1,Unnamed: 223_level_1,Unnamed: 224_level_1,Unnamed: 225_level_1,Unnamed: 226_level_1,Unnamed: 227_level_1,Unnamed: 228_level_1,Unnamed: 229_level_1,Unnamed: 230_level_1,Unnamed: 231_level_1,Unnamed: 232_level_1,Unnamed: 233_level_1,Unnamed: 234_level_1,Unnamed: 235_level_1,Unnamed: 236_level_1,Unnamed: 237_level_1,Unnamed: 238_level_1,Unnamed: 239_level_1,Unnamed: 240_level_1,Unnamed: 241_level_1,Unnamed: 242_level_1,Unnamed: 243_level_1,Unnamed: 244_level_1,Unnamed: 245_level_1,Unnamed: 246_level_1,Unnamed: 247_level_1,Unnamed: 248_level_1,Unnamed: 249_level_1,Unnamed: 250_level_1,Unnamed: 251_level_1,Unnamed: 252_level_1,Unnamed: 253_level_1,Unnamed: 254_level_1,Unnamed: 255_level_1,Unnamed: 256_level_1,Unnamed: 257_level_1,Unnamed: 258_level_1,Unnamed: 259_level_1,Unnamed: 260_level_1,Unnamed: 261_level_1,Unnamed: 262_level_1,Unnamed: 263_level_1,Unnamed: 264_level_1,Unnamed: 265_level_1,Unnamed: 266_level_1,Unnamed: 267_level_1,Unnamed: 268_level_1,Unnamed: 269_level_1,Unnamed: 270_level_1,Unnamed: 271_level_1,Unnamed: 272_level_1,Unnamed: 273_level_1,Unnamed: 274_level_1,Unnamed: 275_level_1,Unnamed: 276_level_1,Unnamed: 277_level_1,Unnamed: 278_level_1,Unnamed: 279_level_1,Unnamed: 280_level_1,Unnamed: 281_level_1,Unnamed: 282_level_1,Unnamed: 283_level_1,Unnamed: 284_level_1,Unnamed: 285_level_1,Unnamed: 286_level_1,Unnamed: 287_level_1,Unnamed: 288_level_1,Unnamed: 289_level_1,Unnamed: 
290_level_1,Unnamed: 291_level_1,Unnamed: 292_level_1,Unnamed: 293_level_1,Unnamed: 294_level_1,Unnamed: 295_level_1,Unnamed: 296_level_1,Unnamed: 297_level_1,Unnamed: 298_level_1,Unnamed: 299_level_1,Unnamed: 300_level_1,Unnamed: 301_level_1,Unnamed: 302_level_1,Unnamed: 303_level_1,Unnamed: 304_level_1,Unnamed: 305_level_1,Unnamed: 306_level_1,Unnamed: 307_level_1,Unnamed: 308_level_1,Unnamed: 309_level_1,Unnamed: 310_level_1,Unnamed: 311_level_1,Unnamed: 312_level_1,Unnamed: 313_level_1,Unnamed: 314_level_1,Unnamed: 315_level_1,Unnamed: 316_level_1,Unnamed: 317_level_1,Unnamed: 318_level_1,Unnamed: 319_level_1,Unnamed: 320_level_1,Unnamed: 321_level_1,Unnamed: 322_level_1,Unnamed: 323_level_1,Unnamed: 324_level_1,Unnamed: 325_level_1,Unnamed: 326_level_1,Unnamed: 327_level_1,Unnamed: 328_level_1,Unnamed: 329_level_1,Unnamed: 330_level_1,Unnamed: 331_level_1
1,100.0,100.0,80201,0,0,-5,5,1,1,3.5,2.0,0,Bernd,1.5,False,,2022-02-11T08:00:15.144286Z,45,80201.jpg,2.5,Leno,0.9,False,,a,1,3,10,79746,226,199265,2544,0.3,2.2,Leno,360,0,0,1,9,0,0,0,0,0,10,0,69,85.0,0.0,0.0,8.5,377,29,606,69,588,60,433,30,,,,,,,Arsenal,4.0,10.755279,0.0,0.0,4.755279,0.6,0.0,0.0,0.0,,,,,,,2.68882,2.090127,0.0,0.0,0.0,1.3,0.0,0.0,0.0,2.220093,0.0,0.0,0.0,2.9,0.0,0.0,0.0,1.089483,0.0,0.0,0.0,3.8,0.0,0.0,0.0,,,,,0.7,0.0,,,,,,,1.2,0.0,,,,,,,1.1,0.0,,,,,,,1.4,0.0,,,,,,,0.8,0.0,,,,,,,1.5,0.0,,,,,,,1.2,0.0,,,,,,,0.6,0.0,,,,,,,4.4,0.0,,,,,,,0.2,0.0,,,,,,,1.8,0.0,,,,,,,1.0,0.0,,,,,,,0.6,0.0,,,,,,,0.4,0.0,,,,,,,1.5,0.0,,,,,,,0.2,0.0,,,,,,,0.0,0.0,,,,,,,1.7,0.0,,,,,,,0.0,0.0,,,,,,,0.5,0.0,,,,,,,0.9,0.0,,,,,,,0.0,0.0,,,,,,,0.6,0.0,,,,,,,1.0,0.0,,,,,,,1.4,0.0,,,,,,,1.1,0.0,,,4.68128,0.0,0.0,0.0,0.4,1.0,0.0,0.0,,,,,1.6,0.0,,
2,0.0,0.0,115918,0,0,0,0,0,1,0.0,0.0,0,Rúnar Alex,0.0,False,Joined OH Leuven on a season-long loan - Expec...,2021-08-31T22:00:09.069158Z,40,115918.jpg,0.0,Rúnarsson,0.5,False,,u,1,3,0,19017,0,79879,179,0.0,0.0,Rúnarsson,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,533,54,518,33,483,19,537,54,,,,,,,Arsenal,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,0.0,0.0,47431,0,0,-2,2,0,3,0.0,0.0,0,Willian,0.0,False,Transferred to Corinthians,2021-08-20T09:30:14.065783Z,63,47431.jpg,0.0,Borges Da Silva,0.1,False,,u,1,3,0,914,0,20505,8,0.0,0.0,Willian,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,718,299,718,299,718,299,718,299,,,,,,,Arsenal,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,0.0,0.0,54694,0,0,-4,4,0,4,0.0,0.0,0,Pierre-Emerick,0.0,False,Left club by mutual consent,2022-02-02T08:21:28.428217Z,96,54694.jpg,3.1,Aubameyang,1.3,False,,u,1,3,44,742898,0,897547,439,0.0,4.6,Aubameyang,1036,4,1,6,16,0,0,2,3,0,0,7,131,217.6,132.4,582.0,92.9,264,33,221,33,46,19,133,26,,,,,,,Arsenal,14.0,50.411,5.8,22.24,0.0,0.6,0.0,0.8,2.4,,,,,,,3.600786,,,,,,,,,1.0,0.0,0.0,0.0,2.9,0.0,0.0,0.0,1.0,0.0,0.0,0.0,3.8,0.0,0.0,0.0,10.5,1.3,1.0,3.0,0.7,1.0,0.1,0.0,3.6,0.1,0.0,0.0,1.2,1.0,0.4,0.0,3.8,0.2,1.0,1.0,1.1,0.0,0.0,0.0,2.4,0.1,0.0,0.0,1.4,1.0,0.0,0.0,6.8,0.3,1.0,3.0,0.8,0.0,0.2,0.0,4.071,1.0,1.0,0.0,1.5,0.0,0.1,1.0,3.2,0.3,0.0,0.0,1.2,1.0,0.0,0.0,2.6,0.9,0.0,0.0,0.6,1.0,0.0,0.0,2.8,0.2,0.0,0.0,4.4,0.0,0.0,0.0,3.4,0.6,0.0,0.0,0.2,1.0,0.0,0.0,4.0,0.5,0.0,0.0,1.8,0.0,0.0,0.0,2.2,0.3,0.0,0.0,1.0,0.0,0.0,0.0,,,,,0.6,0.0,,,,,,,0.4,0.0,,,,,,,1.5,0.0,,,,,,,0.2,0.0,,,,,,,0.0,0.0,,,,,,,1.7,0.0,,,,,,,0.0,0.0,,,,,,,0.5,0.0,,,,,,,0.9,0.0,,,,,,,0.0,0.0,,,,,,,0.6,0.0,,,,,,,1.0,0.0,,,,,,,1.4,0.0,,,,,,,1.1,0.0,,,,,,,0.4,0.0,,,,,,,1.6,0.0,,
5,100.0,100.0,58822,0,0,-3,3,0,2,4.5,2.5,1,Cédric,2.0,False,,2022-01-13T23:00:13.573747Z,42,58822.jpg,2.0,Soares,0.5,False,,a,1,3,26,50482,7561,33841,1539,0.5,6.2,Cédric,984,0,0,3,18,0,0,0,3,0,0,0,193,182.2,199.3,64.0,44.7,289,108,163,39,318,106,279,93,2.0,,,,,,Arsenal,13.0,34.254657,0.3,1.8,15.454657,0.6,0.0,1.0,3.0,0.1,0.0,5.295247,0.0,0.5,0.0,2.634974,,,,,,,,,1.520093,0.0,0.0,0.0,2.9,0.0,0.1,0.0,-0.910517,0.0,0.0,0.0,3.8,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.7,0.0,0.0,0.0,,,,,1.2,0.0,,,,,,,1.1,0.0,,,,,,,1.4,0.0,,,,,,,0.8,0.0,,,,,,,1.5,0.0,,,,,,,1.2,0.0,,,,,,,0.6,0.0,,,,,,,4.4,0.0,,,,,,,0.2,0.0,,,,,,,1.8,0.0,,,,,,,1.0,0.0,,,,,,,0.6,0.0,,,,,,,0.4,0.0,,,1.0,0.0,0.0,0.0,1.5,0.0,0.0,0.0,,,,,0.2,0.0,,,,,,,0.0,0.0,,,,,,,1.7,0.0,,,,,,,0.0,0.0,,,,,,,0.5,0.0,,,3.626279,0.0,0.0,0.0,0.9,1.0,0.0,0.0,,,,,0.0,0.0,,,5.395247,0.1,0.0,0.0,0.6,0.0,0.2,0.0,1.471518,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.286388,0.0,0.0,0.0,1.4,0.0,0.1,0.0,4.931484,0.1,0.0,0.0,1.1,1.0,0.0,0.0,3.98128,0.0,0.0,0.0,0.4,1.0,0.1,0.0,1.807586,0.0,0.0,0.0,1.6,0.0,0.0,0.0


In [26]:
# Rank players by 'adjusted points per game' (highest first) and display the
# top 30 rows, restricted to the columns needed to compare actual FPL points
# with the adjusted ones.
ranking_columns = [
    'web_name',
    'games played',
    'total_points',
    'points_per_game',
    'adjusted points',
    'adjusted points per game',
]
(
    df[ranking_columns]
    .sort_values(by='adjusted points per game', ascending=False)
    .head(30)
)

Unnamed: 0_level_0,web_name,games played,total_points,points_per_game,adjusted points,adjusted points per game
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
233,Salah,29.0,230,7.9,219.684146,7.575315
237,Alexander-Arnold,27.0,191,7.1,172.390337,6.384827
268,Torres,4.0,26,6.5,25.212991,6.303248
359,Son,28.0,198,7.1,174.10973,6.218205
135,Chilwell,6.0,54,9.0,37.267392,6.211232
253,Mendy,1.0,3,3.0,5.841991,5.841991
256,Cancelo,29.0,173,6.0,168.656738,5.81575
230,Mané,28.0,148,5.3,155.919865,5.568567
240,Jota,28.0,154,5.5,153.219865,5.472138
357,Kane,30.0,150,5.0,162.25,5.408333


In [27]:
# Write the player-level frame and the team stats to CSV files whose names
# carry the gameweek number set at the top of the notebook.
filepath = f'../data/fpl/data_week{latest_gameweek}.csv'
df.to_csv(filepath)

filepath = f'../data/fbref/team_stats_week{latest_gameweek}.csv'
teamStats.to_csv(filepath)

Below we check how well the total xG matches the total number of goals scored.

In [28]:
# Sanity check: compare the summed xG column of the player stats table with
# the summed 'goals_scored' column of the player frame.
total_xG = playerStats[('Expected', 'xG')].sum()
total_goals = df['goals_scored'].sum()

# print() converts each argument with str() and joins them with one space,
# which yields the same "label: value" lines as explicit concatenation.
print('Total goals:', total_goals)
print('Total xG:', total_xG)
print('goals per xG:', total_goals / total_xG)

Total goals: 833
Total xG: 851.4
goals per xG: 0.9783885365280715
