In [1]:
import pandas as pd
import warnings
from functools import reduce
import itertools
import numpy as np
import sklearn.preprocessing as preprocessing
import sklearn.model_selection as model_selection
from sklearn import linear_model
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn import linear_model
from joblib import dump, load

In [2]:
# Notebook-wide settings.
# A limit of None disables truncation, so displayed frames show every column
# and every row.
pd.options.display.max_columns = None
pd.options.display.max_rows = None
# Blanket filter: silences every warning for the rest of the session,
# including deprecation notices from pandas.
warnings.simplefilter("ignore")

# Figure 5: mean predicted vs. actual goals per team

In [3]:
# Load the goal predictions and keep only the team name together with the
# predicted and the actual goal counts.
predicted_goals_scored = (
    pd.read_csv(r'data/predicting_team_goals.csv')
    .loc[:, ['team', 'pred_goals_scored', 'team_goals']]
)

In [4]:
# Preview the first five rows of the loaded goal predictions.
predicted_goals_scored.head(n=5)

Unnamed: 0,team,pred_goals_scored,team_goals
0,Watford,1.213222,2.0
1,Swansea City,1.545529,1.0
2,Burnley,1.53414,0.0
3,Huddersfield,0.882562,0.0
4,Crystal Palace,0.389978,0.0


In [5]:
# Per-team averages of predicted and actual goals.
# 'team' is the group index and is additionally carried as a regular column
# through the 'first' aggregation.
predicted_goals_scored.groupby(by='team').agg(
    {
        'team': 'first',
        'pred_goals_scored': 'mean',
        'team_goals': 'mean',
    }
)

Unnamed: 0_level_0,team,pred_goals_scored,team_goals
team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Arsenal,Arsenal,1.80853,1.971831
Bournemouth,Bournemouth,1.236662,1.380282
Brighton,Brighton,1.026374,0.915493
Burnley,Burnley,1.168252,1.056338
Cardiff City,Cardiff City,0.959556,0.894737
Chelsea,Chelsea,1.88541,1.647887
Crystal Palace,Crystal Palace,1.205815,1.352113
Everton,Everton,1.342606,1.352113
Fulham,Fulham,0.959531,0.894737
Huddersfield,Huddersfield,0.77727,0.633803


# Figure 7: top performers — predicted vs. actual season points

In [6]:
# Load the season-level point predictions; only the player name and the
# predicted points column are kept.
predicted_points = (
    pd.read_csv(r'data/season_points_predict.csv')
    .loc[:, ['player', 'predicted_points_xg']]
)

In [7]:
# Preview the first five predicted season totals.
predicted_points.head(n=5)

Unnamed: 0,player,predicted_points_xg
0,Mohamed Salah,246.539679
1,Raheem Sterling,241.660845
2,Paul Pogba,202.922922
3,Sadio Mané,199.313762
4,Eden Hazard,198.999148


In [8]:
# Rank players by predicted points, highest first (rank 1.0 = most points).
# 'average' is the pandas default tie rule: tied players share the mean of the
# ranks they span, which is why the column is float.
predicted_points = predicted_points.assign(
    predicted_rank=predicted_points['predicted_points_xg'].rank(
        method='average', ascending=False
    )
)

In [9]:
# Show the first ten rows, now including the predicted rank.
predicted_points.head(n=10)

Unnamed: 0,player,predicted_points_xg,predicted_rank
0,Mohamed Salah,246.539679,1.0
1,Raheem Sterling,241.660845,2.0
2,Paul Pogba,202.922922,3.0
3,Sadio Mané,199.313762,4.0
4,Eden Hazard,198.999148,5.0
5,Pierre-Emerick Aubameyang,176.493474,6.0
6,Andrew Robertson,176.000874,7.0
7,Aymeric Laporte,174.659464,8.0
8,Sergio Agüero,172.472494,9.0
9,Virgil van Dijk,171.028666,10.0


In [10]:
# Load the player form records, restrict them to the 2018-19 season and keep
# only the columns used below.
actual_points = (
    pd.read_csv(r'data/player_form.csv')
    .query('season == "2018-19"')
    .loc[:, ['player', 'total_points', 'mins', 'pos']]
)

In [11]:
# Preview the first five 2018-19 rows (the original row labels are retained).
actual_points.head(n=5)

Unnamed: 0,player,total_points,mins,pos
18072,Harvey Barnes,0.0,0.0,MID
18073,Harry Maguire,1.0,90.0,DEF
18074,Çağlar Söyüncü,0.0,0.0,DEF
18075,Hamza Choudhury,0.0,0.0,MID
18076,Danny Simpson,0.0,0.0,DEF


In [12]:
# Collapse the 2018-19 records to one row per player: points and minutes are
# summed, and the first listed position is kept.
# as_index=False already returns 'player' as the leading column, so the
# grouping key gets no entry of its own in the aggregation mapping.
actual_points = actual_points.groupby('player', as_index=False).agg(
    {'total_points': 'sum', 'mins': 'sum', 'pos': 'first'}
)

In [13]:
# Preview the first five per-player season totals.
actual_points.head(n=5)

Unnamed: 0,player,total_points,mins,pos
0,Aaron Cresswell,33.0,1589.0,DEF
1,Aaron Lennon,41.0,1215.0,MID
2,Aaron Mooy,75.0,2330.0,MID
3,Aaron Ramsey,88.0,1331.0,MID
4,Aaron Rowe,2.0,69.0,FWD


In [14]:
# Rank players by actual season points, highest first (rank 1.0 = most points).
# 'average' is the pandas default tie rule, so players level on points receive
# the same, possibly fractional, rank.
actual_points = actual_points.assign(
    actual_rank=actual_points['total_points'].rank(
        method='average', ascending=False
    )
)

In [15]:
# The ten best-ranked players by actual points.
actual_points.sort_values('actual_rank', ascending=True).head(n=10)

Unnamed: 0,player,total_points,mins,pos,actual_rank
146,Eden Hazard,253.0,3044.0,MID,1.0
354,Mohamed Salah,249.0,3250.0,MID,2.0
397,Raheem Sterling,239.0,3160.0,MID,3.0
57,Aymeric Laporte,222.0,3597.0,DEF,4.0
423,Sadio Mané,220.0,3077.0,MID,5.0
436,Sergio Agüero,217.0,2827.0,FWD,6.0
394,Pierre-Emerick Aubameyang,210.0,2816.0,FWD,7.0
35,Andrew Robertson,203.0,3216.0,DEF,8.0
147,Ederson,196.0,3960.0,GK,9.0
479,Virgil van Dijk,193.0,3384.0,DEF,10.0


In [16]:
# Export the per-player actual points and ranks as the data behind Figure 7.
# The row index is written too, as an unnamed first column.
# NOTE(review): the file name mentions a top 50, but the complete frame is
# written in its current (unsorted) order — confirm whether the figure expects
# a pre-filtered top-50 file.
actual_points.to_csv(path_or_buf='figure data/FIGURE 7 -50 TOP PEFORMERS.csv')

In [17]:
# Join predictions with actual results on the player name. The join is inner,
# so only players present in both frames are kept.
player_scorers = predicted_points.merge(actual_points, how='inner', on='player')

In [20]:
# Preview the first five rows of predicted vs. actual points and ranks.
player_scorers.head(n=5)

Unnamed: 0,player,predicted_points_xg,predicted_rank,total_points,mins,pos,actual_rank
0,Mohamed Salah,246.539679,1.0,249.0,3250.0,MID,2.0
1,Raheem Sterling,241.660845,2.0,239.0,3160.0,MID,3.0
2,Paul Pogba,202.922922,3.0,183.0,3366.0,MID,16.0
3,Sadio Mané,199.313762,4.0,220.0,3077.0,MID,5.0
4,Eden Hazard,198.999148,5.0,253.0,3044.0,MID,1.0


In [19]:
# Write the merged prediction/actual table to the working directory.
# The row index is written too, as an unnamed first column.
player_scorers.to_csv(path_or_buf='player_scorers.csv')