In [94]:
import pandas as pd
import matplotlib.pyplot as plt
# Let IPython choose the matplotlib backend (the recorded run selected Qt5Agg).
%matplotlib auto
# Notebook-wide cutoff: only rows with total_points strictly greater than this
# value are kept by the filtering steps below.
points_threshold=3

Using matplotlib backend: Qt5Agg


In [95]:
def clean_df(gw1_file,players_file,teams_file,threshold=None):
    """Build the high-scorers table for one gameweek from three CSV files.

    The gameweek rows are enriched with each player's position, the name of
    the player's team and the name of the opponent team. Only rows whose
    ``total_points`` is strictly greater than the threshold are returned,
    sorted by ``total_points`` in descending order.

    Parameters
    ----------
    gw1_file : str, path or file-like
        Gameweek CSV. Must contain the columns ``element``, ``opponent_team``,
        ``name``, ``total_points``, ``was_home`` and ``value``.
    players_file : str, path or file-like
        Raw players CSV. Must contain ``element_type``, ``id`` and ``team``.
    teams_file : str, path or file-like
        Teams CSV. Must contain ``id`` and ``name``; any other column is
        ignored.
    threshold : number, optional
        Minimum (exclusive) ``total_points`` for a row to be kept. When
        omitted, the notebook-level ``points_threshold`` is used, which is
        what this function always did before the parameter existed.

    Returns
    -------
    pandas.DataFrame
        Columns ``name``, ``total_points``, ``was_home``, ``position``,
        ``value``, ``team_name``, ``opponent_team_name``.
    """
    if threshold is None:
        # Existing callers pass no threshold; keep using the notebook setting.
        threshold = points_threshold

    gameweek = pd.read_csv(gw1_file, engine='python')

    # rename()/assign() return new frames, so nothing is written into a
    # column slice of the raw players table (no SettingWithCopyWarning).
    players = (
        pd.read_csv(players_file)[["element_type", "id", "team"]]
        .rename(columns={'id': 'player_id', 'team': 'team_id', 'element_type': 'position'})
        .assign(position=lambda frame: frame['position'].replace(
            [1, 2, 3, 4], ['GK', 'DEF', 'MID', 'FWD']))
    )

    # Keep only the key and the label from the teams file. Merging the whole
    # frame would duplicate team_id on the second join and would clash with
    # player columns whenever the teams file has same-named columns
    # (for example a league ``position``).
    teams = (
        pd.read_csv(teams_file, engine='python')
        .rename(columns={'id': 'team_id', 'name': 'team_name'})
        [['team_id', 'team_name']]
    )
    # Same lookup table, keyed and labelled for the opponent side of the fixture.
    opponents = teams.rename(
        columns={'team_id': 'opponent_team', 'team_name': 'opponent_team_name'})

    players_with_team = players.merge(teams, how='left', on='team_id')
    enriched = (
        gameweek
        .merge(players_with_team, how='left', left_on='element', right_on='player_id')
        .merge(opponents, how='left', on='opponent_team')
    )

    # Keep only the columns used by the analysis.
    selected = enriched[['name', 'total_points', 'was_home', 'position',
                         'value', 'team_name', 'opponent_team_name']]

    # Filter first, then sort, exactly as before.
    return selected[selected.total_points > threshold].sort_values(
        'total_points', ascending=False)

In [96]:
def plot_df(gw_final,fig_title,threshold=None):
    """Draw one bar chart of value counts per categorical feature.

    A single figure with five side-by-side panels is created, one each for
    ``was_home``, ``position``, ``value``, ``team_name`` and
    ``opponent_team_name``. Each panel shows how many rows of ``gw_final``
    fall into each distinct value of that column.

    Parameters
    ----------
    gw_final : pandas.DataFrame
        Table containing the five columns listed above.
    fig_title : str
        Text inserted at the end of the figure title (e.g. the gameweek and
        season being shown).
    threshold : number, optional
        Points cutoff quoted in the figure title. When omitted, the
        notebook-level ``points_threshold`` is used.

    Returns
    -------
    None
        The figure is shown through ``fig.show()``.
    """
    if threshold is None:
        # The original read an undefined name (``points_thresh``) here; the
        # notebook setting is called ``points_threshold``.
        threshold = points_threshold

    # Each feature is paired with the relative width of its panel so the two
    # cannot drift out of sync; wide panels are for columns with many bars.
    panels = [
        ("was_home", 1),
        ("position", 1.25),
        ("value", 2),
        ("team_name", 3.5),
        ("opponent_team_name", 3.5),
    ]
    categorical_features = [feature for feature, _ in panels]
    width_ratios = [width for _, width in panels]

    fig, ax = plt.subplots(1, len(categorical_features),
                           gridspec_kw={'width_ratios': width_ratios})
    fig.subplots_adjust(left=0.035, bottom=0.2, right=0.975)

    # The title does not depend on the feature, so set it once, not per panel.
    fig.suptitle("Players Stats Who got more than {} points in {} ".format(threshold, fig_title))

    for axis, categorical_feature in zip(ax, categorical_features):
        counts = gw_final[categorical_feature].value_counts()
        counts.plot(kind='bar', ax=axis).set_title(categorical_feature)

    fig.show()

In [97]:
# The season 2020/2021 files already come in a ready-to-use shape and only
# need to be merged, so the clean_df function defined above is not used here.
gw1_2021 = pd.read_csv('gw1_2021.csv')
teams2021 = pd.read_csv('teams2021.csv', engine='python')
gw1_2021 = (
    gw1_2021
    .merge(teams2021, how='left', left_on=['opponent_team'], right_on=['id'])
    .loc[:, ['name', 'position', 'team', 'total_points', 'value',
             'was_home', 'opponent_team_name', 'selected']]
    .rename(columns={'team': 'team_name'})
    .sort_values('total_points', ascending=False)
    # keep only the players who scored more than the threshold
    .loc[lambda frame: frame.total_points > points_threshold]
)

_ = plot_df(gw1_2021, 'gw1 season 2020/2021')

gw1_2021.head(10)

Unnamed: 0,name,position,team_name,total_points,value,was_home,opponent_team_name,selected
301,Mohamed Salah,MID,Liverpool,20,120,True,Leeds,1883241
135,Gabriel Magalhães,DEF,Arsenal,15,50,False,Fulham,85593
354,Romain Saïss,DEF,Wolves,15,50,False,Sheffield Utd,177607
394,Timothy Castagne,DEF,Leicester,14,55,False,West Brom,67356
414,Willian Borges Da Silva,MID,Arsenal,14,80,False,Fulham,473242
196,Jeff Hendrick,MID,Newcastle,14,50,False,West Ham,6679
340,Reece James,DEF,Chelsea,14,50,False,Brighton,304155
183,Jamie Vardy,FWD,Leicester,13,100,False,West Brom,1036141
217,Jorge Luiz Frello Filho,MID,Chelsea,12,50,False,Brighton,104796
252,Lucas Digne,DEF,Everton,12,60,False,Spurs,397204


In [98]:
# Season 2019/2020, gameweek 1: build the table, plot it, show the top rows.
gw1_2020 = clean_df(
    gw1_file='gw1_2020.csv',
    players_file='players_raw2020.csv',
    teams_file='teams2020.csv',
)
_ = plot_df(gw_final=gw1_2020, fig_title='gw1 season 2019/2020')
gw1_2020.head(10)

Unnamed: 0,name,total_points,was_home,position,value,team_name,opponent_team_name
423,Raheem_Sterling_214,20,False,MID,120,Man City,West Ham
433,Riyad_Mahrez_217,14,False,MID,85,Man City,West Ham
156,Erik_Pieters_447,14,True,DEF,45,Burnley,Southampton
48,Ashley_Barnes_90,13,True,FWD,65,Burnley,Southampton
190,Harry_Kane_338,13,True,FWD,110,Spurs,Aston Villa
331,Marcus_Rashford_233,13,True,FWD,85,Man Utd,Chelsea
15,Ainsley_Maitland-Niles_4,12,False,DEF,50,Arsenal,Newcastle
138,Divock_Origi_188,12,True,FWD,55,Liverpool,Norwich
372,Mohamed_Salah_191,12,True,MID,125,Liverpool,Norwich
310,Lewis_Dunk_42,11,False,DEF,45,Brighton,Watford


In [99]:
# Season 2018/2019, gameweek 1: build the table, plot it, show the top rows.
gw1_2019 = clean_df(
    gw1_file='gw1_2019.csv',
    players_file='players_raw2019.csv',
    teams_file='teams2019.csv',
)
_ = plot_df(gw_final=gw1_2019, fig_title='gw1 season 2018/2019')
gw1_2019.head(10)

Unnamed: 0,name,total_points,was_home,position,value,team_name,opponent_team_name
423,Roberto_Pereyra_391,16,True,MID,60,Watford,Brighton
441,Sadio_Mané_251,16,True,MID,95,Liverpool,West Ham
67,Benjamin_Mendy_267,15,False,DEF,60,Man City,Arsenal
418,Richarlison_de Andrade_393,14,False,MID,65,Everton,Wolves
277,José_Holebas_378,13,True,DEF,45,Watford,Brighton
439,Rúben Diogo_da Silva Neves_433,12,True,MID,50,Wolves,Everton
4,Aaron_Wan-Bissaka_145,12,False,DEF,40,Crystal Palace,Fulham
436,Ryan_Fraser_40,11,True,MID,55,Bournemouth,Cardiff
325,Marcos_Alonso_115,11,False,DEF,65,Chelsea,Huddersfield
242,Jeffrey_Schlupp_141,11,False,DEF,45,Crystal Palace,Fulham


In [100]:
# Season 2017/2018, gameweek 1: build the table, plot it, show the top rows.
gw1_2018 = clean_df(
    gw1_file='gw1_2018.csv',
    players_file='players_raw2018.csv',
    teams_file='teams2018.csv',
)
_ = plot_df(gw_final=gw1_2018, fig_title='gw1 season 2017/2018')
gw1_2018.head(10)

Unnamed: 0,name,total_points,was_home,position,value,team_name,opponent_team_name
11,Ahmed El-Sayed_Hegazi,15,True,DEF,45,West Brom,Bournemouth
54,Ben_Davies,14,False,DEF,55,Spurs,Newcastle
421,Romelu_Lukaku,13,True,FWD,115,Man Utd,West Ham
219,Jamie_Vardy,13,False,FWD,85,Leicester,Arsenal
435,Sam_Vokes,13,False,FWD,60,Burnley,Chelsea
82,Christian_Eriksen,12,False,MID,95,Spurs,Newcastle
414,Roberto_Firmino,12,False,FWD,85,Liverpool,Watford
465,Steve_Mounie,12,False,FWD,60,Huddersfield,Crystal Palace
187,Henrikh_Mkhitaryan,11,True,MID,80,Arsenal,West Ham
357,Mohamed_Salah,11,False,MID,90,Liverpool,Watford


In [101]:
# Season 2016/2017, gameweek 1: build the table, plot it, show the top rows.
gw1_2017 = clean_df(
    gw1_file='gw1_2017.csv',
    players_file='players_raw2017.csv',
    teams_file='teams2017.csv',
)
_ = plot_df(gw_final=gw1_2017, fig_title='gw1 season 2016/2017')
gw1_2017.head(10)

Unnamed: 0,name,total_points,was_home,position,value,team_name,opponent_team_name
417,Philippe_Coutinho,15,False,MID,80,Liverpool,Arsenal
10,Adam_Lallana,11,False,MID,70,Liverpool,Arsenal
315,Leroy_Fer,11,False,MID,50,Swansea,Burnley
44,Anthony_Martial,11,False,MID,95,Man Utd,Bournemouth
152,Eden_Hazard,10,True,MID,100,Chelsea,West Ham
434,Robert_Snodgrass,10,True,MID,55,West Ham,Leicester
63,Ben_Foster,10,False,GK,45,West Brom,Crystal Palace
438,Ross_Barkley,10,True,MID,75,Everton,Spurs
388,Nathan_Redmond,10,True,MID,60,Southampton,Watford
163,Etienne_Capoue,10,False,MID,45,Watford,Southampton
