In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

In [2]:
from utils.bootstrap_api import BootstrapAPI
from utils.fpl_api_client import FPLApiClient

In [3]:
client = FPLApiClient()
bootstrap = BootstrapAPI(client)

## Fetching Players and converting to a pandas dataframe

In [4]:
players = bootstrap.get_players()
df_players = pd.DataFrame(players)

#### Checking all the columns that exist in the players dataframe

In [5]:
df_players.columns.to_list()

['can_transact',
 'can_select',
 'chance_of_playing_next_round',
 'chance_of_playing_this_round',
 'code',
 'cost_change_event',
 'cost_change_event_fall',
 'cost_change_start',
 'cost_change_start_fall',
 'dreamteam_count',
 'element_type',
 'ep_next',
 'ep_this',
 'event_points',
 'first_name',
 'form',
 'id',
 'in_dreamteam',
 'news',
 'news_added',
 'now_cost',
 'photo',
 'points_per_game',
 'removed',
 'second_name',
 'selected_by_percent',
 'special',
 'squad_number',
 'status',
 'team',
 'team_code',
 'total_points',
 'transfers_in',
 'transfers_in_event',
 'transfers_out',
 'transfers_out_event',
 'value_form',
 'value_season',
 'web_name',
 'region',
 'team_join_date',
 'birth_date',
 'has_temporary_code',
 'opta_code',
 'minutes',
 'goals_scored',
 'assists',
 'clean_sheets',
 'goals_conceded',
 'own_goals',
 'penalties_saved',
 'penalties_missed',
 'yellow_cards',
 'red_cards',
 'saves',
 'bonus',
 'bps',
 'influence',
 'creativity',
 'threat',
 'ict_index',
 'clearances_blo

In [6]:
df_players.head()

Unnamed: 0,can_transact,can_select,chance_of_playing_next_round,chance_of_playing_this_round,code,cost_change_event,cost_change_event_fall,cost_change_start,cost_change_start_fall,dreamteam_count,...,now_cost_rank_type,form_rank,form_rank_type,points_per_game_rank,points_per_game_rank_type,selected_rank,selected_rank_type,starts_per_90,clean_sheets_per_90,defensive_contribution_per_90
0,True,True,,,154561,1,-1,2,-2,1,...,1,66,3,42,5,11,2,1.0,0.57,0.0
1,True,True,,,109745,0,0,-2,2,0,...,35,503,72,536,75,234,32,0.0,0.0,0.0
2,True,False,0.0,0.0,463748,0,0,0,0,0,...,66,462,55,497,58,297,43,0.0,0.0,0.0
3,True,True,,,551221,0,0,0,0,0,...,79,488,68,521,71,349,53,0.0,0.0,0.0
4,True,True,100.0,75.0,226597,1,-1,3,-3,1,...,1,3,1,6,2,10,3,1.0,0.57,9.43


## Fetching Teams and converting to a pandas dataframe

In [7]:
teams = bootstrap.get_teams()
df_teams = pd.DataFrame(teams)

In [8]:
df_teams.columns.to_list()

['code',
 'draw',
 'form',
 'id',
 'loss',
 'name',
 'played',
 'points',
 'position',
 'short_name',
 'strength',
 'team_division',
 'unavailable',
 'win',
 'strength_overall_home',
 'strength_overall_away',
 'strength_attack_home',
 'strength_attack_away',
 'strength_defence_home',
 'strength_defence_away',
 'pulse_id']

In [9]:
df_teams.head()

Unnamed: 0,code,draw,form,id,loss,name,played,points,position,short_name,...,team_division,unavailable,win,strength_overall_home,strength_overall_away,strength_attack_home,strength_attack_away,strength_defence_home,strength_defence_away,pulse_id
0,3,0,,1,0,Arsenal,0,0,1,ARS,...,,False,0,1320,1325,1350,1350,1290,1300,1
1,7,0,,2,0,Aston Villa,0,0,13,AVL,...,,False,0,1125,1250,1110,1200,1140,1300,2
2,90,0,,3,0,Burnley,0,0,18,BUR,...,,False,0,1050,1050,1050,1050,1050,1050,43
3,91,0,,4,0,Bournemouth,0,0,4,BOU,...,,False,0,1150,1180,1100,1160,1200,1200,127
4,94,0,,5,0,Brentford,0,0,16,BRE,...,,False,0,1120,1185,1080,1080,1160,1290,130


In [10]:
df_teams[['position','name', 'points', 'strength']].sort_values(by='position')

Unnamed: 0,position,name,points,strength
0,1,Arsenal,0,4
11,2,Liverpool,0,5
17,3,Spurs,0,3
3,4,Bournemouth,0,3
12,5,Man City,0,4
7,6,Crystal Palace,0,3
6,7,Chelsea,0,4
8,8,Everton,0,3
16,9,Sunderland,0,2
13,10,Man Utd,0,3


### Most points in the season
#### df.n_largest to get top n in pandas
Usage: df.nlargest(n, 'column_name')

In [11]:
df_player_team = df_players.merge(df_teams, left_on='team', right_on='id')[['id_x', 'first_name', 'second_name', 'name', 'threat', 'total_points', 'element_type', 'now_cost', 'form_x', 'minutes', 'goals_scored', 'expected_goals', 'assists', 'expected_assists', 'creativity', 'influence', 'ict_index', 'clean_sheets', 'yellow_cards', 'red_cards']]
df_player_team.rename(columns={'id_x': 'Player_id', 'name': 'Team_name', 'form_x': 'player_form'}, inplace=True)
df_player_team.head()

Unnamed: 0,Player_id,first_name,second_name,Team_name,threat,total_points,element_type,now_cost,player_form,minutes,goals_scored,expected_goals,assists,expected_assists,creativity,influence,ict_index,clean_sheets,yellow_cards,red_cards
0,1,David,Raya Martín,Arsenal,0.0,34,1,57,4.0,630,0,0.0,0,0.04,10.0,135.8,14.5,4,1,0
1,2,Kepa,Arrizabalaga Revuelta,Arsenal,0.0,0,1,43,0.0,0,0,0.0,0,0.0,0.0,0.0,0.0,0,0,0
2,3,Karl,Hein,Arsenal,0.0,0,1,40,0.0,0,0,0.0,0,0.0,0.0,0.0,0.0,0,0,0
3,4,Tommy,Setford,Arsenal,0.0,0,1,40,0.0,0,0,0.0,0,0.0,0.0,0.0,0.0,0,0,0
4,5,Gabriel,dos Santos Magalhães,Arsenal,56.0,47,2,63,8.2,630,1,0.48,0,0.09,13.1,200.6,27.0,4,0,0


In [12]:
df_player_team.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 743 entries, 0 to 742
Data columns (total 20 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   Player_id         743 non-null    int64 
 1   first_name        743 non-null    object
 2   second_name       743 non-null    object
 3   Team_name         743 non-null    object
 4   threat            743 non-null    object
 5   total_points      743 non-null    int64 
 6   element_type      743 non-null    int64 
 7   now_cost          743 non-null    int64 
 8   player_form       743 non-null    object
 9   minutes           743 non-null    int64 
 10  goals_scored      743 non-null    int64 
 11  expected_goals    743 non-null    object
 12  assists           743 non-null    int64 
 13  expected_assists  743 non-null    object
 14  creativity        743 non-null    object
 15  influence         743 non-null    object
 16  ict_index         743 non-null    object
 17  clean_sheets    

In [13]:
df_player_team.threat.dtype

dtype('O')

In [35]:
cols_to_convert = ['threat', 'player_form', 'expected_goals', 'expected_assists', 'creativity', 'influence', 'ict_index']
df_player_team[cols_to_convert] = df_player_team[cols_to_convert].apply(pd.to_numeric, errors='coerce')
df_player_team['threat'].dtype

dtype('float64')

In [75]:
cols_to_convert = ['threat', 'form', 'expected_goals', 'expected_assists', 'creativity', 'influence', 'ict_index', 'points_per_game']
df_players[cols_to_convert] = df_players[cols_to_convert].apply(pd.to_numeric, errors='coerce')



In [16]:
df_player_team['influence'].isna().sum()

np.int64(0)

In [17]:
cols_to_compare = cols_to_convert
cols_to_compare += ['total_points', 'goals_scored', 'assists', 'minutes', 'now_cost']

In [18]:
cols_to_compare

['threat',
 'player_form',
 'expected_goals',
 'expected_assists',
 'creativity',
 'influence',
 'ict_index',
 'total_points',
 'goals_scored',
 'assists',
 'minutes',
 'now_cost']

In [21]:
df_player_team[cols_to_compare].corr().style.background_gradient(cmap='coolwarm')


Unnamed: 0,threat,player_form,expected_goals,expected_assists,creativity,influence,ict_index,total_points,goals_scored,assists,minutes,now_cost
threat,1.0,0.64234,0.879664,0.607148,0.632303,0.617729,0.84717,0.691712,0.746312,0.476688,0.603914,0.650421
player_form,0.64234,1.0,0.559915,0.575463,0.629605,0.872645,0.841634,0.929074,0.598169,0.525156,0.825717,0.442383
expected_goals,0.879664,0.559915,1.0,0.44401,0.488315,0.542695,0.720685,0.601603,0.823238,0.338835,0.473844,0.635785
expected_assists,0.607148,0.575463,0.44401,1.0,0.884425,0.581406,0.788451,0.623379,0.344468,0.619028,0.615614,0.457153
creativity,0.632303,0.629605,0.488315,0.884425,1.0,0.621951,0.856599,0.674468,0.404588,0.607843,0.685638,0.473332
influence,0.617729,0.872645,0.542695,0.581406,0.621951,1.0,0.888996,0.935824,0.585134,0.470144,0.907055,0.377535
ict_index,0.84717,0.841634,0.720685,0.788451,0.856599,0.888996,1.0,0.903108,0.665652,0.593861,0.864016,0.56193
total_points,0.691712,0.929074,0.601603,0.623379,0.674468,0.935824,0.903108,1.0,0.628245,0.559305,0.892693,0.461473
goals_scored,0.746312,0.598169,0.823238,0.344468,0.404588,0.585134,0.665652,0.628245,1.0,0.268602,0.405528,0.542957
assists,0.476688,0.525156,0.338835,0.619028,0.607843,0.470144,0.593861,0.559305,0.268602,1.0,0.444781,0.312606


## Top 5 Features that affect the most points in a season

In [138]:
corr_matrix = df_player_team[cols_to_compare].corr()

corr_pairs = (
    corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
    .stack()
    .reset_index()
)
corr_pairs.columns = ['Feature 1', 'Feature 2', 'Correlation']
top5 = corr_pairs.reindex(corr_pairs['Correlation'].abs().sort_values(ascending=False).index).head(5)
top5

Unnamed: 0,Feature 1,Feature 2,Correlation
46,influence,total_points,0.935824
16,player_form,total_points,0.929074
49,influence,minutes,0.907055
51,ict_index,total_points,0.903108
58,total_points,minutes,0.892693


In [141]:
corr_pairs = corr_matrix.unstack().reset_index()
corr_pairs.columns = ['Feature 1', 'Feature 2', 'Correlation']
corr_pairs

Unnamed: 0,Feature 1,Feature 2,Correlation
0,threat,threat,1.000000
1,threat,player_form,0.642340
2,threat,expected_goals,0.879664
3,threat,expected_assists,0.607148
4,threat,creativity,0.632303
...,...,...,...
139,now_cost,total_points,0.461473
140,now_cost,goals_scored,0.542957
141,now_cost,assists,0.312606
142,now_cost,minutes,0.372324


In [143]:
corr_pairs = corr_pairs[corr_pairs['Feature 1'] != corr_pairs['Feature 2']] # Remove self-correlations
corr_pairs = corr_pairs.drop_duplicates(subset=['Correlation']) # Remove duplicate pairs
corr_pairs.reindex(corr_pairs['Correlation'].abs().sort_values(ascending=False).index).head(10)

Unnamed: 0,Feature 1,Feature 2,Correlation
67,influence,total_points,0.935824
19,player_form,total_points,0.929074
70,influence,minutes,0.907055
79,ict_index,total_points,0.903108
94,total_points,minutes,0.892693
66,influence,ict_index,0.888996
40,expected_assists,creativity,0.884425
2,threat,expected_goals,0.879664
17,player_form,influence,0.872645
82,ict_index,minutes,0.864016


In [121]:
df_player_team.nlargest(10, 'influence')

Unnamed: 0,Player_id,first_name,second_name,Team_name,threat,total_points,element_type,now_cost,player_form,minutes,goals_scored,expected_goals,assists,expected_assists,creativity,influence,ict_index,clean_sheets,yellow_cards,red_cards
473,430,Erling,Haaland,Man City,387.0,70,4,145,11.5,593,9,7.64,1,0.57,55.1,324.8,76.8,4,0,0
134,82,Antoine,Semenyo,Bournemouth,212.0,66,3,79,10.8,630,6,3.65,3,0.37,132.4,305.0,65.0,3,1,0
259,241,Moisés,Caicedo Corozo,Chelsea,51.0,45,3,58,7.0,630,3,0.53,0,0.24,59.2,245.4,35.5,2,1,0
124,72,Marcos,Senesi Barón,Bournemouth,20.0,46,2,50,5.2,617,0,0.14,2,1.09,67.8,225.8,31.4,3,1,0
415,373,Virgil,van Dijk,Liverpool,29.0,29,2,61,3.8,630,0,0.18,0,0.12,26.4,219.8,27.5,2,1,0
94,200,Jaidon,Anthony,Burnley,174.0,41,3,57,5.0,587,4,2.16,1,0.62,100.4,219.0,49.4,2,2,0
114,470,Martin,Dúbravka,Burnley,0.0,20,1,40,2.5,630,0,0.0,0,0.0,0.0,218.0,21.9,1,0,0
630,683,Omar,Alderete,Sunderland,77.0,39,2,41,7.5,577,1,0.58,1,0.34,27.6,210.8,31.5,2,1,0
628,670,Robin,Roefs,Sunderland,0.0,41,1,46,6.0,630,0,0.0,0,0.01,20.0,208.0,22.7,3,1,0
338,419,Jack,Grealish,Everton,208.0,37,3,69,3.8,556,1,0.85,4,1.89,228.6,207.6,64.5,2,2,0


In [151]:
df_player_team.loc[df_player_team['element_type'] == 3].nlargest(10, 'influence')[['first_name', 'second_name', 'Team_name', 'goals_scored', 'expected_goals', 'G - xG', 'expected_assists', 'A - xA', 'total_points', 'now_cost', 'influence', 'minutes']]

Unnamed: 0,first_name,second_name,Team_name,goals_scored,expected_goals,G - xG,expected_assists,A - xA,total_points,now_cost,influence,minutes
134,Antoine,Semenyo,Bournemouth,6,3.65,2.35,0.37,2.63,66,79,305.0,630
259,Moisés,Caicedo Corozo,Chelsea,3,0.53,2.47,0.24,-0.24,45,58,245.4,630
94,Jaidon,Anthony,Burnley,4,2.16,1.84,0.62,0.38,41,57,219.0,587
338,Jack,Grealish,Everton,1,0.85,0.15,1.89,2.11,37,69,207.6,556
497,Bruno,Borges Fernandes,Man Utd,2,3.69,-1.69,1.3,-1.3,28,90,199.8,626
323,Iliman,Ndiaye,Everton,3,2.16,0.84,1.04,-0.04,37,65,198.2,588
99,Josh,Cullen,Burnley,1,0.18,0.82,0.59,0.41,37,50,189.2,590
255,Enzo,Fernández,Chelsea,3,3.76,-0.76,0.51,1.49,41,67,188.4,618
652,Mohammed,Kudus,Spurs,1,1.29,-0.29,1.14,3.86,39,67,187.0,618
327,James,Garner,Everton,1,0.55,0.45,1.05,-0.05,33,50,186.8,630


In [None]:
# Ranking by players outperforming their xG
df_player_team['G - xG'] = df_player_team['goals_scored'] - df_player_team['expected_goals']
df_player_team.nlargest(10, 'G - xG')[['first_name', 'second_name', 'Team_name', 'goals_scored', 'expected_goals', 'G - xG', 'total_points', 'now_cost']]

Unnamed: 0,first_name,second_name,Team_name,goals_scored,expected_goals,G - xG,total_points,now_cost
259,Moisés,Caicedo Corozo,Chelsea,3,0.53,2.47,45,58
134,Antoine,Semenyo,Bournemouth,6,3.65,2.35,66,79
698,Jarrod,Bowen,West Ham,3,0.83,2.17,35,77
94,Jaidon,Anthony,Burnley,4,2.16,1.84,41,57
622,Wilson,Isidor,Sunderland,3,1.23,1.77,28,56
24,Martín,Zubimendi Ibáñez,Arsenal,2,0.26,1.74,32,55
537,Bruno,Guimarães Rodriguez Moura,Newcastle,2,0.32,1.68,30,65
431,Ryan,Gravenberch,Liverpool,2,0.4,1.6,37,57
670,João Maria,Lobo Alves Palhares Costa Palhinha Gonçalves,Spurs,2,0.54,1.46,30,55
473,Erling,Haaland,Man City,9,7.64,1.36,70,145


In [127]:
# Ranking by players outperforming their xA
df_player_team['A - xA'] = df_player_team['assists'] - df_player_team['expected_assists']
df_player_team.nlargest(10, 'A - xA')[['first_name', 'second_name', 'Team_name', 'assists', 'expected_assists', 'A - xA', 'total_points', 'now_cost', 'minutes']]

Unnamed: 0,first_name,second_name,Team_name,assists,expected_assists,A - xA,total_points,now_cost,minutes
652,Mohammed,Kudus,Spurs,5,1.14,3.86,39,67,618
677,El Hadji Malick,Diouf,West Ham,4,0.75,3.25,26,44,630
266,João Pedro,Junqueira de Jesus,Chelsea,3,0.23,2.77,39,77,583
463,Jérémy,Doku,Man City,4,1.33,2.67,29,66,385
134,Antoine,Semenyo,Bournemouth,3,0.37,2.63,66,79,630
627,Granit,Xhaka,Sunderland,3,0.72,2.28,32,50,630
29,Eberechi,Eze,Arsenal,3,0.84,2.16,22,76,394
338,Jack,Grealish,Everton,4,1.89,2.11,37,69,556
6,Riccardo,Calafiori,Arsenal,2,0.15,1.85,42,57,524
19,Declan,Rice,Arsenal,3,1.16,1.84,40,65,542


In [129]:
df_player_team.nlargest(10, 'assists')[['first_name', 'second_name', 'Team_name', 'assists', 'expected_assists', 'A - xA', 'total_points', 'now_cost', 'minutes']]

Unnamed: 0,first_name,second_name,Team_name,assists,expected_assists,A - xA,total_points,now_cost,minutes
652,Mohammed,Kudus,Spurs,5,1.14,3.86,39,67,618
338,Jack,Grealish,Everton,4,1.89,2.11,37,69,556
463,Jérémy,Doku,Man City,4,1.33,2.67,29,66,385
677,El Hadji Malick,Diouf,West Ham,4,0.75,3.25,26,44,630
19,Declan,Rice,Arsenal,3,1.16,1.84,40,65,542
29,Eberechi,Eze,Arsenal,3,0.84,2.16,22,76,394
134,Antoine,Semenyo,Bournemouth,3,0.37,2.63,66,79,630
266,João Pedro,Junqueira de Jesus,Chelsea,3,0.23,2.77,39,77,583
627,Granit,Xhaka,Sunderland,3,0.72,2.28,32,50,630
6,Riccardo,Calafiori,Arsenal,2,0.15,1.85,42,57,524


In [145]:
df_players.loc[df_players['element_type'] == 4].nlargest(10, 'now_cost')[['first_name', 'second_name', 'now_cost', 'cost_change_event','cost_change_event_fall','cost_change_start','cost_change_start_fall']]

Unnamed: 0,first_name,second_name,now_cost,cost_change_event,cost_change_event_fall,cost_change_start,cost_change_start_fall
473,Erling,Haaland,145,1,-1,5,-5
439,Alexander,Isak,106,0,0,1,-1
31,Viktor,Gyökeres,91,1,-1,1,-1
66,Ollie,Watkins,87,0,0,-3,3
440,Hugo,Ekitiké,87,0,0,2,-2
266,João Pedro,Junqueira de Jesus,77,0,0,2,-2
698,Jarrod,Bowen,77,0,0,-3,3
302,Jean-Philippe,Mateta,75,0,0,0,0
518,Yoane,Wissa,74,0,0,-1,1
578,Chris,Wood,74,0,0,-1,1


In [90]:
df_players.nlargest(10, 'total_points')[['id', 'first_name', 'second_name', 'team', 'ep_next','total_points', 'element_type', 'now_cost', 'form', 'minutes', 'goals_scored', 'assists', 'clean_sheets', 'yellow_cards', 'red_cards']]

Unnamed: 0,id,first_name,second_name,team,ep_next,total_points,element_type,now_cost,form,minutes,goals_scored,assists,clean_sheets,yellow_cards,red_cards
473,430,Erling,Haaland,13,12.0,70,4,145,11.5,593,9,1,4,0,0
134,82,Antoine,Semenyo,4,10.8,66,3,79,10.8,630,6,3,3,1,0
7,8,Jurriën,Timber,1,6.0,48,2,59,5.5,521,2,2,3,2,0
4,5,Gabriel,dos Santos Magalhães,1,8.7,47,2,63,8.2,630,1,0,4,0,0
124,72,Marcos,Senesi Barón,4,5.2,46,2,50,5.2,617,0,2,3,1,0
280,260,Marc,Guéhi,8,5.8,46,2,49,5.8,630,1,2,3,0,0
259,241,Moisés,Caicedo Corozo,7,7.5,45,3,58,7.0,630,3,0,2,1,0
525,476,Dan,Burn,15,7.0,43,2,51,6.5,630,0,1,5,2,0
6,7,Riccardo,Calafiori,1,4.0,42,2,57,3.5,524,1,2,4,3,0
94,200,Jaidon,Anthony,3,3.8,41,3,57,5.0,587,4,1,2,2,0


## Focus on Midfielders

In [78]:
mid_players = df_players.loc[(df_players['element_type'] == 3) & (df_players['now_cost'] < 84)].merge(df_teams, left_on='team', right_on='id')

stat_to_sort_by = 'points_per_game'

mid_players.nlargest(10, stat_to_sort_by)[['first_name', 'second_name', 'name', 'total_points', 'now_cost', stat_to_sort_by, 'minutes', 'goals_scored', 'assists', 'clean_sheets', 'yellow_cards', 'red_cards', 'clearances_blocks_interceptions',
 'recoveries',
 'tackles',
 'defensive_contribution', 'bonus', 'bps']].reset_index(drop=True)

Unnamed: 0,first_name,second_name,name,total_points,now_cost,points_per_game,minutes,goals_scored,assists,clean_sheets,yellow_cards,red_cards,clearances_blocks_interceptions,recoveries,tackles,defensive_contribution,bonus,bps
0,Antoine,Semenyo,Bournemouth,66,79,9.4,630,6,3,3,1,0,7,39,12,58,9,190
1,Ismaïla,Sarr,Crystal Palace,36,65,7.2,432,3,1,3,1,0,8,14,6,28,6,107
2,Moisés,Caicedo Corozo,Chelsea,45,58,6.4,630,3,0,2,1,0,30,38,28,96,7,187
3,Ryan,Gravenberch,Liverpool,37,57,6.2,540,2,2,2,2,0,18,28,10,56,7,153
4,Matt,O'Riley,Brighton,12,55,6.0,177,1,0,1,0,0,3,2,4,9,2,36
5,Jaidon,Anthony,Burnley,41,57,5.9,587,4,1,2,2,0,14,29,12,55,5,144
6,Enzo,Fernández,Chelsea,41,67,5.9,618,3,2,2,1,0,8,23,14,45,3,147
7,Declan,Rice,Arsenal,40,65,5.7,542,1,3,3,0,0,14,29,6,49,6,154
8,Mohammed,Kudus,Spurs,39,67,5.6,618,1,5,3,2,0,0,29,15,44,4,123
9,Josh,Cullen,Burnley,37,50,5.3,590,1,1,2,0,0,38,33,15,86,5,150


In [79]:
mid_stats_corr = mid_players[['total_points', 'now_cost', 'influence', 'minutes', 'goals_scored', 'assists', 'clean_sheets', 'yellow_cards', 'red_cards', 'clearances_blocks_interceptions', 'expected_goals', 'expected_assists',
 'recoveries',
 'tackles',
 'defensive_contribution', 'bonus']].corr()

mid_stats_corr.style.background_gradient(cmap='coolwarm')

Unnamed: 0,total_points,now_cost,influence,minutes,goals_scored,assists,clean_sheets,yellow_cards,red_cards,clearances_blocks_interceptions,expected_goals,expected_assists,recoveries,tackles,defensive_contribution,bonus
total_points,1.0,0.511861,0.969351,0.906445,0.760144,0.663528,0.743221,0.487841,0.011297,0.687191,0.727475,0.725692,0.856861,0.756998,0.841897,0.791482
now_cost,0.511861,1.0,0.470975,0.48983,0.382756,0.354991,0.436095,0.128428,0.082094,0.190573,0.555148,0.54411,0.412396,0.25305,0.332694,0.345946
influence,0.969351,0.470975,1.0,0.875637,0.774809,0.623429,0.670544,0.520252,0.024427,0.72881,0.701987,0.70355,0.853736,0.793077,0.86261,0.7994
minutes,0.906445,0.48983,0.875637,1.0,0.5141,0.555953,0.754378,0.585532,0.051729,0.78613,0.619054,0.749701,0.923295,0.81406,0.921239,0.5746
goals_scored,0.760144,0.382756,0.774809,0.5141,1.0,0.30475,0.431423,0.291674,0.035967,0.318306,0.726217,0.360044,0.489675,0.467489,0.466323,0.772493
assists,0.663528,0.354991,0.623429,0.555953,0.30475,1.0,0.485604,0.329621,-0.013843,0.353318,0.458255,0.644075,0.498193,0.375288,0.458488,0.487204
clean_sheets,0.743221,0.436095,0.670544,0.754378,0.431423,0.485604,1.0,0.451314,0.094014,0.555309,0.515013,0.631795,0.676044,0.529438,0.651306,0.479176
yellow_cards,0.487841,0.128428,0.520252,0.585532,0.291674,0.329621,0.451314,1.0,0.015647,0.579005,0.316561,0.406323,0.583539,0.565005,0.621023,0.321471
red_cards,0.011297,0.082094,0.024427,0.051729,0.035967,-0.013843,0.094014,0.015647,1.0,0.052389,0.085699,-0.055142,0.032575,0.071193,0.051027,-0.023489
clearances_blocks_interceptions,0.687191,0.190573,0.72881,0.78613,0.318306,0.353318,0.555309,0.579005,0.052389,1.0,0.28262,0.457072,0.775178,0.757155,0.899836,0.446547


In [83]:
corr_pairs = (
    mid_stats_corr.where(np.triu(np.ones(mid_stats_corr.shape), k=1).astype(bool))
    .stack()
    .reset_index()
)
corr_pairs.columns = ['Feature 1', 'Feature 2', 'Correlation']
top5 = corr_pairs.reindex(corr_pairs['Correlation'].abs().sort_values(ascending=False).index).head(10)
top5

Unnamed: 0,Feature 1,Feature 2,Correlation
1,total_points,influence,0.969351
115,recoveries,defensive_contribution,0.959805
50,minutes,recoveries,0.923295
52,minutes,defensive_contribution,0.921239
117,tackles,defensive_contribution,0.91025
2,total_points,minutes,0.906445
103,clearances_blocks_interceptions,defensive_contribution,0.899836
29,influence,minutes,0.875637
40,influence,defensive_contribution,0.86261
11,total_points,recoveries,0.856861


In [90]:
stat_to_sort_by = 'form_x'

mid_players.nlargest(10, stat_to_sort_by)[['first_name', 'second_name', 'name', 'total_points', 'now_cost', stat_to_sort_by, 'minutes', 'goals_scored', 'assists', 'clean_sheets', 'yellow_cards', 'red_cards', 'clearances_blocks_interceptions', 'expected_goals', 'expected_assists',
 'recoveries',
 'tackles',
 'defensive_contribution', 'bonus', 'bps']].reset_index(drop=True)

Unnamed: 0,first_name,second_name,name,total_points,now_cost,form_x,minutes,goals_scored,assists,clean_sheets,yellow_cards,red_cards,clearances_blocks_interceptions,expected_goals,expected_assists,recoveries,tackles,defensive_contribution,bonus,bps
0,Antoine,Semenyo,Bournemouth,66,79,10.8,630,6,3,3,1,0,7,3.65,0.37,39,12,58,9,190
1,Moisés,Caicedo Corozo,Chelsea,45,58,7.0,630,3,0,2,1,0,30,0.53,0.24,38,28,96,7,187
2,Ryan,Gravenberch,Liverpool,37,57,7.0,540,2,2,2,2,0,18,0.4,0.37,28,10,56,7,153
3,Declan,Rice,Arsenal,40,65,6.0,542,1,3,3,0,0,14,0.47,1.16,29,6,49,6,154
4,Jérémy,Doku,Man City,29,66,6.0,385,0,4,3,0,0,1,0.59,1.33,13,5,19,3,96
5,Mohammed,Kudus,Spurs,39,67,6.0,618,1,5,3,2,0,0,1.29,1.14,29,15,44,4,123
6,Martín,Zubimendi Ibáñez,Arsenal,32,55,5.8,591,2,0,4,1,0,16,0.26,0.8,24,14,54,3,140
7,Yankuba,Minteh,Brighton,31,60,5.5,593,1,2,1,0,0,10,1.62,1.23,30,12,52,3,134
8,Sean,Longstaff,Leeds,28,49,5.2,475,1,1,1,0,0,11,0.43,0.48,21,23,55,3,137
9,Granit,Xhaka,Sunderland,32,50,5.2,630,0,3,3,3,0,41,0.14,0.72,36,12,89,1,143


## Focus on Defenders

In [37]:
df_players.columns.to_list()

['can_transact',
 'can_select',
 'chance_of_playing_next_round',
 'chance_of_playing_this_round',
 'code',
 'cost_change_event',
 'cost_change_event_fall',
 'cost_change_start',
 'cost_change_start_fall',
 'dreamteam_count',
 'element_type',
 'ep_next',
 'ep_this',
 'event_points',
 'first_name',
 'form',
 'id',
 'in_dreamteam',
 'news',
 'news_added',
 'now_cost',
 'photo',
 'points_per_game',
 'removed',
 'second_name',
 'selected_by_percent',
 'special',
 'squad_number',
 'status',
 'team',
 'team_code',
 'total_points',
 'transfers_in',
 'transfers_in_event',
 'transfers_out',
 'transfers_out_event',
 'value_form',
 'value_season',
 'web_name',
 'region',
 'team_join_date',
 'birth_date',
 'has_temporary_code',
 'opta_code',
 'minutes',
 'goals_scored',
 'assists',
 'clean_sheets',
 'goals_conceded',
 'own_goals',
 'penalties_saved',
 'penalties_missed',
 'yellow_cards',
 'red_cards',
 'saves',
 'bonus',
 'bps',
 'influence',
 'creativity',
 'threat',
 'ict_index',
 'clearances_blo

In [80]:
def_players = df_players.loc[df_players['element_type'] == 2].merge(df_teams, left_on='team', right_on='id')

stat_to_sort_by = 'clearances_blocks_interceptions'

def_players.nlargest(10, stat_to_sort_by)[['first_name', 'second_name', 'name', 'total_points', 'now_cost', stat_to_sort_by, 'minutes', 'goals_scored', 'assists', 'clean_sheets', 'yellow_cards', 'red_cards', 'clearances_blocks_interceptions',
 'recoveries',
 'tackles',
 'defensive_contribution', 'bonus', 'bps']].reset_index(drop=True)

Unnamed: 0,first_name,second_name,name,total_points,now_cost,clearances_blocks_interceptions,minutes,goals_scored,assists,clean_sheets,yellow_cards,red_cards,clearances_blocks_interceptions.1,recoveries,tackles,defensive_contribution,bonus,bps
0,Marcos,Senesi Barón,Bournemouth,46,50,76,617,0,2,3,1,0,76,26,12,88,6,146
1,Virgil,van Dijk,Liverpool,29,61,76,630,0,0,2,1,0,76,16,3,79,4,108
2,James,Tarkowski,Everton,32,55,67,630,0,0,2,1,0,67,24,12,79,1,110
3,Joachim,Andersen,Fulham,29,45,66,630,0,0,1,0,0,66,25,15,81,2,102
4,Michael,Keane,Everton,35,45,64,604,1,0,2,0,0,64,21,7,71,1,102
5,Maxime,Estève,Burnley,15,40,60,622,0,0,1,0,0,60,15,7,67,0,35
6,Nathan,Collins,Brentford,18,49,58,630,0,0,1,3,0,58,25,4,62,1,58
7,Maxence,Lacroix,Crystal Palace,37,51,57,630,0,1,3,1,0,57,18,15,72,2,130
8,Chris,Richards,Crystal Palace,33,45,57,630,0,0,3,0,0,57,8,16,73,0,130
9,Joe,Rodon,Leeds,30,41,57,630,1,0,2,0,0,57,17,5,62,2,100


In [81]:
def_stats_corr = def_players[['total_points', 'now_cost', 'influence', 'minutes', 'goals_scored', 'assists', 'clean_sheets', 'yellow_cards', 'red_cards', 'clearances_blocks_interceptions',
 'recoveries',
 'tackles',
 'defensive_contribution', 'bonus']].corr()

def_stats_corr.style.background_gradient(cmap='coolwarm')

Unnamed: 0,total_points,now_cost,influence,minutes,goals_scored,assists,clean_sheets,yellow_cards,red_cards,clearances_blocks_interceptions,recoveries,tackles,defensive_contribution,bonus
total_points,1.0,0.522206,0.912835,0.882733,0.509515,0.5167,0.898554,0.407287,0.088878,0.83091,0.810073,0.760904,0.870113,0.757535
now_cost,0.522206,1.0,0.467262,0.450785,0.231538,0.28899,0.497432,0.319003,0.002868,0.405007,0.440051,0.383173,0.427147,0.385751
influence,0.912835,0.467262,1.0,0.939055,0.491882,0.417108,0.710139,0.483597,0.129006,0.939759,0.8498,0.796194,0.968211,0.67087
minutes,0.882733,0.450785,0.939055,1.0,0.345873,0.391891,0.733406,0.543201,0.124917,0.900094,0.936744,0.816458,0.940636,0.577284
goals_scored,0.509515,0.231538,0.491882,0.345873,1.0,0.23759,0.307351,0.287953,0.168763,0.295554,0.294757,0.408231,0.343444,0.447276
assists,0.5167,0.28899,0.417108,0.391891,0.23759,1.0,0.389886,0.286671,0.019994,0.29535,0.381833,0.370907,0.334068,0.378491
clean_sheets,0.898554,0.497432,0.710139,0.733406,0.307351,0.389886,1.0,0.308878,0.100375,0.651581,0.681894,0.607366,0.684959,0.649177
yellow_cards,0.407287,0.319003,0.483597,0.543201,0.287953,0.286671,0.308878,1.0,0.008611,0.403506,0.561576,0.552761,0.467758,0.272355
red_cards,0.088878,0.002868,0.129006,0.124917,0.168763,0.019994,0.100375,0.008611,1.0,0.096245,0.121402,0.078255,0.098348,-0.007002
clearances_blocks_interceptions,0.83091,0.405007,0.939759,0.900094,0.295554,0.29535,0.651581,0.403506,0.096245,1.0,0.785791,0.653162,0.982392,0.572338


In [82]:
corr_pairs = (
    def_stats_corr.where(np.triu(np.ones(def_stats_corr.shape), k=1).astype(bool))
    .stack()
    .reset_index()
)
corr_pairs.columns = ['Feature 1', 'Feature 2', 'Correlation']
top5 = corr_pairs.reindex(corr_pairs['Correlation'].abs().sort_values(ascending=False).index).head(10)
top5

Unnamed: 0,Feature 1,Feature 2,Correlation
83,clearances_blocks_interceptions,defensive_contribution,0.982392
34,influence,defensive_contribution,0.968211
44,minutes,defensive_contribution,0.940636
31,influence,clearances_blocks_interceptions,0.939759
25,influence,minutes,0.939055
42,minutes,recoveries,0.936744
1,total_points,influence,0.912835
41,minutes,clearances_blocks_interceptions,0.900094
5,total_points,clean_sheets,0.898554
2,total_points,minutes,0.882733


### Keeper with most points and details

In [94]:
df_gk = df_players[df_players['element_type'] == 1]
df_gk.nlargest(10, 'total_points')[['id', 'first_name', 'second_name', 'team', 'total_points', 'now_cost', 'form', 'minutes', 'saves', 'clean_sheets', 'expected_goals_conceded', 'penalties_saved', 'chance_of_playing_next_round']]

Unnamed: 0,id,first_name,second_name,team,total_points,now_cost,form,minutes,saves,clean_sheets,expected_goals_conceded,penalties_saved,chance_of_playing_next_round
519,469,Nick,Pope,15,41,51,5.8,630,19,5,4.98,0,
628,670,Robin,Roefs,17,41,46,6.0,630,24,3,8.76,1,
635,565,Guglielmo,Vicario,18,35,51,3.5,630,24,3,8.83,0,
0,1,David,Raya Martín,1,34,57,4.0,630,15,4,4.4,0,
311,287,Jordan,Pickford,9,30,55,3.0,630,17,2,9.87,1,
119,67,Đorđe,Petrović,4,27,45,3.5,630,19,3,6.34,0,
273,253,Dean,Henderson,8,27,50,3.0,630,16,3,8.85,0,
160,101,Caoimhín,Kelleher,5,21,45,3.5,630,19,1,9.76,1,100.0
114,470,Martin,Dúbravka,3,20,40,2.5,630,29,1,13.67,0,100.0
345,314,Bernd,Leno,10,20,50,3.0,630,19,1,9.08,0,


In [105]:
small_df_gk = df_gk[['total_points', 'now_cost', 'form', 'minutes', 'saves', 'clean_sheets', 'expected_goals_conceded', 'penalties_saved', 'clearances_blocks_interceptions',
 'recoveries',
 'tackles', 'bonus']]

In [107]:
gk_corr = small_df_gk.corr()

gk_corr.style.background_gradient(cmap='coolwarm')

Unnamed: 0,total_points,now_cost,form,minutes,saves,clean_sheets,expected_goals_conceded,penalties_saved,clearances_blocks_interceptions,recoveries,tackles,bonus
total_points,1.0,0.690914,0.929853,0.917844,0.896145,0.920634,0.810121,0.465547,0.817224,0.923831,0.384163,0.762678
now_cost,0.690914,1.0,0.656838,0.670586,0.564218,0.664395,0.570443,0.225361,0.62355,0.634785,0.321252,0.502826
form,0.929853,0.656838,1.0,0.855641,0.847502,0.818678,0.737555,0.469035,0.7942,0.863278,0.307723,0.699802
minutes,0.917844,0.670586,0.855641,1.0,0.952777,0.749411,0.955273,0.386503,0.896341,0.981169,0.359145,0.536062
saves,0.896145,0.564218,0.847502,0.952777,1.0,0.689371,0.945457,0.407732,0.862348,0.940669,0.376287,0.560815
clean_sheets,0.920634,0.664395,0.818678,0.749411,0.689371,1.0,0.579501,0.291798,0.661055,0.755558,0.321785,0.712494
expected_goals_conceded,0.810121,0.570443,0.737555,0.955273,0.945457,0.579501,1.0,0.398566,0.870451,0.91744,0.323851,0.427742
penalties_saved,0.465547,0.225361,0.469035,0.386503,0.407732,0.291798,0.398566,1.0,0.408366,0.403796,0.382206,0.50096
clearances_blocks_interceptions,0.817224,0.62355,0.7942,0.896341,0.862348,0.661055,0.870451,0.408366,1.0,0.865237,0.424045,0.421485
recoveries,0.923831,0.634785,0.863278,0.981169,0.940669,0.755558,0.91744,0.403796,0.865237,1.0,0.395011,0.592829


In [108]:
corr_pairs = (
    gk_corr.where(np.triu(np.ones(gk_corr.shape), k=1).astype(bool))
    .stack()
    .reset_index()
)
corr_pairs.columns = ['Feature 1', 'Feature 2', 'Correlation']
top5 = corr_pairs.reindex(corr_pairs['Correlation'].abs().sort_values(ascending=False).index).head(10)
top5

Unnamed: 0,Feature 1,Feature 2,Correlation
35,minutes,recoveries,0.981169
32,minutes,expected_goals_conceded,0.955273
30,minutes,saves,0.952777
39,saves,expected_goals_conceded,0.945457
42,saves,recoveries,0.940669
1,total_points,form,0.929853
8,total_points,recoveries,0.923831
4,total_points,clean_sheets,0.920634
2,total_points,minutes,0.917844
53,expected_goals_conceded,recoveries,0.91744


In [None]:
df_gk = df_players[df_players['element_type'] == 1]
df_gk.nlargest(10, 'total_points')[['id', 'first_name', 'second_name', 'team', 'total_points', 'now_cost', 'form', 'minutes', 'saves', 'clean_sheets', 'expected_goals_conceded', 'penalties_saved', 'chance_of_playing_next_round']]

### Merge players and teams dataframes to get team names in players dataframe

In [109]:
positions_df = pd.DataFrame(bootstrap.get_positions())
positions_df.head()

Unnamed: 0,id,plural_name,plural_name_short,singular_name,singular_name_short,squad_select,squad_min_select,squad_max_select,squad_min_play,squad_max_play,ui_shirt_specific,sub_positions_locked,element_count
0,1,Goalkeepers,GKP,Goalkeeper,GKP,2,,,1,1,True,[12],86
1,2,Defenders,DEF,Defender,DEF,5,,,3,5,False,[],245
2,3,Midfielders,MID,Midfielder,MID,5,,,2,5,False,[],330
3,4,Forwards,FWD,Forward,FWD,3,,,1,3,False,[],82


In [111]:
df_players[['id', 'first_name', 'second_name', 'team', 'threat', 'total_points', 'element_type', 'now_cost', 'form', 'minutes', 'goals_scored','news']]

Unnamed: 0,id,first_name,second_name,team,threat,total_points,element_type,now_cost,form,minutes,goals_scored,news
0,1,David,Raya Martín,1,0.0,34,1,57,4.0,630,0,
1,2,Kepa,Arrizabalaga Revuelta,1,0.0,0,1,43,0.0,0,0,
2,3,Karl,Hein,1,0.0,0,1,40,0.0,0,0,Has joined Werder Bremen on loan for the rest ...
3,4,Tommy,Setford,1,0.0,0,1,40,0.0,0,0,
4,5,Gabriel,dos Santos Magalhães,1,56.0,47,2,63,8.2,630,1,
...,...,...,...,...,...,...,...,...,...,...,...,...
738,663,Jhon,Arias,20,58.0,14,3,52,2.5,357,0,
739,682,David,Møller Wolfe,20,4.0,7,2,44,0.2,144,0,
740,695,Jackson,Tchatchoua,20,14.0,9,2,45,1.8,355,0,
741,709,Ladislav,Krejcí,20,53.0,12,2,45,3.0,360,1,


In [123]:
new_df = df_players[['id', 'first_name', 'second_name', 'team', 'threat', 'total_points', 'element_type', 'now_cost', 'form', 'minutes', 'goals_scored','points_per_game']]

new_df = new_df.merge(df_teams, left_on='team', right_on='id')[['first_name', 'second_name', 'name', 'threat', 'total_points', 'now_cost', 'form_x', 'minutes', 'goals_scored','points_per_game']]

In [124]:
new_df.rename(columns={'id': 'Player_id'}, inplace=True)

In [125]:
new_df.head()

Unnamed: 0,first_name,second_name,name,threat,total_points,now_cost,form_x,minutes,goals_scored,points_per_game
0,David,Raya Martín,Arsenal,0.0,34,57,4.0,630,0,4.9
1,Kepa,Arrizabalaga Revuelta,Arsenal,0.0,0,43,0.0,0,0,0.0
2,Karl,Hein,Arsenal,0.0,0,40,0.0,0,0,0.0
3,Tommy,Setford,Arsenal,0.0,0,40,0.0,0,0,0.0
4,Gabriel,dos Santos Magalhães,Arsenal,56.0,47,63,8.2,630,1,6.7


In [126]:
# points per million

new_df['ppm'] = new_df['total_points'] / (new_df['now_cost'] / 10)
new_df.sort_values(by='ppm', ascending=False)

Unnamed: 0,first_name,second_name,name,threat,total_points,now_cost,form_x,minutes,goals_scored,points_per_game,ppm
630,Omar,Alderete,Sunderland,77.0,39,41,7.5,577,1,5.6,9.512195
280,Marc,Guéhi,Crystal Palace,76.0,46,49,5.8,630,1,6.6,9.387755
124,Marcos,Senesi Barón,Bournemouth,20.0,46,50,5.2,617,0,6.6,9.200000
628,Robin,Roefs,Sunderland,0.0,41,46,6.0,630,0,5.9,8.913043
525,Dan,Burn,Newcastle,33.0,43,51,6.5,630,0,6.1,8.431373
...,...,...,...,...,...,...,...,...,...,...,...
300,Kaden,Rodney,Crystal Palace,0.0,0,45,0.0,0,0,0.0,0.000000
299,David,Ozoh,Crystal Palace,0.0,0,45,0.0,0,0,0.0,0.000000
337,Martin,Sherif,Everton,0.0,0,44,0.0,0,0,0.0,0.000000
336,Youssef,Ramalho Chermiti,Everton,0.0,0,49,0.0,0,0,0.0,0.000000


## Playing around

In [137]:
# Find Value XI

'''
Good value = 
    - played at least 500 mins
    - points per game > 5
    - points per million > 15

'''

value_xi = new_df.loc[(new_df['minutes'] >= 500) & (new_df['points_per_game'] > 5) & (new_df['ppm'] > 7)]
value_xi.nlargest(20, 'ppm')[['first_name', 'second_name', 'name', 'threat', 'total_points', 'now_cost', 'form_x', 'minutes', 'goals_scored','points_per_game', 'ppm']].reset_index(drop=True)

Unnamed: 0,first_name,second_name,name,threat,total_points,now_cost,form_x,minutes,goals_scored,points_per_game,ppm
0,Omar,Alderete,Sunderland,77.0,39,41,7.5,577,1,5.6,9.512195
1,Marc,Guéhi,Crystal Palace,76.0,46,49,5.8,630,1,6.6,9.387755
2,Marcos,Senesi Barón,Bournemouth,20.0,46,50,5.2,617,0,6.6,9.2
3,Robin,Roefs,Sunderland,0.0,41,46,6.0,630,0,5.9,8.913043
4,Dan,Burn,Newcastle,33.0,43,51,6.5,630,0,6.1,8.431373
5,Antoine,Semenyo,Bournemouth,212.0,66,79,10.8,630,6,9.4,8.35443
6,Jurriën,Timber,Arsenal,183.0,48,59,5.5,521,2,6.9,8.135593
7,Nick,Pope,Newcastle,0.0,41,51,5.8,630,0,5.9,8.039216
8,Moisés,Caicedo Corozo,Chelsea,51.0,45,58,7.0,630,3,6.4,7.758621
9,Gabriel,dos Santos Magalhães,Arsenal,56.0,47,63,8.2,630,1,6.7,7.460317
