# Setup

## Get data and put into pandas

In [11]:
import requests
import pandas as pd
import numpy as np
url = 'https://fantasy.premierleague.com/api/bootstrap-static/'
r = requests.get(url)
json = r.json()

In [12]:
elements_df = pd.DataFrame(json['elements'])
elements_types_df = pd.DataFrame(json['element_types'])
teams_df = pd.DataFrame(json['teams'])
slim_elements_df = elements_df[['second_name','team','element_type','value_season','total_points', 'now_cost']]

## Clean data

In [13]:
slim_elements_df['position'] = slim_elements_df.element_type.map(elements_types_df.set_index('id').singular_name)
slim_elements_df['team'] = slim_elements_df.team.map(teams_df.set_index('id').name)
slim_elements_df['value'] = slim_elements_df.value_season.astype(float)
slim_elements_df = slim_elements_df.drop(columns=['element_type', 'value_season'])
slim_elements_df.style.hide_index()
slim_elements_df.sort_values('value',ascending=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,second_name,team,total_points,now_cost,position,value
310,Tsimikas,Liverpool,17,41,Defender,4.1
95,Duffy,Brighton,16,41,Defender,3.9
152,Alonso,Chelsea,21,56,Defender,3.8
448,Lloris,Spurs,21,55,Goalkeeper,3.8
79,Pinnock,Brentford,17,46,Defender,3.7
172,James,Chelsea,19,55,Defender,3.5
64,Jansson,Brentford,15,45,Defender,3.3
68,Raya Martin,Brentford,14,45,Goalkeeper,3.1
43,Mings,Aston Villa,15,50,Defender,3.0
462,Sánchez,Spurs,13,45,Defender,2.9


# Analysis

## Positional Value

Note: Players who don't play much (have a value of > 0) are removed. Don't pick players who don't play.

Outputs the mean average value of a player by position.

In [14]:
slim_elements_df = slim_elements_df.loc[slim_elements_df.value > 0]
pivot = slim_elements_df.pivot_table(index='position',values='value',aggfunc=np.mean).reset_index()
pivot.sort_values('value',ascending=False)

Unnamed: 0,position,value
2,Goalkeeper,1.5
0,Defender,1.130693
3,Midfielder,0.789262
1,Forward,0.72381


### Positional total points
Mean total points of players with a value of over 15 (not shit players)

In [37]:
slim_high_value_elements_df = slim_elements_df.loc[slim_elements_df.value > 0]
pivot = slim_high_value_elements_df.pivot_table(index='position',values='total_points',aggfunc=np.mean).reset_index()
pivot.sort_values('total_points',ascending=False)

Unnamed: 0,position,total_points
2,Goalkeeper,7.736842
0,Defender,5.613861
1,Forward,5.02381
3,Midfielder,4.805369


## Team Value

Same game but by teams

In [16]:
team_pivot = slim_elements_df.pivot_table(index='team',values='value',aggfunc=np.mean).reset_index()
team_pivot.sort_values('value',ascending=False)

Unnamed: 0,team,value
2,Brentford,1.58125
5,Chelsea,1.564706
16,Spurs,1.533333
3,Brighton,1.406667
10,Liverpool,1.347059
1,Aston Villa,1.058824
12,Man Utd,0.98
6,Crystal Palace,0.973333
11,Man City,0.863158
7,Everton,0.8625


## Most valuable player by position

In [17]:
fwd_df = slim_elements_df.loc[slim_elements_df.position == 'Forward']
mid_df = slim_elements_df.loc[slim_elements_df.position == 'Midfielder']
def_df = slim_elements_df.loc[slim_elements_df.position == 'Defender']
goal_df = slim_elements_df.loc[slim_elements_df.position == 'Goalkeeper']

### Keepers

In [18]:
# Value players
goal_df.sort_values('value',ascending=False).head(10)

Unnamed: 0,second_name,team,total_points,now_cost,position,value
448,Lloris,Spurs,21,55,Goalkeeper,3.8
68,Raya Martin,Brentford,14,45,Goalkeeper,3.1
296,Ramses Becker,Liverpool,15,60,Goalkeeper,2.5
179,Guaita,Crystal Palace,11,45,Goalkeeper,2.4
173,Mendy,Chelsea,14,60,Goalkeeper,2.3
107,Sánchez,Brighton,8,45,Goalkeeper,1.8
234,Schmeichel,Leicester,9,50,Goalkeeper,1.8
392,Woodman,Newcastle,7,45,Goalkeeper,1.6
36,Martínez,Aston Villa,7,55,Goalkeeper,1.3
325,Santana de Moraes,Man City,8,60,Goalkeeper,1.3


In [19]:
# points players
goal_df.sort_values('total_points',ascending=False).head(10)

Unnamed: 0,second_name,team,total_points,now_cost,position,value
448,Lloris,Spurs,21,55,Goalkeeper,3.8
296,Ramses Becker,Liverpool,15,60,Goalkeeper,2.5
68,Raya Martin,Brentford,14,45,Goalkeeper,3.1
173,Mendy,Chelsea,14,60,Goalkeeper,2.3
179,Guaita,Crystal Palace,11,45,Goalkeeper,2.4
234,Schmeichel,Leicester,9,50,Goalkeeper,1.8
107,Sánchez,Brighton,8,45,Goalkeeper,1.8
325,Santana de Moraes,Man City,8,60,Goalkeeper,1.3
36,Martínez,Aston Villa,7,55,Goalkeeper,1.3
392,Woodman,Newcastle,7,45,Goalkeeper,1.6


In [20]:
# Value spread
goal_df.value.hist()

ImportError: No module named 'matplotlib'

In [21]:
# Total Points Spread
goal_df.total_points.hist()

ImportError: No module named 'matplotlib'

### Defenders

In [22]:
# Value players
def_df.sort_values('value',ascending=False).head(10)

Unnamed: 0,second_name,team,total_points,now_cost,position,value
310,Tsimikas,Liverpool,17,41,Defender,4.1
95,Duffy,Brighton,16,41,Defender,3.9
152,Alonso,Chelsea,21,56,Defender,3.8
79,Pinnock,Brentford,17,46,Defender,3.7
172,James,Chelsea,19,55,Defender,3.5
64,Jansson,Brentford,15,45,Defender,3.3
43,Mings,Aston Villa,15,50,Defender,3.0
462,Sánchez,Spurs,13,45,Defender,2.9
456,Dier,Spurs,13,45,Defender,2.9
175,Chalobah,Chelsea,14,50,Defender,2.8


In [23]:
# points players
def_df.sort_values('total_points',ascending=False).head(10)

Unnamed: 0,second_name,team,total_points,now_cost,position,value
152,Alonso,Chelsea,21,56,Defender,3.8
172,James,Chelsea,19,55,Defender,3.5
302,Alexander-Arnold,Liverpool,18,75,Defender,2.4
79,Pinnock,Brentford,17,46,Defender,3.7
310,Tsimikas,Liverpool,17,41,Defender,4.1
95,Duffy,Brighton,16,41,Defender,3.9
43,Mings,Aston Villa,15,50,Defender,3.0
64,Jansson,Brentford,15,45,Defender,3.3
327,Laporte,Man City,14,55,Defender,2.5
175,Chalobah,Chelsea,14,50,Defender,2.8


In [24]:
# Value spread
def_df.value.hist()

ImportError: No module named 'matplotlib'

In [25]:
# Total Points Spread
def_df.total_points.hist()

ImportError: No module named 'matplotlib'

### Midfielders

In [26]:
# Value players
mid_df.sort_values('value',ascending=False).head(10)

Unnamed: 0,second_name,team,total_points,now_cost,position,value
343,Pogba,Man Utd,20,77,Midfielder,2.6
69,Canós,Brentford,14,55,Midfielder,2.5
359,Greenwood,Man Utd,18,76,Midfielder,2.4
108,Bissouma,Brighton,10,45,Midfielder,2.2
216,Doucouré,Everton,12,55,Midfielder,2.2
112,Mac Allister,Brighton,12,55,Midfielder,2.2
305,Jota,Liverpool,16,76,Midfielder,2.1
65,Nørgaard,Brentford,10,50,Midfielder,2.0
264,Klich,Leeds,11,55,Midfielder,2.0
227,Gray,Everton,11,55,Midfielder,2.0


In [27]:
# points players
mid_df.sort_values('total_points',ascending=False).head(10)

Unnamed: 0,second_name,team,total_points,now_cost,position,value
348,Borges Fernandes,Man Utd,21,121,Midfielder,1.7
343,Pogba,Man Utd,20,77,Midfielder,2.6
298,Salah,Liverpool,20,126,Midfielder,1.6
359,Greenwood,Man Utd,18,76,Midfielder,2.4
305,Jota,Liverpool,16,76,Midfielder,2.1
69,Canós,Brentford,14,55,Midfielder,2.5
454,Son,Spurs,13,100,Midfielder,1.3
458,Alli,Spurs,12,65,Midfielder,1.8
168,Mount,Chelsea,12,75,Midfielder,1.6
216,Doucouré,Everton,12,55,Midfielder,2.2


In [28]:
# Value spread
mid_df.value.hist()

ImportError: No module named 'matplotlib'

In [29]:
# Total Points Spread
mid_df.total_points.hist()

ImportError: No module named 'matplotlib'

### Forwards

In [30]:
# Value players
fwd_df.sort_values('value',ascending=False).head(10)

Unnamed: 0,second_name,team,total_points,now_cost,position,value
505,Dennis,Watford,14,51,Forward,2.7
59,Ings,Aston Villa,15,81,Forward,1.9
520,Antonio,West Ham,13,76,Forward,1.7
223,de Andrade,Everton,13,76,Forward,1.7
102,Maupay,Brighton,11,65,Forward,1.7
220,Calvert-Lewin,Everton,14,81,Forward,1.7
433,Adams,Southampton,10,70,Forward,1.4
510,Hernández Suárez,Watford,7,50,Forward,1.4
331,de Jesus,Man City,12,85,Forward,1.4
445,Armstrong,Southampton,8,60,Forward,1.3


In [31]:
# points players
fwd_df.sort_values('total_points',ascending=False).head(10)

Unnamed: 0,second_name,team,total_points,now_cost,position,value
59,Ings,Aston Villa,15,81,Forward,1.9
505,Dennis,Watford,14,51,Forward,2.7
220,Calvert-Lewin,Everton,14,81,Forward,1.7
223,de Andrade,Everton,13,76,Forward,1.7
520,Antonio,West Ham,13,76,Forward,1.7
331,de Jesus,Man City,12,85,Forward,1.4
102,Maupay,Brighton,11,65,Forward,1.7
433,Adams,Southampton,10,70,Forward,1.4
445,Armstrong,Southampton,8,60,Forward,1.3
385,Saint-Maximin,Newcastle,8,65,Forward,1.2


In [32]:
# Value spread
fwd_df.value.hist()

ImportError: No module named 'matplotlib'

In [33]:
# Total Points Spread
fwd_df.total_points.hist()

ImportError: No module named 'matplotlib'

## Value and points teams

In [34]:
metric_type = 'total_points'

total_points_team_df = pd.concat([
  goal_df.sort_values(metric_type,ascending=False).head(2),
  def_df.sort_values(metric_type,ascending=False).head(5),
  mid_df.sort_values(metric_type,ascending=False).head(5),
  fwd_df.sort_values(metric_type,ascending=False).head(3)
], ignore_index=False)

metric_type = 'value'

value_team_df = pd.concat([
  goal_df.sort_values(metric_type,ascending=False).head(2),
  def_df.sort_values(metric_type,ascending=False).head(5),
  mid_df.sort_values(metric_type,ascending=False).head(5),
  fwd_df.sort_values(metric_type,ascending=False).head(3)
], ignore_index=False)

d = {'Total Points': [total_points_team_df.total_points.sum(), value_team_df.total_points.sum()], 'Value': [total_points_team_df.value.sum(), value_team_df.value.sum()], 'Cost': [total_points_team_df.now_cost.sum(), value_team_df.now_cost.sum()]}
df = pd.DataFrame(data=d, index=['Total Points', 'Value',])

### Points Team

In [35]:
total_points_team_df.head(20)

Unnamed: 0,second_name,team,total_points,now_cost,position,value
448,Lloris,Spurs,21,55,Goalkeeper,3.8
296,Ramses Becker,Liverpool,15,60,Goalkeeper,2.5
152,Alonso,Chelsea,21,56,Defender,3.8
172,James,Chelsea,19,55,Defender,3.5
302,Alexander-Arnold,Liverpool,18,75,Defender,2.4
79,Pinnock,Brentford,17,46,Defender,3.7
310,Tsimikas,Liverpool,17,41,Defender,4.1
348,Borges Fernandes,Man Utd,21,121,Midfielder,1.7
343,Pogba,Man Utd,20,77,Midfielder,2.6
298,Salah,Liverpool,20,126,Midfielder,1.6


### Value Team

In [None]:
value_team_df.head(20)

In [None]:
### Total points and total value of the 2 teams

In [None]:
df.head()

# Futher analysis

Prehaps the most interesting stat to come from the value analysis was the correlation between MST calling a player a twat in the 20/21 season and their likelyhood of ending up in the top 10 best value players of their position. A **100%** record.

**Good value twats:** Bamford, Soucek, Ward-Prowse, Richarleson