# Setup

## Get data and put into pandas

In [1]:
import requests
import pandas as pd
import numpy as np
url = 'https://fantasy.premierleague.com/api/bootstrap-static/'
r = requests.get(url)
json = r.json()

In [2]:
elements_df = pd.DataFrame(json['elements'])
elements_types_df = pd.DataFrame(json['element_types'])
teams_df = pd.DataFrame(json['teams'])
events_df = pd.DataFrame(json["events"])
completed_events_df = events_df.loc[events_df.finished == True]
COMPLETED_GW = completed_events_df['id'].iloc[-1]
slim_elements_df = elements_df[['second_name','team','element_type','value_season','total_points', 'now_cost', 'minutes']]

## Clean data

In [3]:
slim_elements_df['position'] = slim_elements_df.element_type.map(elements_types_df.set_index('id').singular_name)
slim_elements_df['team'] = slim_elements_df.team.map(teams_df.set_index('id').name)
slim_elements_df['value'] = slim_elements_df.value_season.astype(float)
slim_elements_df = slim_elements_df.drop(columns=['element_type', 'value_season'])
slim_elements_df.style.hide_index()
slim_elements_df.sort_values('value',ascending=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,second_name,team,total_points,now_cost,minutes,position,value
310,Tsimikas,Liverpool,17,41,180,Defender,4.1
95,Duffy,Brighton,16,41,180,Defender,3.9
152,Alonso,Chelsea,21,56,175,Defender,3.8
448,Lloris,Spurs,21,55,180,Goalkeeper,3.8
79,Pinnock,Brentford,17,46,180,Defender,3.7
172,James,Chelsea,19,55,113,Defender,3.5
64,Jansson,Brentford,15,45,180,Defender,3.3
68,Raya Martin,Brentford,14,45,180,Goalkeeper,3.1
43,Mings,Aston Villa,15,50,180,Defender,3.0
462,Sánchez,Spurs,13,45,180,Defender,2.9


# Analysis

## Positional Value

Note: Players who don't play much (have a value of > 0) are removed. Don't pick players who don't play.

Outputs the mean average value of a player by position.

In [4]:
slim_elements_df = slim_elements_df.loc[slim_elements_df.minutes > (70 * COMPLETED_GW)]
pivot = slim_elements_df.pivot_table(index='position',values='value',aggfunc=np.mean).reset_index()
pivot.sort_values('value',ascending=False)

Unnamed: 0,position,value
2,Goalkeeper,1.425
0,Defender,1.204348
1,Forward,1.0125
3,Midfielder,0.992453


### Positional total points
Mean total points of players with a value of over 15 (not shit players)

In [5]:
slim_high_value_elements_df = slim_elements_df.loc[slim_elements_df.total_points > (1.75 * COMPLETED_GW)]
pivot = slim_high_value_elements_df.pivot_table(index='position',values='total_points',aggfunc=np.mean).reset_index()
pivot.sort_values('total_points',ascending=False)

Unnamed: 0,position,total_points
2,Goalkeeper,8.111111
0,Defender,7.642857
1,Forward,7.291667
3,Midfielder,6.113208


In [6]:
elements_df.keys()

Index(['assists', 'bonus', 'bps', 'chance_of_playing_next_round',
       'chance_of_playing_this_round', 'clean_sheets', 'code',
       'corners_and_indirect_freekicks_order',
       'corners_and_indirect_freekicks_text', 'cost_change_event',
       'cost_change_event_fall', 'cost_change_start', 'cost_change_start_fall',
       'creativity', 'creativity_rank', 'creativity_rank_type',
       'direct_freekicks_order', 'direct_freekicks_text', 'dreamteam_count',
       'element_type', 'ep_next', 'ep_this', 'event_points', 'first_name',
       'form', 'goals_conceded', 'goals_scored', 'ict_index', 'ict_index_rank',
       'ict_index_rank_type', 'id', 'in_dreamteam', 'influence',
       'influence_rank', 'influence_rank_type', 'minutes', 'news',
       'news_added', 'now_cost', 'own_goals', 'penalties_missed',
       'penalties_order', 'penalties_saved', 'penalties_text', 'photo',
       'points_per_game', 'red_cards', 'saves', 'second_name',
       'selected_by_percent', 'special', 'squad_

## Team Value

Same game but by teams

In [7]:
team_pivot = slim_elements_df.pivot_table(index='team',values='value',aggfunc=np.mean).reset_index()
team_pivot.sort_values('value',ascending=False)

Unnamed: 0,team,value
2,Brentford,2.190909
16,Spurs,2.009091
5,Chelsea,1.864286
10,Liverpool,1.741667
3,Brighton,1.633333
1,Aston Villa,1.341667
9,Leicester,1.144444
12,Man Utd,1.107692
6,Crystal Palace,1.092308
7,Everton,1.015385


## Team Points

Same game but by teams

In [12]:
team_pivot = slim_elements_df.pivot_table(index='team',values='total_points',aggfunc=np.mean).reset_index()
team_pivot.sort_values('total_points',ascending=False)

Unnamed: 0,team,total_points
10,Liverpool,11.416667
5,Chelsea,11.071429
16,Spurs,10.909091
2,Brentford,10.545455
3,Brighton,8.25
12,Man Utd,7.692308
1,Aston Villa,7.5
11,Man City,6.75
7,Everton,6.076923
9,Leicester,6.0


## Most valuable player by position

In [8]:
fwd_df = slim_elements_df.loc[slim_elements_df.position == 'Forward']
mid_df = slim_elements_df.loc[slim_elements_df.position == 'Midfielder']
def_df = slim_elements_df.loc[slim_elements_df.position == 'Defender']
goal_df = slim_elements_df.loc[slim_elements_df.position == 'Goalkeeper']

### Keepers

In [9]:
# Value players
goal_df.sort_values('value',ascending=False).head(10)

Unnamed: 0,second_name,team,total_points,now_cost,minutes,position,value
448,Lloris,Spurs,21,55,180,Goalkeeper,3.8
68,Raya Martin,Brentford,14,45,180,Goalkeeper,3.1
296,Ramses Becker,Liverpool,15,60,180,Goalkeeper,2.5
179,Guaita,Crystal Palace,11,45,180,Goalkeeper,2.4
173,Mendy,Chelsea,14,60,180,Goalkeeper,2.3
107,Sánchez,Brighton,8,45,180,Goalkeeper,1.8
234,Schmeichel,Leicester,9,50,90,Goalkeeper,1.8
392,Woodman,Newcastle,7,45,180,Goalkeeper,1.6
36,Martínez,Aston Villa,7,55,180,Goalkeeper,1.3
325,Santana de Moraes,Man City,8,60,180,Goalkeeper,1.3


In [10]:
# points players
goal_df.sort_values('total_points',ascending=False).head(10)

Unnamed: 0,second_name,team,total_points,now_cost,minutes,position,value
448,Lloris,Spurs,21,55,180,Goalkeeper,3.8
296,Ramses Becker,Liverpool,15,60,180,Goalkeeper,2.5
68,Raya Martin,Brentford,14,45,180,Goalkeeper,3.1
173,Mendy,Chelsea,14,60,180,Goalkeeper,2.3
179,Guaita,Crystal Palace,11,45,180,Goalkeeper,2.4
234,Schmeichel,Leicester,9,50,90,Goalkeeper,1.8
107,Sánchez,Brighton,8,45,180,Goalkeeper,1.8
325,Santana de Moraes,Man City,8,60,180,Goalkeeper,1.3
36,Martínez,Aston Villa,7,55,180,Goalkeeper,1.3
392,Woodman,Newcastle,7,45,180,Goalkeeper,1.6


In [11]:
# Value spread
goal_df.value.hist()

ImportError: No module named 'matplotlib'

In [None]:
# Total Points Spread
goal_df.total_points.hist()

### Defenders

In [None]:
# Value players
def_df.sort_values('value',ascending=False).head(10)

In [None]:
# points players
def_df.sort_values('total_points',ascending=False).head(10)

In [None]:
# Value spread
def_df.value.hist()

In [None]:
# Total Points Spread
def_df.total_points.hist()

### Midfielders

In [None]:
# Value players
mid_df.sort_values('value',ascending=False).head(10)

In [None]:
# points players
mid_df.sort_values('total_points',ascending=False).head(10)

In [None]:
# Value spread
mid_df.value.hist()

In [None]:
# Total Points Spread
mid_df.total_points.hist()

### Forwards

In [None]:
# Value players
fwd_df.sort_values('value',ascending=False).head(10)

In [None]:
# points players
fwd_df.sort_values('total_points',ascending=False).head(10)

In [None]:
# Value spread
fwd_df.value.hist()

In [None]:
# Total Points Spread
fwd_df.total_points.hist()

## Value and points teams

In [None]:
metric_type = 'total_points'

total_points_team_df = pd.concat([
  goal_df.sort_values(metric_type,ascending=False).head(2),
  def_df.sort_values(metric_type,ascending=False).head(5),
  mid_df.sort_values(metric_type,ascending=False).head(5),
  fwd_df.sort_values(metric_type,ascending=False).head(3)
], ignore_index=False)

metric_type = 'value'

value_team_df = pd.concat([
  goal_df.sort_values(metric_type,ascending=False).head(2),
  def_df.sort_values(metric_type,ascending=False).head(5),
  mid_df.sort_values(metric_type,ascending=False).head(5),
  fwd_df.sort_values(metric_type,ascending=False).head(3)
], ignore_index=False)

d = {'Total Points': [total_points_team_df.total_points.sum(), value_team_df.total_points.sum()], 'Value': [total_points_team_df.value.sum(), value_team_df.value.sum()], 'Cost': [total_points_team_df.now_cost.sum(), value_team_df.now_cost.sum()]}
df = pd.DataFrame(data=d, index=['Total Points', 'Value',])

### Points Team

In [None]:
total_points_team_df.head(20)

### Value Team

In [None]:
value_team_df.head(20)

In [None]:
### Total points and total value of the 2 teams

In [None]:
df.head()

# Futher analysis

Prehaps the most interesting stat to come from the value analysis was the correlation between MST calling a player a twat in the 20/21 season and their likelyhood of ending up in the top 10 best value players of their position. A **100%** record.

**Good value twats:** Bamford, Soucek, Ward-Prowse, Richarleson