# Data analysis of the fantasy premier league

### Goal of Analysis: Use data science to get the best possible team for the next gameweek as well as suggest the best possible transfers according to the available resources.


In [26]:
import json
import numpy as np
import pandas as pd
import requests


In [27]:
with open('D:/programming/my_projects/fpl_ai_api_django/fpl_data.json','r',encoding='utf-8') as file:
    data=json.load(file)
    teams_df=pd.DataFrame(data['teams'])
    elements_df=pd.DataFrame(data['elements'])
    element_types_df=pd.DataFrame(data['element_types'])
    

In [28]:
teams_df.head()

Unnamed: 0,code,draw,form,id,loss,name,played,points,position,short_name,...,team_division,unavailable,win,strength_overall_home,strength_overall_away,strength_attack_home,strength_attack_away,strength_defence_home,strength_defence_away,pulse_id
0,3,0,,1,0,Arsenal,0,0,0,ARS,...,,False,0,1220,1270,1240,1250,1200,1270,1
1,7,0,,2,0,Aston Villa,0,0,0,AVL,...,,False,0,1090,1100,1110,1130,1090,1110,2
2,91,0,,3,0,Bournemouth,0,0,0,BOU,...,,False,0,1060,1090,1070,1130,1050,1080,127
3,94,0,,4,0,Brentford,0,0,0,BRE,...,,False,0,1100,1130,1100,1110,1130,1160,130
4,36,0,,5,0,Brighton,0,0,0,BHA,...,,False,0,1150,1160,1140,1160,1170,1190,131


In [29]:
elements_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 740 entries, 0 to 739
Data columns (total 88 columns):
 #   Column                                Non-Null Count  Dtype  
---  ------                                --------------  -----  
 0   chance_of_playing_next_round          503 non-null    float64
 1   chance_of_playing_this_round          501 non-null    float64
 2   code                                  740 non-null    int64  
 3   cost_change_event                     740 non-null    int64  
 4   cost_change_event_fall                740 non-null    int64  
 5   cost_change_start                     740 non-null    int64  
 6   cost_change_start_fall                740 non-null    int64  
 7   dreamteam_count                       740 non-null    int64  
 8   element_type                          740 non-null    int64  
 9   ep_next                               740 non-null    object 
 10  ep_this                               740 non-null    object 
 11  event_points       

In [30]:
slim_elements_df=elements_df[['second_name','element_type','team','total_points','minutes','selected_by_percent','transfers_in','value_season']]

In [31]:
slim_elements_df

Unnamed: 0,second_name,element_type,team,total_points,minutes,selected_by_percent,transfers_in,value_season
0,Xhaka,3,1,81,1706,2.8,812298,16.5
1,Elneny,3,1,6,111,0.7,196232,1.5
2,Holding,2,1,6,16,0.1,6884,1.4
3,Partey,3,1,54,1450,0.5,96965,11.5
4,Ødegaard,3,1,119,1598,35.0,4646275,17.0
...,...,...,...,...,...,...,...,...
735,Santos Carneiro Da Cunha,4,20,5,191,0.1,16769,0.9
736,Lemina,3,20,5,165,0.0,272,1.1
737,Sarabia,3,20,2,104,0.0,2627,0.4
738,Bentley,1,20,0,0,0.0,407,0.0


In [32]:
element_types_df

Unnamed: 0,id,plural_name,plural_name_short,singular_name,singular_name_short,squad_select,squad_min_play,squad_max_play,ui_shirt_specific,sub_positions_locked,element_count
0,1,Goalkeepers,GKP,Goalkeeper,GKP,2,1,1,True,[12],77
1,2,Defenders,DEF,Defender,DEF,5,3,5,False,[],250
2,3,Midfielders,MID,Midfielder,MID,5,2,5,False,[],321
3,4,Forwards,FWD,Forward,FWD,3,1,3,False,[],92


In [33]:
elements_df['position'] = elements_df.element_type.map(element_types_df.set_index('id').singular_name)

In [34]:
elements_df['value'] = elements_df.value_season.astype(float)

In [35]:
elements_df['team'] = elements_df.team.map(teams_df.set_index('id').name)

In [36]:
elements_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 740 entries, 0 to 739
Data columns (total 90 columns):
 #   Column                                Non-Null Count  Dtype  
---  ------                                --------------  -----  
 0   chance_of_playing_next_round          503 non-null    float64
 1   chance_of_playing_this_round          501 non-null    float64
 2   code                                  740 non-null    int64  
 3   cost_change_event                     740 non-null    int64  
 4   cost_change_event_fall                740 non-null    int64  
 5   cost_change_start                     740 non-null    int64  
 6   cost_change_start_fall                740 non-null    int64  
 7   dreamteam_count                       740 non-null    int64  
 8   element_type                          740 non-null    int64  
 9   ep_next                               740 non-null    object 
 10  ep_this                               740 non-null    object 
 11  event_points       

In [37]:
slim_elements_df=elements_df[['second_name','position','team','total_points','minutes','selected_by_percent','transfers_in','value_season','value']]

In [38]:
slim_elements_df

Unnamed: 0,second_name,position,team,total_points,minutes,selected_by_percent,transfers_in,value_season,value
0,Xhaka,Midfielder,Arsenal,81,1706,2.8,812298,16.5,16.5
1,Elneny,Midfielder,Arsenal,6,111,0.7,196232,1.5,1.5
2,Holding,Defender,Arsenal,6,16,0.1,6884,1.4,1.4
3,Partey,Midfielder,Arsenal,54,1450,0.5,96965,11.5,11.5
4,Ødegaard,Midfielder,Arsenal,119,1598,35.0,4646275,17.0,17.0
...,...,...,...,...,...,...,...,...,...
735,Santos Carneiro Da Cunha,Forward,Wolves,5,191,0.1,16769,0.9,0.9
736,Lemina,Midfielder,Wolves,5,165,0.0,272,1.1,1.1
737,Sarabia,Midfielder,Wolves,2,104,0.0,2627,0.4,0.4
738,Bentley,Goalkeeper,Wolves,0,0,0.0,407,0.0,0.0


In [39]:
slim_elements_df.sort_values('value',ascending=False).head(10)

Unnamed: 0,second_name,position,team,total_points,minutes,selected_by_percent,transfers_in,value_season,value
525,Trippier,Defender,Newcastle,143,1864,69.6,6481031,23.4,23.4
115,Raya Martin,Goalkeeper,Brentford,98,1890,9.1,1312839,20.9,20.9
537,Almirón Rejala,Midfielder,Newcastle,114,1736,34.3,5513952,20.0,20.0
526,Burn,Defender,Newcastle,89,1764,4.2,580384,19.3,19.3
545,Botman,Defender,Newcastle,87,1597,6.3,893811,19.3,19.3
138,Mee,Defender,Brentford,92,1748,11.5,1842154,19.2,19.2
544,Pope,Goalkeeper,Newcastle,105,1890,25.5,3555599,19.1,19.1
534,Schär,Defender,Newcastle,99,1784,17.2,2926916,19.0,19.0
7,White,Defender,Arsenal,86,1593,13.8,2007524,18.3,18.3
326,Hoelgebaum Pereira,Midfielder,Fulham,80,1818,18.9,2823398,18.2,18.2


In [40]:
slim_elements_df.pivot_table(index='position',values='value',aggfunc=np.mean).reset_index()

Unnamed: 0,position,value
0,Defender,4.7632
1,Forward,3.769565
2,Goalkeeper,4.350649
3,Midfielder,4.44486


In [41]:
pivot=slim_elements_df.pivot_table(index='position',values='value',aggfunc=np.mean).reset_index()
pivot.sort_values('value',ascending=False)

Unnamed: 0,position,value
0,Defender,4.7632
3,Midfielder,4.44486
2,Goalkeeper,4.350649
1,Forward,3.769565


In [42]:
slim_elements_df = slim_elements_df.loc[slim_elements_df.value > 0]

In [43]:
pivot = slim_elements_df.pivot_table(index='position',values='value',aggfunc=np.mean).reset_index()

In [44]:
pivot.sort_values('value',ascending=False)

Unnamed: 0,position,value
2,Goalkeeper,11.551724
0,Defender,6.730508
3,Midfielder,5.945
1,Forward,5.180597


In [45]:
team_pivot = slim_elements_df.pivot_table(index='team',values='value',aggfunc=np.mean).reset_index()

In [46]:
team_pivot.sort_values('value',ascending=False)

Unnamed: 0,team,value
14,Newcastle,9.556522
0,Arsenal,9.343478
3,Brentford,7.852
12,Man City,7.714286
4,Brighton,7.426087
13,Man Utd,7.220833
6,Crystal Palace,6.981818
10,Leicester,6.85
17,Spurs,6.72
1,Aston Villa,6.678261


In [47]:
fwd_df = slim_elements_df.loc[slim_elements_df.position == 'Forward']
mid_df = slim_elements_df.loc[slim_elements_df.position == 'Midfielder']
def_df = slim_elements_df.loc[slim_elements_df.position == 'Defender']
goal_df = slim_elements_df.loc[slim_elements_df.position == 'Goalkeeper']

In [48]:
goal_df.value.hist()

ImportError: matplotlib is required for plotting when the default backend "matplotlib" is selected.