# FPL Player Points Forecast

In [1]:
#Import libraries

import requests
import pandas as pd
import numpy as np
import unicodedata

In [2]:
#Configure API

url = 'https://fantasy.premierleague.com/api/bootstrap-static/'

In [3]:
r = requests.get(url)

In [4]:
json = r.json()

In [5]:
json.keys()

dict_keys(['events', 'game_settings', 'phases', 'teams', 'total_players', 'elements', 'element_stats', 'element_types'])

# Extracting data needed

In [6]:
elements_df = pd.DataFrame(json['elements'])
elements_types_df = pd.DataFrame(json['element_types'])
teams_df = pd.DataFrame(json['teams'])

In [7]:
#List of all columns

elements_df.columns

Index(['chance_of_playing_next_round', 'chance_of_playing_this_round', 'code',
       'cost_change_event', 'cost_change_event_fall', 'cost_change_start',
       'cost_change_start_fall', 'dreamteam_count', 'element_type', 'ep_next',
       'ep_this', 'event_points', 'first_name', 'form', 'id', 'in_dreamteam',
       'news', 'news_added', 'now_cost', 'photo', 'points_per_game',
       'second_name', 'selected_by_percent', 'special', 'squad_number',
       'status', 'team', 'team_code', 'total_points', 'transfers_in',
       'transfers_in_event', 'transfers_out', 'transfers_out_event',
       'value_form', 'value_season', 'web_name', 'minutes', 'goals_scored',
       'assists', 'clean_sheets', 'goals_conceded', 'own_goals',
       'penalties_saved', 'penalties_missed', 'yellow_cards', 'red_cards',
       'saves', 'bonus', 'bps', 'influence', 'creativity', 'threat',
       'ict_index', 'influence_rank', 'influence_rank_type', 'creativity_rank',
       'creativity_rank_type', 'threat_rank'

In [8]:
#Selecting columns wanted

slim_elements_df = elements_df[['first_name','second_name','team','element_type','selected_by_percent','now_cost','minutes','transfers_in','value_season','total_points']]

In [9]:
slim_elements_df.head()

Unnamed: 0,first_name,second_name,team,element_type,selected_by_percent,now_cost,minutes,transfers_in,value_season,total_points
0,Mesut,Özil,1,3,0.7,68,0,3196,0.0,0
1,Sokratis,Papastathopoulos,1,2,0.1,49,0,10176,0.0,0
2,David,Luiz Moreira Marinho,1,2,1.1,55,271,25882,1.1,6
3,Pierre-Emerick,Aubameyang,1,3,17.0,118,450,345268,1.5,18
4,Cédric,Soares,1,2,0.2,48,0,2318,0.0,0


In [10]:
slim_elements_df['position'] = slim_elements_df.element_type.map(elements_types_df.set_index('id').singular_name)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [11]:
slim_elements_df.head()

Unnamed: 0,first_name,second_name,team,element_type,selected_by_percent,now_cost,minutes,transfers_in,value_season,total_points,position
0,Mesut,Özil,1,3,0.7,68,0,3196,0.0,0,Midfielder
1,Sokratis,Papastathopoulos,1,2,0.1,49,0,10176,0.0,0,Defender
2,David,Luiz Moreira Marinho,1,2,1.1,55,271,25882,1.1,6,Defender
3,Pierre-Emerick,Aubameyang,1,3,17.0,118,450,345268,1.5,18,Midfielder
4,Cédric,Soares,1,2,0.2,48,0,2318,0.0,0,Defender


In [12]:
slim_elements_df['team'] = slim_elements_df.team.map(teams_df.set_index('id').name)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [13]:
slim_elements_df.head()

Unnamed: 0,first_name,second_name,team,element_type,selected_by_percent,now_cost,minutes,transfers_in,value_season,total_points,position
0,Mesut,Özil,Arsenal,3,0.7,68,0,3196,0.0,0,Midfielder
1,Sokratis,Papastathopoulos,Arsenal,2,0.1,49,0,10176,0.0,0,Defender
2,David,Luiz Moreira Marinho,Arsenal,2,1.1,55,271,25882,1.1,6,Defender
3,Pierre-Emerick,Aubameyang,Arsenal,3,17.0,118,450,345268,1.5,18,Midfielder
4,Cédric,Soares,Arsenal,2,0.2,48,0,2318,0.0,0,Defender


In [14]:
slim_elements_df['value'] = slim_elements_df.value_season.astype(float)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [15]:
slim_elements_df.sort_values('value',ascending=False).tail(20)

Unnamed: 0,first_name,second_name,team,element_type,selected_by_percent,now_cost,minutes,transfers_in,value_season,total_points,position,value
260,Filip,Benkovic,Leicester,2,0.0,44,0,156,0.0,0,Defender,0.0
291,Oliver,Casey,Leeds,2,0.7,40,0,8540,0.0,0,Defender,0.0
314,Harry,Wilson,Liverpool,3,0.0,52,0,419,0.0,0,Midfielder,0.0
313,Divock,Origi,Liverpool,4,0.7,53,0,13871,0.0,0,Forward,0.0
305,Alex,Oxlade-Chamberlain,Liverpool,3,0.0,64,0,349,0.0,0,Midfielder,0.0
304,Xherdan,Shaqiri,Liverpool,3,0.1,64,0,737,0.0,0,Midfielder,0.0
299,Konstantinos,Tsimikas,Liverpool,2,0.0,49,0,475,0.0,0,Defender,0.0
296,Diego,Llorente,Leeds,2,0.0,45,0,2469,0.0,0,Defender,0.0
295,Mateusz,Bogusz,Leeds,3,0.0,45,0,1996,0.0,0,Midfielder,0.0
1,Sokratis,Papastathopoulos,Arsenal,2,0.1,49,0,10176,0.0,0,Defender,0.0


In [16]:
model_elements_df = slim_elements_df[['first_name','second_name','team', 'position','selected_by_percent','now_cost','total_points']]

In [17]:
model_elements_df.head()

Unnamed: 0,first_name,second_name,team,position,selected_by_percent,now_cost,total_points
0,Mesut,Özil,Arsenal,Midfielder,0.7,68,0
1,Sokratis,Papastathopoulos,Arsenal,Defender,0.1,49,0
2,David,Luiz Moreira Marinho,Arsenal,Defender,1.1,55,6
3,Pierre-Emerick,Aubameyang,Arsenal,Midfielder,17.0,118,18
4,Cédric,Soares,Arsenal,Defender,0.2,48,0


# Adding points to master dataframe

In [18]:
Points = {'position' : ['Goalkeeper', 'Defender', 'Midfielder', 'Forward'],
            'CS Points' : [4, 4, 1, 0],
             '1Goal Points' : [6, 6, 5, 4],
                '2Goal Points' : [12, 12, 10, 8]}

In [19]:
#Creating points dataframe

fullPoints = pd.DataFrame(Points, columns = ['position', 'CS Points', '1Goal Points', '2Goal Points'])
fullPoints

Unnamed: 0,position,CS Points,1Goal Points,2Goal Points
0,Goalkeeper,4,6,12
1,Defender,4,6,12
2,Midfielder,1,5,10
3,Forward,0,4,8


In [20]:
#Merging dataframes

inner_join = pd.merge(model_elements_df,
                     fullPoints,
                     on = 'position',
                     how = 'left')

inner_join

Unnamed: 0,first_name,second_name,team,position,selected_by_percent,now_cost,total_points,CS Points,1Goal Points,2Goal Points
0,Mesut,Özil,Arsenal,Midfielder,0.7,68,0,1,5,10
1,Sokratis,Papastathopoulos,Arsenal,Defender,0.1,49,0,4,6,12
2,David,Luiz Moreira Marinho,Arsenal,Defender,1.1,55,6,4,6,12
3,Pierre-Emerick,Aubameyang,Arsenal,Midfielder,17.0,118,18,1,5,10
4,Cédric,Soares,Arsenal,Defender,0.2,48,0,4,6,12
...,...,...,...,...,...,...,...,...,...,...
583,Ki-Jana,Hoever,Wolves,Defender,0.0,45,2,4,6,12
584,Nélson,Cabral Semedo,Wolves,Defender,2.1,55,13,4,6,12
585,Meritan,Shabani,Wolves,Midfielder,0.0,45,0,1,5,10
586,Rayan,Ait Nouri,Wolves,Defender,0.0,50,0,4,6,12


In [21]:
inner_join["Selection"] = inner_join["first_name"] + " " + inner_join["second_name"]
inner_join

Unnamed: 0,first_name,second_name,team,position,selected_by_percent,now_cost,total_points,CS Points,1Goal Points,2Goal Points,Selection
0,Mesut,Özil,Arsenal,Midfielder,0.7,68,0,1,5,10,Mesut Özil
1,Sokratis,Papastathopoulos,Arsenal,Defender,0.1,49,0,4,6,12,Sokratis Papastathopoulos
2,David,Luiz Moreira Marinho,Arsenal,Defender,1.1,55,6,4,6,12,David Luiz Moreira Marinho
3,Pierre-Emerick,Aubameyang,Arsenal,Midfielder,17.0,118,18,1,5,10,Pierre-Emerick Aubameyang
4,Cédric,Soares,Arsenal,Defender,0.2,48,0,4,6,12,Cédric Soares
...,...,...,...,...,...,...,...,...,...,...,...
583,Ki-Jana,Hoever,Wolves,Defender,0.0,45,2,4,6,12,Ki-Jana Hoever
584,Nélson,Cabral Semedo,Wolves,Defender,2.1,55,13,4,6,12,Nélson Cabral Semedo
585,Meritan,Shabani,Wolves,Midfielder,0.0,45,0,1,5,10,Meritan Shabani
586,Rayan,Ait Nouri,Wolves,Defender,0.0,50,0,4,6,12,Rayan Ait Nouri


In [22]:
#Remove accents from names

inner_join = inner_join.replace({'Selection': {'ä' : 'a',
                                               'ä' : 'a',
                                               'â' : 'a',
                                               'á' : 'a',
                                               'ñ' : 'n',
                                               'Ñ' : 'N',
                                               'Ç' : 'C', 
                                               'ü' : 'u',
                                               'û' : 'u',
                                               'ù' : 'u',
                                               'ú' : 'u', 
                                               'ç' : 'c', 
                                               'é' : 'e',
                                               'ê' : 'e',
                                               'ë' : 'e',
                                               'è' : 'e',
                                               'ï' : 'i',
                                               'î' : 'i',
                                               'ì' : 'i',
                                               'í' : 'i',
                                               'ô' : 'o',
                                               'ö' : 'o',
                                               'ò' : 'o',
                                               'ó' : 'o',
                                               'Ö' : 'O'}}, regex=True)

In [23]:
inner_join = inner_join.replace({'Selection': {'Richarlison de Andrade' : 'Richarlison'}}, regex=True)

In [24]:
inner_join = inner_join.replace({'Selection': {'Heung-Min Son' : 'Son Heung-Min'}}, regex=True)

In [25]:
#Check if accents removed

inner_join[inner_join['Selection'].str.contains('Son')]

Unnamed: 0,first_name,second_name,team,position,selected_by_percent,now_cost,total_points,CS Points,1Goal Points,2Goal Points,Selection
478,Heung-Min,Son,Spurs,Midfielder,51.6,94,58,1,5,10,Son Heung-Min


# Processing odds data

In [26]:
#Reading in csv files for each game

anytime0 = pd.read_csv(r'C:\Users\Conor\Documents\FPL\GW6\anytime0.csv')
anytime1 = pd.read_csv(r'C:\Users\Conor\Documents\FPL\GW6\anytime1.csv')
anytime2 = pd.read_csv(r'C:\Users\Conor\Documents\FPL\GW6\anytime2.csv')
anytime3 = pd.read_csv(r'C:\Users\Conor\Documents\FPL\GW6\anytime3.csv')
anytime4 = pd.read_csv(r'C:\Users\Conor\Documents\FPL\GW6\anytime4.csv')
anytime5 = pd.read_csv(r'C:\Users\Conor\Documents\FPL\GW6\anytime5.csv')
anytime6 = pd.read_csv(r'C:\Users\Conor\Documents\FPL\GW6\anytime6.csv')
anytime7 = pd.read_csv(r'C:\Users\Conor\Documents\FPL\GW6\anytime7.csv')
anytime8 = pd.read_csv(r'C:\Users\Conor\Documents\FPL\GW6\anytime8.csv')
anytime9 = pd.read_csv(r'C:\Users\Conor\Documents\FPL\GW6\anytime9.csv')

#Anytime goalscorer odds dataframe

anytime = merged_df = pd.concat([anytime0,anytime1,anytime2,anytime3,anytime4,anytime5,anytime6,anytime7,anytime8,anytime9])

In [27]:
goals0 = pd.read_csv(r'C:\Users\Conor\Documents\FPL\GW6\2goals0.csv')
goals1 = pd.read_csv(r'C:\Users\Conor\Documents\FPL\GW6\2goals1.csv')
goals2 = pd.read_csv(r'C:\Users\Conor\Documents\FPL\GW6\2goals2.csv')
goals3 = pd.read_csv(r'C:\Users\Conor\Documents\FPL\GW6\2goals3.csv')
goals4 = pd.read_csv(r'C:\Users\Conor\Documents\FPL\GW6\2goals4.csv')
goals5 = pd.read_csv(r'C:\Users\Conor\Documents\FPL\GW6\2goals5.csv')
goals6 = pd.read_csv(r'C:\Users\Conor\Documents\FPL\GW6\2goals6.csv')
goals7 = pd.read_csv(r'C:\Users\Conor\Documents\FPL\GW6\2goals7.csv')
goals8 = pd.read_csv(r'C:\Users\Conor\Documents\FPL\GW6\2goals8.csv')
goals9 = pd.read_csv(r'C:\Users\Conor\Documents\FPL\GW6\2goals9.csv')

#To score 2+ goals odds dataframe

goals = merged_df = pd.concat([goals0,goals1,goals2,goals3,goals4,goals5,goals6,goals7,goals8,goals9])

In [28]:
#Adding anytime goalscorer odds to master dataframe

new_table = pd.merge(inner_join,  
                      anytime,  
                      on ='Selection',  
                      how = 'inner')

In [29]:
#Adding score 2+ goal odds to master dataframe

new_table_2 = pd.merge(new_table,  
                      goals,  
                      on ='Selection',  
                      how = 'inner')

In [30]:
#Reading in cleansheet odds

cleansheetOdds = pd.read_csv(r'C:\Users\Conor\Documents\FPL\GW6\cleansheet6.csv')

In [31]:
#Adding clean sheet odds to master dataframe

final_table = pd.merge(new_table_2,  
                      cleansheetOdds,  
                      on ='team',  
                      how = 'inner')
final_table

Unnamed: 0,first_name,second_name,team,position,selected_by_percent,now_cost,total_points,CS Points,1Goal Points,2Goal Points,Selection,Odds_x,Odds_y,odds
0,Pierre-Emerick,Aubameyang,Arsenal,Midfielder,17.0,118,18,1,5,10,Pierre-Emerick Aubameyang,1.72,4.5,2.87
1,Cédric,Soares,Arsenal,Defender,0.2,48,0,4,6,12,Cedric Soares,14.00,301.0,2.87
2,Alexandre,Lacazette,Arsenal,Forward,4.1,85,22,0,4,8,Alexandre Lacazette,2.30,7.5,2.87
3,Granit,Xhaka,Arsenal,Midfielder,0.6,54,9,1,5,10,Granit Xhaka,9.00,126.0,2.87
4,Héctor,Bellerín,Arsenal,Defender,5.8,50,20,4,6,12,Hector Bellerin,14.00,301.0,2.87
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
293,Romain,Saïss,Wolves,Defender,17.4,52,31,4,6,12,Romain Saiss,9.50,126.0,2.10
294,Leander,Dendoncker,Wolves,Midfielder,1.2,49,13,1,5,10,Leander Dendoncker,7.50,91.0,2.10
295,Adama,Traoré,Wolves,Midfielder,10.6,64,9,1,5,10,Adama Traore,3.50,15.0,2.10
296,Oskar,Buur,Wolves,Defender,2.6,40,1,4,6,12,Oskar Buur,18.00,301.0,2.10


In [32]:
#Extracting columns needed

forecast = final_table[['Selection', 'team', 'position', 'selected_by_percent', 'now_cost', 'CS Points', '1Goal Points', '2Goal Points', 'Odds_x', 'Odds_y', 'odds']]

In [33]:
#Renaming columns

forecast.columns = ['Full Name', 'Team', 'Position', 'Ownership Percentage', 'Price', 'CS Points', '1 Goal Points', '2 Goal Points', 'Anytime Odds', '2 or More Odds', 'CS Odds']

In [34]:
forecast['Price'] = (forecast['Price'] / 10)
forecast

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,Full Name,Team,Position,Ownership Percentage,Price,CS Points,1 Goal Points,2 Goal Points,Anytime Odds,2 or More Odds,CS Odds
0,Pierre-Emerick Aubameyang,Arsenal,Midfielder,17.0,11.8,1,5,10,1.72,4.5,2.87
1,Cedric Soares,Arsenal,Defender,0.2,4.8,4,6,12,14.00,301.0,2.87
2,Alexandre Lacazette,Arsenal,Forward,4.1,8.5,0,4,8,2.30,7.5,2.87
3,Granit Xhaka,Arsenal,Midfielder,0.6,5.4,1,5,10,9.00,126.0,2.87
4,Hector Bellerin,Arsenal,Defender,5.8,5.0,4,6,12,14.00,301.0,2.87
...,...,...,...,...,...,...,...,...,...,...,...
293,Romain Saiss,Wolves,Defender,17.4,5.2,4,6,12,9.50,126.0,2.10
294,Leander Dendoncker,Wolves,Midfielder,1.2,4.9,1,5,10,7.50,91.0,2.10
295,Adama Traore,Wolves,Midfielder,10.6,6.4,1,5,10,3.50,15.0,2.10
296,Oskar Buur,Wolves,Defender,2.6,4.0,4,6,12,18.00,301.0,2.10


In [35]:
#Deleting rows of missing players

indexNames = forecast[forecast['Anytime Odds'] == 1.0 ].index
forecast.drop(indexNames , inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


In [36]:
#Deleting rows of missing players

indexNames = forecast[forecast['2 or More Odds'] == 1.0 ].index
forecast.drop(indexNames , inplace=True)

# Forecasting points

In [37]:
#Calculating probabilities based off of odds data

forecast['Cleansheet Probability'] = (1 / forecast['CS Odds']) 
forecast['1 Goal Probability'] = (1 / forecast['Anytime Odds'])
forecast['2 Goal Probability'] = (1 / forecast['2 or More Odds'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


In [38]:
#Calculating individual points per score type

forecast['Cleansheet Expected'] = forecast['Cleansheet Probability'] * forecast['CS Points']
forecast['Anytime Expected'] = forecast['1 Goal Probability'] * forecast['1 Goal Points']
forecast['2 Goal Expected'] = forecast['2 Goal Probability'] * forecast['2 Goal Points']

In [39]:
#Totalling all expected points

forecast['Total Forecasted Points'] = (forecast['Cleansheet Expected'] + forecast['Anytime Expected'] + forecast['2 Goal Expected'])

In [40]:
#Calculating 'Points Per Million'

forecast['PPM'] = forecast['Total Forecasted Points'] / forecast['Price']

In [41]:
#'Test' csv containing all columns

forecast.to_csv(r'C:\Users\Conor\Documents\FPL\GW6\test1.csv', index = False)

In [42]:
#Extracting final dataframe

finalForecast = forecast[['Full Name', 'Team', 'Position', 'Ownership Percentage', 'Price', 'Total Forecasted Points', 'PPM']]

In [43]:
finalForecast

Unnamed: 0,Full Name,Team,Position,Ownership Percentage,Price,Total Forecasted Points,PPM
0,Pierre-Emerick Aubameyang,Arsenal,Midfielder,17.0,11.8,5.477631,0.464206
1,Cedric Soares,Arsenal,Defender,0.2,4.8,1.862167,0.387951
2,Alexandre Lacazette,Arsenal,Forward,4.1,8.5,2.805797,0.330094
3,Granit Xhaka,Arsenal,Midfielder,0.6,5.4,0.983353,0.182102
4,Hector Bellerin,Arsenal,Defender,5.8,5.0,1.862167,0.372433
...,...,...,...,...,...,...,...
293,Romain Saiss,Wolves,Defender,17.4,5.2,2.631579,0.506073
294,Leander Dendoncker,Wolves,Midfielder,1.2,4.9,1.252747,0.255663
295,Adama Traore,Wolves,Midfielder,10.6,6.4,2.571429,0.401786
296,Oskar Buur,Wolves,Defender,2.6,4.0,2.277962,0.569491


In [44]:
#Final csv file with desired data

finalForecast.to_csv(r'C:\Users\Conor\Documents\FPL\GW6\GW6_forecast.csv', index = False)