In [2]:
import pandas as pd
import numpy as np
import requests
from sklearn.model_selection import train_test_split

In [3]:
# Fetch the FPL "bootstrap-static" payload.
url = "https://fantasy.premierleague.com/api/bootstrap-static/"
# A timeout keeps the cell from hanging indefinitely if the server never responds.
request = requests.get(url, timeout=30)
# Fail here on an HTTP error status instead of on a confusing JSON/KeyError further down.
request.raise_for_status()
# NOTE(review): the name `json` would shadow the standard-library `json` module if that
# module is ever imported in this notebook; the name is kept because later cells read it.
json = request.json()
json.keys()

dict_keys(['events', 'game_settings', 'phases', 'teams', 'total_players', 'elements', 'element_stats', 'element_types'])

In [4]:
# Show the Response object; its printed form includes the HTTP status code.
print(request)

<Response [200]>


In [5]:
# Build a master table of all EPL players from the 'elements' list of the API payload.
# (Nothing is written to disk in this cell.)
master_player_df = pd.DataFrame(json['elements'])
# Lift the row-display limit so the dtype listing below is not truncated with "...".
# This is a global pandas option, so it stays in effect for every later cell as well.
pd.set_option("display.max_rows", None)
print(master_player_df.dtypes)



chance_of_playing_next_round            float64
chance_of_playing_this_round            float64
code                                      int64
cost_change_event                         int64
cost_change_event_fall                    int64
cost_change_start                         int64
cost_change_start_fall                    int64
dreamteam_count                           int64
element_type                              int64
ep_next                                  object
ep_this                                  object
event_points                              int64
first_name                               object
form                                     object
id                                        int64
in_dreamteam                               bool
news                                     object
news_added                               object
now_cost                                  int64
photo                                    object
points_per_game                         

In [6]:
# Preview the first five rows of the full player table.
master_player_df.head(5)

Unnamed: 0,chance_of_playing_next_round,chance_of_playing_this_round,code,cost_change_event,cost_change_event_fall,cost_change_start,cost_change_start_fall,dreamteam_count,element_type,ep_next,...,now_cost_rank,now_cost_rank_type,form_rank,form_rank_type,points_per_game_rank,points_per_game_rank_type,selected_rank,selected_rank_type,starts_per_90,clean_sheets_per_90
0,0.0,0.0,438098,0,0,-1,1,0,3,0.0,...,169,106,627,286,627,286,591,259,0.0,0.0
1,50.0,0.0,205651,0,0,-2,2,0,4,0.5,...,38,17,648,69,648,69,201,33,0.0,0.0
2,,,226597,0,0,0,0,0,2,5.7,...,82,5,43,8,47,9,32,11,1.0,0.67
3,,,219847,0,0,1,-1,1,4,8.3,...,12,4,9,3,9,3,17,5,1.0,0.67
4,0.0,0.0,463748,0,0,0,0,0,1,0.0,...,625,64,411,50,411,50,523,64,0.0,0.0


In [7]:
# element_type is the position ID:
#   1 - Goalkeeper, 2 - Defender, 3 - Midfielder, 4 - Forward

# Keep only the columns that identify a player, the team and the position.
name_columns = ["id", "code", "first_name", "second_name", "team", "element_type"]
master_player_df_names = master_player_df.loc[:, name_columns]
# NOTE(review): a CSV export of this table to an absolute local path used to be commented
# out here; if it is needed again, write to a path relative to the project instead.
master_player_df_names.head()

Unnamed: 0,id,code,first_name,second_name,team,element_type
0,1,438098,Fábio,Ferreira Vieira,1,3
1,2,205651,Gabriel,Fernando de Jesus,1,4
2,3,226597,Gabriel,dos Santos Magalhães,1,2
3,4,219847,Kai,Havertz,1,4
4,5,463748,Karl,Hein,1,1


In [8]:
# Keep goalkeepers only.
# element_type is the position ID: 1 - Goalkeeper, 2 - Defender, 3 - Midfielder, 4 - Forward
is_goalkeeper = master_player_df_names["element_type"] == 1
epl_goalies = master_player_df_names.loc[is_goalkeeper]
# The reset_index() result is used only for this shape display and is not stored, so
# epl_goalies keeps its original row labels; the column count shown includes the extra
# "index" column that reset_index() adds.
epl_goalies.reset_index().shape

(69, 7)

In [15]:
# Preview the goalkeeper subset; the row labels are still those of the unfiltered table.
epl_goalies.head()

Unnamed: 0,id,code,first_name,second_name,team,element_type
4,5,463748,Karl,Hein,1,1
11,15,154561,David,Raya Martin,1,1
21,73,69752,Norberto,Murara Neto,1,1
27,639,551221,Tommy,Setford,1,1
39,39,462492,Joe,Gauci,2,1


In [10]:
#Create a function to ID historical stats for individual players

def get_past_data(element_id, timeout=30):
    """Fetch the ``history_past`` records of one player as a DataFrame.

    Parameters
    ----------
    element_id : int
        Value substituted into the element-summary URL. In this notebook it
        is a value from the ``id`` column of the master player table.
    timeout : float, optional
        Seconds passed to ``requests.get`` as its ``timeout`` so the call
        cannot hang indefinitely. Defaults to 30.

    Returns
    -------
    pandas.DataFrame
        Frame built from the ``history_past`` entry of the JSON response.

    Raises
    ------
    requests.HTTPError
        If the server answers with an HTTP error status.
    KeyError
        If the JSON response has no ``history_past`` entry.
    """
    url_past = f'https://fantasy.premierleague.com/api/element-summary/{element_id}/'
    response = requests.get(url_past, timeout=timeout)
    # Surface HTTP errors here rather than as a JSON decoding error or KeyError below.
    response.raise_for_status()
    return pd.DataFrame(response.json()['history_past'])

#Get individual players' historical data using their ID (element_id)
# NOTE(review): in the player table shown earlier, id 3 has element_type 2 (Defender),
# while the surrounding cells are about goalkeepers -- confirm this is the intended player.
element_id = 3
history_past_df = get_past_data(element_id)

# See all columns (global pandas option; it also applies to every later cell)
pd.set_option("display.max_columns", None)
history_past_df


Unnamed: 0,season_name,element_code,start_cost,end_cost,total_points,minutes,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,saves,bonus,bps,influence,creativity,threat,ict_index,starts,expected_goals,expected_assists,expected_goal_involvements,expected_goals_conceded
0,2020/21,226597,50,49,78,1996,2,0,6,24,0,0,0,2,1,0,7,385,464.8,51.0,233.0,74.9,0,0.0,0.0,0.0,0.0
1,2021/22,226597,50,53,146,3063,5,0,13,38,0,0,0,6,1,0,16,682,746.8,131.6,470.0,134.7,0,0.0,0.0,0.0,0.0
2,2022/23,226597,50,52,146,3409,3,0,14,43,0,0,0,5,0,0,15,723,743.8,131.4,401.0,127.7,38,5.04,0.66,5.7,41.84
3,2023/24,226597,50,54,149,3042,3,1,16,26,1,0,0,4,0,0,8,676,675.8,121.9,379.0,117.9,34,4.48,0.65,5.13,26.18


In [11]:
# Convert the listed object columns of history_past_df to float, one column at a time.
float_columns = [
    "influence",
    "creativity",
    "threat",
    "expected_goals",
    "expected_assists",
    "expected_goal_involvements",
    "expected_goals_conceded",
]
for column_name in float_columns:
    history_past_df[column_name] = history_past_df[column_name].astype(float)


In [12]:
# List the column names available in history_past_df.
history_past_df.columns

Index(['season_name', 'element_code', 'start_cost', 'end_cost', 'total_points',
       'minutes', 'goals_scored', 'assists', 'clean_sheets', 'goals_conceded',
       'own_goals', 'penalties_saved', 'penalties_missed', 'yellow_cards',
       'red_cards', 'saves', 'bonus', 'bps', 'influence', 'creativity',
       'threat', 'ict_index', 'starts', 'expected_goals', 'expected_assists',
       'expected_goal_involvements', 'expected_goals_conceded'],
      dtype='object')

In [13]:
# Build a frame for the individual player that holds only the goalie-relevant features.
goalie_feature_columns = [
    'season_name',
    'total_points',
    'minutes',
    'clean_sheets',
    'goals_conceded',
    'expected_goals_conceded',
    'own_goals',
    'penalties_saved',
    'saves',
    'starts',
    'end_cost',
]
past_history_df = history_past_df.loc[:, goalie_feature_columns]
past_history_df

Unnamed: 0,season_name,total_points,minutes,clean_sheets,goals_conceded,expected_goals_conceded,own_goals,penalties_saved,saves,starts,end_cost
0,2020/21,78,1996,6,24,0.0,0,0,0,0,49
1,2021/22,146,3063,13,38,0.0,0,0,0,0,53
2,2022/23,146,3409,14,43,41.84,0,0,0,38,52
3,2023/24,149,3042,16,26,26.18,1,0,0,34,54
