# Fantasy PL Player Performance
Code for getting data from FPL API: https://medium.com/analytics-vidhya/getting-started-with-fantasy-premier-league-data-56d3b9be8c32

## Get player data and gameweek data

Import packages

In [1]:
import pandas as pd
import re
import random
import requests

Get data from the FPL API

In [2]:
# set url for fantasy PL API
api_url = "https://fantasy.premierleague.com/api/bootstrap-static/"

# download the bootstrap data; the timeout stops the cell from hanging
# forever if the API does not answer
data = requests.get(api_url, timeout = 30)

# fail here with a clear HTTP error instead of trying to parse an error page
data.raise_for_status()

# NOTE: later cells read this variable as `json[...]`, so the name is kept
json = data.json()

json.keys()

dict_keys(['events', 'game_settings', 'phases', 'teams', 'total_players', 'elements', 'element_stats', 'element_types'])

Build a Pandas dataframe from the json data

In [3]:
# build a dataframe from the 'elements' entry of the API response
players = pd.DataFrame(json['elements'])

# list the available columns
players.columns

Index(['chance_of_playing_next_round', 'chance_of_playing_this_round', 'code',
       'cost_change_event', 'cost_change_event_fall', 'cost_change_start',
       'cost_change_start_fall', 'dreamteam_count', 'element_type', 'ep_next',
       'ep_this', 'event_points', 'first_name', 'form', 'id', 'in_dreamteam',
       'news', 'news_added', 'now_cost', 'photo', 'points_per_game',
       'second_name', 'selected_by_percent', 'special', 'squad_number',
       'status', 'team', 'team_code', 'total_points', 'transfers_in',
       'transfers_in_event', 'transfers_out', 'transfers_out_event',
       'value_form', 'value_season', 'web_name', 'minutes', 'goals_scored',
       'assists', 'clean_sheets', 'goals_conceded', 'own_goals',
       'penalties_saved', 'penalties_missed', 'yellow_cards', 'red_cards',
       'saves', 'bonus', 'bps', 'influence', 'creativity', 'threat',
       'ict_index', 'starts', 'expected_goals', 'expected_assists',
       'expected_goal_involvements', 'expected_goals_con

Cleaning up the player data and selecting columns (if desired)

In [4]:
# Optional column selection, currently disabled; not all columns are needed at
# this point but they may be useful in the future.
# NOTE(review): the disabled code refers to `elements_df`, which is not defined
# in the cells shown in this notebook.
#players_df_select = elements_df[['first_name','second_name','team','element_type','selected_by_percent',
#                                'now_cost','minutes','transfers_in','value_season','total_points']]

# use all columns. This is an alias, not a copy: the 'full_name' column added
# below is therefore also added to `players`, and the later join cells read
# players['full_name'].
players_df_select = players

# combine first and last names to get player full names
players_df_select['full_name'] = players_df_select[['first_name', 'second_name']].agg(' '.join, axis=1)

# drop first and last name columns (drop returns a new frame, so from here on
# `players_df_select` and `players` are different objects)
players_df_select = players_df_select.drop(['first_name', 'second_name'], axis = 1)

# player prices are 10x the true value. Divide the prices by 10 to get the true values
# (only `players_df_select` is rescaled; `players` keeps the raw value)
players_df_select['now_cost'] = players_df_select['now_cost']/10

players_df_select.head()

Unnamed: 0,chance_of_playing_next_round,chance_of_playing_this_round,code,cost_change_event,cost_change_event_fall,cost_change_start,cost_change_start_fall,dreamteam_count,element_type,ep_next,...,now_cost_rank_type,form_rank,form_rank_type,points_per_game_rank,points_per_game_rank_type,selected_rank,selected_rank_type,starts_per_90,clean_sheets_per_90,full_name
0,100.0,100.0,58822,0,0,-3,3,0,2,0.5,...,160,481,187,410,154,406,154,0.0,0.0,Cédric Alves Soares
1,,,84450,0,0,0,0,1,3,3.8,...,113,89,33,48,20,100,36,1.06,0.45,Granit Xhaka
2,50.0,100.0,153256,-1,1,-4,4,1,3,0.2,...,313,476,180,383,165,204,62,0.81,0.0,Mohamed Elneny
3,,,156074,0,0,-3,3,0,2,0.8,...,167,326,111,414,158,431,164,0.0,0.0,Rob Holding
4,75.0,100.0,167199,-1,1,-3,3,0,3,2.4,...,187,134,47,124,57,256,83,1.03,0.58,Thomas Partey


Get team info

In [5]:
# get team info from the 'teams' entry of the API response
teams = pd.DataFrame(json['teams'])

# preview the first rows
teams.head()

Unnamed: 0,code,draw,form,id,loss,name,played,points,position,short_name,...,team_division,unavailable,win,strength_overall_home,strength_overall_away,strength_attack_home,strength_attack_away,strength_defence_home,strength_defence_away,pulse_id
0,3,0,,1,0,Arsenal,0,0,0,ARS,...,,False,0,1220,1270,1240,1250,1200,1270,1
1,7,0,,2,0,Aston Villa,0,0,0,AVL,...,,False,0,1090,1100,1110,1130,1090,1110,2
2,91,0,,3,0,Bournemouth,0,0,0,BOU,...,,False,0,1060,1090,1070,1130,1050,1080,127
3,94,0,,4,0,Brentford,0,0,0,BRE,...,,False,0,1100,1130,1100,1110,1130,1160,130
4,36,0,,5,0,Brighton,0,0,0,BHA,...,,False,0,1150,1160,1140,1160,1170,1190,131


Get team defensive strength for each team

In [6]:
# keep the team id and name plus the away and home defensive strength columns
team_strength_def = teams[['id', 'name', 'strength_defence_away', 'strength_defence_home']]

team_strength_def

Unnamed: 0,id,name,strength_defence_away,strength_defence_home
0,1,Arsenal,1270,1200
1,2,Aston Villa,1110,1090
2,3,Bournemouth,1080,1050
3,4,Brentford,1160,1130
4,5,Brighton,1190,1170
5,6,Chelsea,1210,1180
6,7,Crystal Palace,1140,1080
7,8,Everton,1080,1080
8,9,Fulham,1130,1100
9,10,Leicester,1120,1180


Get team attack strength for each team

In [7]:
# keep the team id and name plus the away and home attacking strength columns
team_strength_att = teams[['id', 'name', 'strength_attack_away', 'strength_attack_home']]

team_strength_att

Unnamed: 0,id,name,strength_attack_away,strength_attack_home
0,1,Arsenal,1250,1240
1,2,Aston Villa,1130,1110
2,3,Bournemouth,1130,1070
3,4,Brentford,1110,1100
4,5,Brighton,1160,1140
5,6,Chelsea,1210,1200
6,7,Crystal Palace,1170,1100
7,8,Everton,1080,1080
8,9,Fulham,1070,1070
9,10,Leicester,1160,1080


Get position info

In [8]:
# get position information from 'element_types'
positions = pd.DataFrame(json['element_types'])

# preview the first rows
positions.head()

Unnamed: 0,id,plural_name,plural_name_short,singular_name,singular_name_short,squad_select,squad_min_play,squad_max_play,ui_shirt_specific,sub_positions_locked,element_count
0,1,Goalkeepers,GKP,Goalkeeper,GKP,2,1,1,True,[12],74
1,2,Defenders,DEF,Defender,DEF,5,3,5,False,[],243
2,3,Midfielders,MID,Midfielder,MID,5,2,5,False,[],313
3,4,Forwards,FWD,Forward,FWD,3,1,3,False,[],91


Merge the players with their teams

In [9]:
# merge player data with teams only: the players' numeric `team` column is
# matched against the teams' `id` column
player_team_merge = pd.merge(
    left = players_df_select,
    right = teams,
    left_on = 'team',
    right_on = 'id'
)

# check that each player now carries a team name
player_team_merge[['full_name', 'name']].head()

Unnamed: 0,full_name,name
0,Cédric Alves Soares,Arsenal
1,Granit Xhaka,Arsenal
2,Mohamed Elneny,Arsenal
3,Rob Holding,Arsenal
4,Thomas Partey,Arsenal


Merge the players with their positions

In [10]:
# merge the player/team frame with positions: the players' `element_type`
# column is matched against the positions' `id` column
player_team_pos_merge = pd.merge(
    left = player_team_merge,
    right = positions,
    left_on = 'element_type',
    right_on = 'id'
)

# check that each player now carries a team name and a short position name
player_team_pos_merge[['full_name', 'name', 'singular_name_short']].head()

Unnamed: 0,full_name,name,singular_name_short
0,Cédric Alves Soares,Arsenal,DEF
1,Rob Holding,Arsenal,DEF
2,Kieran Tierney,Arsenal,DEF
3,Benjamin White,Arsenal,DEF
4,Takehiro Tomiyasu,Arsenal,DEF


In [11]:
# rename columns: `name` becomes `team_name` and `singular_name_short`
# becomes `position_name`
player_team_pos_merge = player_team_pos_merge.rename(
    columns={'name':'team_name', 'singular_name_short':'position_name'}
)

player_team_pos_merge[['full_name', 'team_name', 'position_name']].head()

Unnamed: 0,full_name,team_name,position_name
0,Cédric Alves Soares,Arsenal,DEF
1,Rob Holding,Arsenal,DEF
2,Kieran Tierney,Arsenal,DEF
3,Benjamin White,Arsenal,DEF
4,Takehiro Tomiyasu,Arsenal,DEF


Get player gameweek data from https://fantasy.premierleague.com/api/element-summary/

In [12]:
# function for getting specific player gameweek history
def get_history(player_id):
    ''' get all gameweek history for a given player

    Requests the element-summary endpoint of the FPL API for `player_id`
    and returns the 'history' entry of the response as a Pandas dataframe.

    Raises requests.HTTPError if the API answers with an error status, and
    the usual requests exceptions (e.g. a timeout) if it cannot be reached.
    '''

    # request data from API; the timeout keeps a single stalled request from
    # blocking a loop over all players
    response = requests.get(
        "https://fantasy.premierleague.com/api/element-summary/" + str(player_id) + "/",
        timeout = 30
    )

    # surface HTTP errors here rather than as a confusing JSON or KeyError below
    response.raise_for_status()

    # a separate local name avoids shadowing the notebook-level `json` variable
    payload = response.json()

    # turn data into Pandas dataframe
    return pd.DataFrame(payload['history'])

# quick check: fetch the gameweek history of the player with id 1
get_history(1)  

Unnamed: 0,element,fixture,opponent_team,total_points,was_home,kickoff_time,team_h_score,team_a_score,round,minutes,...,starts,expected_goals,expected_assists,expected_goal_involvements,expected_goals_conceded,value,transfers_balance,selected,transfers_in,transfers_out
0,1,1,7,0,False,2022-08-05T19:00:00Z,0,2,1,0,...,0,0.0,0.0,0.0,0.0,45,0,23970,0,0
1,1,11,10,0,True,2022-08-13T14:00:00Z,4,2,2,0,...,0,0.0,0.0,0.0,0.0,44,-5169,24193,1361,6530
2,1,21,3,0,False,2022-08-20T16:30:00Z,0,3,3,0,...,0,0.0,0.0,0.0,0.0,44,-4337,20960,879,5216
3,1,31,9,0,True,2022-08-27T16:30:00Z,2,1,4,0,...,0,0.0,0.0,0.0,0.0,43,-2988,18825,577,3565
4,1,41,2,0,True,2022-08-31T18:30:00Z,2,1,5,0,...,0,0.0,0.0,0.0,0.0,43,-1611,17790,405,2016
5,1,56,14,0,False,2022-09-04T15:30:00Z,3,1,6,0,...,0,0.0,0.0,0.0,0.0,42,-1207,17133,516,1723
6,1,72,4,0,False,2022-09-18T11:00:00Z,0,3,8,0,...,0,0.0,0.0,0.0,0.0,42,-445,16837,114,559
7,1,81,18,0,True,2022-10-01T11:30:00Z,3,1,9,0,...,0,0.0,0.0,0.0,0.0,42,-1572,15362,64,1636
8,1,91,12,0,True,2022-10-09T15:30:00Z,3,2,10,0,...,0,0.0,0.0,0.0,0.0,42,-828,14630,148,976
9,1,104,11,0,False,2022-10-16T13:00:00Z,0,1,11,0,...,0,0.0,0.0,0.0,0.0,42,-229,14586,227,456


In [13]:
from tqdm.auto import tqdm
tqdm.pandas()

In [14]:
# join team name. Both frames have an `id` column: the suffixes rename the
# players' one to `id_player` and leave the teams' one as `id`, which is then
# dropped together with the numeric `team` key.
players = players.merge(
    teams[['id', 'name']],
    left_on='team',
    right_on='id',
    suffixes=['_player', None]
).drop(['team', 'id'], axis=1)

# join player positions via the numeric `element_type` key, then drop the key
# columns
players = players.merge(
    positions[['id', 'singular_name_short']],
    left_on='element_type',
    right_on='id'
).drop(['element_type', 'id'], axis=1)

# rename columns so `team` holds the team name and `position` the short
# position name.
# NOTE(review): after this cell `team` no longer holds the numeric id the first
# merge joins on, so the cell does not look safe to run twice — confirm.
players = players.rename(
    columns={'name':'team', 'singular_name_short':'position'}
)

players.head()


Unnamed: 0,chance_of_playing_next_round,chance_of_playing_this_round,code,cost_change_event,cost_change_event_fall,cost_change_start,cost_change_start_fall,dreamteam_count,ep_next,ep_this,...,form_rank_type,points_per_game_rank,points_per_game_rank_type,selected_rank,selected_rank_type,starts_per_90,clean_sheets_per_90,full_name,team,position
0,100.0,100.0,58822,0,0,-3,3,0,0.5,0.5,...,187,410,154,406,154,0.0,0.0,Cédric Alves Soares,Arsenal,DEF
1,,,156074,0,0,-3,3,0,0.8,0.8,...,111,414,158,431,164,0.0,0.0,Rob Holding,Arsenal,DEF
2,100.0,100.0,192895,-1,1,-4,4,0,0.8,0.8,...,125,354,134,201,74,0.79,0.2,Kieran Tierney,Arsenal,DEF
3,,,198869,0,0,2,-2,0,5.5,5.5,...,9,37,7,25,9,1.13,0.6,Benjamin White,Arsenal,DEF
4,100.0,100.0,223723,0,0,-3,3,0,2.5,2.5,...,63,311,117,238,88,0.72,0.36,Takehiro Tomiyasu,Arsenal,DEF


In [15]:
# download the gameweek history of every player (one API call per player id)
histories = players['id_player'].progress_apply(get_history)

# stack the per-player frames into a single dataframe
all_history = pd.concat(list(histories))

# attach the player's name, team and position to every history row
player_info = players[['id_player', 'full_name', 'team', 'position']]
points = player_info.merge(all_history, left_on='id_player', right_on='element')

  0%|          | 0/721 [00:00<?, ?it/s]

In [16]:
# attach both of the opponent's defensive strength ratings to every fixture row
def_lookup = team_strength_def[['id', 'strength_defence_away', 'strength_defence_home']]
points = points.merge(def_lookup, how='left', left_on='opponent_team', right_on='id')

# the lookup key is no longer needed once the ratings are attached
points = points.drop(columns='id')

# label the ratings as belonging to the opponent
points = points.rename(columns={
    'strength_defence_away': 'opp_def_strength_away',
    'strength_defence_home': 'opp_def_strength_home',
})

points.head()

Unnamed: 0,id_player,full_name,team,position,element,fixture,opponent_team,total_points,was_home,kickoff_time,...,expected_assists,expected_goal_involvements,expected_goals_conceded,value,transfers_balance,selected,transfers_in,transfers_out,opp_def_strength_away,opp_def_strength_home
0,1,Cédric Alves Soares,Arsenal,DEF,1,1,7,0,False,2022-08-05T19:00:00Z,...,0.0,0.0,0.0,45,0,23970,0,0,1140,1080
1,1,Cédric Alves Soares,Arsenal,DEF,1,11,10,0,True,2022-08-13T14:00:00Z,...,0.0,0.0,0.0,44,-5169,24193,1361,6530,1120,1180
2,1,Cédric Alves Soares,Arsenal,DEF,1,21,3,0,False,2022-08-20T16:30:00Z,...,0.0,0.0,0.0,44,-4337,20960,879,5216,1080,1050
3,1,Cédric Alves Soares,Arsenal,DEF,1,31,9,0,True,2022-08-27T16:30:00Z,...,0.0,0.0,0.0,43,-2988,18825,577,3565,1130,1100
4,1,Cédric Alves Soares,Arsenal,DEF,1,41,2,0,True,2022-08-31T18:30:00Z,...,0.0,0.0,0.0,43,-1611,17790,405,2016,1110,1090


In [17]:
# assign correct home/away opponent defensive strength for each fixture
def opp_def_strength(row):
    ''' pick the opponent's defensive rating that applies to one fixture row

    When the player was at home the opponent played away, so the opponent's
    away rating is returned, and the other way round. Any `was_home` value
    that is neither true nor false gives the string "Unknown".
    '''
    played_at_home = row['was_home']

    if played_at_home == True:
        return row['opp_def_strength_away']
    if played_at_home == False:
        return row['opp_def_strength_home']
    return "Unknown"

# pick the applicable defensive rating for every fixture row
points['opp_def_strength'] = points.apply(opp_def_strength, axis=1)

# the two per-venue columns are no longer needed
points = points.drop(columns=['opp_def_strength_home', 'opp_def_strength_away'])

points.head()

Unnamed: 0,id_player,full_name,team,position,element,fixture,opponent_team,total_points,was_home,kickoff_time,...,expected_goals,expected_assists,expected_goal_involvements,expected_goals_conceded,value,transfers_balance,selected,transfers_in,transfers_out,opp_def_strength
0,1,Cédric Alves Soares,Arsenal,DEF,1,1,7,0,False,2022-08-05T19:00:00Z,...,0.0,0.0,0.0,0.0,45,0,23970,0,0,1080
1,1,Cédric Alves Soares,Arsenal,DEF,1,11,10,0,True,2022-08-13T14:00:00Z,...,0.0,0.0,0.0,0.0,44,-5169,24193,1361,6530,1120
2,1,Cédric Alves Soares,Arsenal,DEF,1,21,3,0,False,2022-08-20T16:30:00Z,...,0.0,0.0,0.0,0.0,44,-4337,20960,879,5216,1050
3,1,Cédric Alves Soares,Arsenal,DEF,1,31,9,0,True,2022-08-27T16:30:00Z,...,0.0,0.0,0.0,0.0,43,-2988,18825,577,3565,1130
4,1,Cédric Alves Soares,Arsenal,DEF,1,41,2,0,True,2022-08-31T18:30:00Z,...,0.0,0.0,0.0,0.0,43,-1611,17790,405,2016,1110


In [18]:
# attach both of the opponent's attacking strength ratings to every fixture row
att_lookup = team_strength_att[['id', 'strength_attack_away', 'strength_attack_home']]
points = points.merge(att_lookup, how='left', left_on='opponent_team', right_on='id')

# the lookup key is no longer needed once the ratings are attached
points = points.drop(columns='id')

# label the ratings as belonging to the opponent
points = points.rename(columns={
    'strength_attack_away': 'opp_att_strength_away',
    'strength_attack_home': 'opp_att_strength_home',
})

points.head()

Unnamed: 0,id_player,full_name,team,position,element,fixture,opponent_team,total_points,was_home,kickoff_time,...,expected_goal_involvements,expected_goals_conceded,value,transfers_balance,selected,transfers_in,transfers_out,opp_def_strength,opp_att_strength_away,opp_att_strength_home
0,1,Cédric Alves Soares,Arsenal,DEF,1,1,7,0,False,2022-08-05T19:00:00Z,...,0.0,0.0,45,0,23970,0,0,1080,1170,1100
1,1,Cédric Alves Soares,Arsenal,DEF,1,11,10,0,True,2022-08-13T14:00:00Z,...,0.0,0.0,44,-5169,24193,1361,6530,1120,1160,1080
2,1,Cédric Alves Soares,Arsenal,DEF,1,21,3,0,False,2022-08-20T16:30:00Z,...,0.0,0.0,44,-4337,20960,879,5216,1050,1130,1070
3,1,Cédric Alves Soares,Arsenal,DEF,1,31,9,0,True,2022-08-27T16:30:00Z,...,0.0,0.0,43,-2988,18825,577,3565,1130,1070,1070
4,1,Cédric Alves Soares,Arsenal,DEF,1,41,2,0,True,2022-08-31T18:30:00Z,...,0.0,0.0,43,-1611,17790,405,2016,1110,1130,1110


In [19]:
# assign correct home/away opponent attack strength for each fixture
def opp_att_strength(row):
    ''' pick the opponent's attacking rating that applies to one fixture row

    When the player was at home the opponent played away, so the opponent's
    away rating is returned, and the other way round. Any `was_home` value
    that is neither true nor false gives the string "Unknown".
    '''
    played_at_home = row['was_home']

    if played_at_home == True:
        return row['opp_att_strength_away']
    if played_at_home == False:
        return row['opp_att_strength_home']
    return "Unknown"

# pick the applicable attacking rating for every fixture row
points['opp_att_strength'] = points.apply(opp_att_strength, axis=1)

# the two per-venue columns are no longer needed
points = points.drop(columns=['opp_att_strength_home', 'opp_att_strength_away'])

points.head()

Unnamed: 0,id_player,full_name,team,position,element,fixture,opponent_team,total_points,was_home,kickoff_time,...,expected_assists,expected_goal_involvements,expected_goals_conceded,value,transfers_balance,selected,transfers_in,transfers_out,opp_def_strength,opp_att_strength
0,1,Cédric Alves Soares,Arsenal,DEF,1,1,7,0,False,2022-08-05T19:00:00Z,...,0.0,0.0,0.0,45,0,23970,0,0,1080,1100
1,1,Cédric Alves Soares,Arsenal,DEF,1,11,10,0,True,2022-08-13T14:00:00Z,...,0.0,0.0,0.0,44,-5169,24193,1361,6530,1120,1160
2,1,Cédric Alves Soares,Arsenal,DEF,1,21,3,0,False,2022-08-20T16:30:00Z,...,0.0,0.0,0.0,44,-4337,20960,879,5216,1050,1070
3,1,Cédric Alves Soares,Arsenal,DEF,1,31,9,0,True,2022-08-27T16:30:00Z,...,0.0,0.0,0.0,43,-2988,18825,577,3565,1130,1070
4,1,Cédric Alves Soares,Arsenal,DEF,1,41,2,0,True,2022-08-31T18:30:00Z,...,0.0,0.0,0.0,43,-1611,17790,405,2016,1110,1130


In [20]:
# save the dataframe to csv
#points.to_csv('data/points_20220107.csv')

Let's try to compare performance over the past 5 matches with performance in the most recent match

For a given player we need to:
- get the last 6 results from the points dataframe
- take the average of each performance stat over the first 5 of those 6 observations
- see if any performance stats predict performance in the 6th fixture

The analysis will be different for players in different positions. Attacking players get most of their points from goal involvements, while defenders and goalkeepers get more points from clean sheets.

By running this method after each gameweek we can build a larger dataset over the course of the season.

In [21]:
# import test data; this replaces whatever `points` held from the cells above
# with the contents of the saved CSV
points = pd.read_csv('data/points_20220107.csv')

In [22]:
# identifying columns, the points target, and the per-fixture performance
# stats used by the analysis below
columns_of_interest = [
    'id_player', 'full_name', 'team', 'position',
    'total_points',
    'minutes', 'goals_scored', 'assists', 'clean_sheets',
    'goals_conceded', 'own_goals',
    'saves', 'bonus', 'bps', 'influence', 'creativity', 'threat', 'ict_index',
    'expected_goals', 'expected_assists', 'expected_goal_involvements',
    'expected_goals_conceded', 'opp_att_strength', 'opp_def_strength',
]

points_select = points[columns_of_interest]

In [40]:
# all fixture rows for the player with id 13, used as a small test case
points_13 = points_select[points_select['id_player'] == 13]

points_13

Unnamed: 0,id_player,full_name,team,position,total_points,minutes,goals_scored,assists,clean_sheets,goals_conceded,...,influence,creativity,threat,ict_index,expected_goals,expected_assists,expected_goal_involvements,expected_goals_conceded,opp_att_strength,opp_def_strength
4110,13,Bukayo Saka,Arsenal,MID,6,90,0,1,1,0,...,1.8,7.6,36.0,4.5,0.2583,0.11039,0.36869,1.2119,1100,1080
4111,13,Bukayo Saka,Arsenal,MID,2,83,0,0,0,2,...,13.8,43.9,23.0,8.1,0.0377,0.285,0.3227,0.3306,1160,1120
4112,13,Bukayo Saka,Arsenal,MID,3,87,0,0,1,0,...,12.0,29.9,8.0,5.0,0.0239,0.13348,0.15738,0.2579,1070,1050
4113,13,Bukayo Saka,Arsenal,MID,5,90,0,1,0,1,...,28.0,25.4,45.0,9.8,0.4738,0.10734,0.58114,0.8328,1070,1130
4114,13,Bukayo Saka,Arsenal,MID,5,87,0,1,0,1,...,24.6,42.7,49.0,11.6,0.4524,0.34341,0.79581,0.4514,1130,1110
4115,13,Bukayo Saka,Arsenal,MID,7,90,1,0,0,3,...,34.2,18.5,52.0,10.5,0.3993,0.06968,0.46898,1.5519,1130,1170
4116,13,Bukayo Saka,Arsenal,MID,9,90,0,2,1,0,...,42.6,41.8,33.0,11.7,0.2286,0.13483,0.36343,0.5327,1100,1130
4117,13,Bukayo Saka,Arsenal,MID,2,90,0,0,0,1,...,21.2,69.0,34.0,12.4,0.2731,0.20199,0.47509,1.5742,1210,1220
4118,13,Bukayo Saka,Arsenal,MID,15,90,2,0,0,2,...,70.8,3.7,97.0,17.2,2.0413,0.02396,2.06526,1.0122,1300,1360
4119,13,Bukayo Saka,Arsenal,MID,9,81,1,0,1,0,...,37.4,18.3,19.0,7.5,0.0924,0.08806,0.18046,1.7177,1080,1140


In [24]:
def last_5_player(df, player_id):
    ''' 
    get the mean stats for a given player_id over the last 5 fixtures
    prior to most recent fixture and the total points from the most 
    recent fixture. 
    
    assume dataframe is sorted from oldest to newest fixtures

    df: fixture-level dataframe with the id_player, full_name, team and
        position columns plus the stat columns listed in `mean_columns`
    player_id: value of id_player to summarise

    returns a one-row dataframe (index 0) with name, id, team, position,
    one mean_* column per stat and latest_points. If the player has fewer
    than 5 earlier fixtures the means use the earlier fixtures that exist.
    If the player has fewer than 2 fixtures there is nothing to average
    before the most recent one, so an empty dataframe is returned.
    '''
    # output column -> source column averaged over the earlier fixtures
    mean_columns = {
        'mean_points': 'total_points',
        'mean_minutes': 'minutes',
        'mean_goals_scored': 'goals_scored',
        'mean_assists': 'assists',
        'mean_clean_sheets': 'clean_sheets',
        'mean_goals_conceded': 'goals_conceded',
        'mean_own_goals': 'own_goals',
        'mean_saves': 'saves',
        'mean_bonus': 'bonus',
        'mean_bps': 'bps',
        'mean_influence': 'influence',
        'mean_creativity': 'creativity',
        'mean_threat': 'threat',
        'mean_ict': 'ict_index',
        'mean_xg': 'expected_goals',
        'mean_xa': 'expected_assists',
        'mean_xgi': 'expected_goal_involvements',
        'mean_xgc': 'expected_goals_conceded',
        'mean_opp_att': 'opp_att_strength',
        'mean_opp_def': 'opp_def_strength',
    }

    player_rows = df[df['id_player'] == player_id]

    # need at least one earlier fixture plus the most recent one
    if len(player_rows.index) < 2:
        return pd.DataFrame()

    # set the most recent fixture aside BEFORE taking the 5-fixture window, so
    # it can never end up inside the means it is compared against
    last_5 = player_rows.iloc[:-1].tail(5)

    d = {'name': last_5['full_name'].iloc[0],
         'id': last_5['id_player'].iloc[0],
         'team': last_5['team'].iloc[0],
         'position': last_5['position'].iloc[0]}

    for mean_name, source_name in mean_columns.items():
        d[mean_name] = last_5[source_name].mean()

    # points scored in the most recent fixture
    d['latest_points'] = player_rows['total_points'].iloc[-1]

    return pd.DataFrame(data = d, index = [0])

# test for Bukayo Saka (id = 13): one summary row is expected
last_5_player(points_select, 13)

Unnamed: 0,name,id,team,position,mean_points,mean_minutes,mean_goals_scored,mean_assists,mean_clean_sheets,mean_goals_conceded,...,mean_creativity,mean_threat,mean_ict,mean_xg,mean_xa,mean_xgi,mean_xgc,mean_opp_att,mean_opp_def,latest_points
0,Bukayo Saka,13,Arsenal,MID,5.4,77.2,0.4,0.4,0.4,0.6,...,39.24,14.0,8.26,0.25732,0.168212,0.425532,0.64568,1134.0,1132.0,3


In [25]:
# how many fixture rows player 13 has beyond the first five
len(points_13.index) - 5

12

In [26]:
def last_5_all(df):
    ''' get last mean stats for all players in df over the last 5 fixtures
    prior to most recent fixture and the total points from the most 
    recent fixture.

    calls last_5_player once per unique id_player and stacks the results.
    returns an empty dataframe when df contains no players.
    '''
    # collect the per-player summaries first and concatenate once, instead of
    # re-copying the growing result on every loop iteration
    per_player = [last_5_player(df, p) for p in df['id_player'].unique()]

    # pd.concat raises on an empty list, so keep the empty-dataframe result
    if not per_player:
        return pd.DataFrame()

    return pd.concat(per_player)

In [27]:
# testing on two players only (ids 12 and 13)
subset = points_select[points_select['id_player'].isin([12, 13])]

last_5_all(subset)

Unnamed: 0,name,id,team,position,mean_points,mean_minutes,mean_goals_scored,mean_assists,mean_clean_sheets,mean_goals_conceded,...,mean_creativity,mean_threat,mean_ict,mean_xg,mean_xa,mean_xgi,mean_xgc,mean_opp_att,mean_opp_def,latest_points
0,Emile Smith Rowe,12,Arsenal,MID,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1134.0,1132.0,0
0,Bukayo Saka,13,Arsenal,MID,5.4,77.2,0.4,0.4,0.4,0.6,...,39.24,14.0,8.26,0.25732,0.168212,0.425532,0.64568,1134.0,1132.0,3


Let's get the last 5 games performance and most recent performance for all the players

In [28]:
# one summary row per player id found in points_select
last_5_df = last_5_all(points_select)

last_5_df.head()

Unnamed: 0,name,id,team,position,mean_points,mean_minutes,mean_goals_scored,mean_assists,mean_clean_sheets,mean_goals_conceded,...,mean_creativity,mean_threat,mean_ict,mean_xg,mean_xa,mean_xgi,mean_xgc,mean_opp_att,mean_opp_def,latest_points
0,Cédric Alves Soares,1,Arsenal,DEF,0.4,5.6,0.0,0.0,0.0,0.0,...,0.32,0.0,0.08,0.0,0.001958,0.001958,0.0087,1134.0,1132.0,0
0,Rob Holding,5,Arsenal,DEF,0.4,0.8,0.0,0.0,0.0,0.0,...,0.0,0.0,0.02,0.0,0.0,0.0,0.0281,1134.0,1132.0,0
0,Kieran Tierney,8,Arsenal,DEF,0.8,25.6,0.0,0.0,0.0,0.6,...,3.04,0.8,0.74,0.0,0.010344,0.010344,0.36994,1134.0,1132.0,0
0,Benjamin White,10,Arsenal,DEF,4.4,83.8,0.0,0.0,0.6,0.2,...,16.18,3.6,3.34,0.05678,0.120242,0.177022,0.56658,1134.0,1132.0,7
0,Takehiro Tomiyasu,14,Arsenal,DEF,1.2,18.4,0.0,0.0,0.2,0.4,...,3.26,3.6,0.88,0.0157,0.007934,0.023634,0.23434,1134.0,1132.0,1


In [29]:
# save the data to csv
#last_5_df.to_csv('data/last_5_gw14-19.csv')

In [30]:
def last_5_player_all_gw(df, player_id):
    '''
    get the mean stats for a given player_id over every group of 5 fixtures
    and the latest points from the subsequent fixture. 
    
    assume dataframe is sorted from oldest to newest fixtures

    df: fixture-level dataframe with the id_player, full_name, team and
        position columns plus the stat columns listed in `mean_columns`
    player_id: value of id_player to summarise

    returns one row per window of 5 consecutive fixtures that is followed by
    another fixture (every row has index 0), or an empty dataframe when the
    player has fewer than 6 fixtures.
    '''
    # output column -> source column averaged over each 5-fixture window
    mean_columns = {
        'mean_points': 'total_points',
        'mean_minutes': 'minutes',
        'mean_goals_scored': 'goals_scored',
        'mean_assists': 'assists',
        'mean_clean_sheets': 'clean_sheets',
        'mean_goals_conceded': 'goals_conceded',
        'mean_own_goals': 'own_goals',
        'mean_saves': 'saves',
        'mean_bonus': 'bonus',
        'mean_bps': 'bps',
        'mean_influence': 'influence',
        'mean_creativity': 'creativity',
        'mean_threat': 'threat',
        'mean_ict': 'ict_index',
        'mean_xg': 'expected_goals',
        'mean_xa': 'expected_assists',
        'mean_xgi': 'expected_goal_involvements',
        'mean_xgc': 'expected_goals_conceded',
        'mean_opp_att': 'opp_att_strength',
        'mean_opp_def': 'opp_def_strength',
    }

    player_rows = df[df['id_player'] == player_id]

    # one-row summaries, collected here and concatenated once at the end
    # instead of re-copying the growing result on every iteration
    windows = []

    # iterate over fixtures for player; the range is empty when the player
    # has fewer than 6 fixtures
    for n in range(len(player_rows.index) - 5):
        last_5 = player_rows.iloc[n:(n + 5)]

        d = {'name': last_5['full_name'].iloc[0],
             'id': last_5['id_player'].iloc[0],
             'team': last_5['team'].iloc[0],
             'position': last_5['position'].iloc[0]}

        for mean_name, source_name in mean_columns.items():
            d[mean_name] = last_5[source_name].mean()

        # points scored in the fixture right after the window
        d['latest_points'] = player_rows['total_points'].iloc[n + 5]

        windows.append(pd.DataFrame(data = d, index = [0]))

    # pd.concat raises on an empty list, so keep the empty-dataframe result
    if not windows:
        return pd.DataFrame()

    return pd.concat(windows)

In [31]:
# test id_player = 13: one row per 5-fixture window that has a following fixture
last_5_player_all_gw(points_select, 13)

Unnamed: 0,name,id,team,position,mean_points,mean_minutes,mean_goals_scored,mean_assists,mean_clean_sheets,mean_goals_conceded,...,mean_creativity,mean_threat,mean_ict,mean_xg,mean_xa,mean_xgi,mean_xgc,mean_opp_att,mean_opp_def,latest_points
0,Bukayo Saka,13,Arsenal,MID,4.2,87.4,0.0,0.6,0.4,0.8,...,29.9,32.2,7.8,0.24922,0.195924,0.445144,0.61692,1106.0,1098.0,7
0,Bukayo Saka,13,Arsenal,MID,4.4,87.4,0.2,0.4,0.2,1.4,...,32.08,35.4,9.0,0.27742,0.187782,0.465202,0.68492,1112.0,1116.0,9
0,Bukayo Saka,13,Arsenal,MID,5.8,88.8,0.2,0.8,0.4,1.0,...,31.66,37.4,9.72,0.3156,0.157748,0.473348,0.72534,1100.0,1118.0,2
0,Bukayo Saka,13,Arsenal,MID,5.6,89.4,0.2,0.8,0.2,1.2,...,39.48,42.6,11.2,0.36544,0.17145,0.53689,0.9886,1128.0,1152.0,15
0,Bukayo Saka,13,Arsenal,MID,7.6,89.4,0.6,0.6,0.2,1.4,...,35.14,53.0,12.68,0.67894,0.154774,0.833714,1.02448,1174.0,1198.0,9
0,Bukayo Saka,13,Arsenal,MID,8.4,88.2,0.8,0.4,0.4,1.2,...,30.26,47.0,11.86,0.60694,0.103704,0.710644,1.27774,1164.0,1204.0,1
0,Bukayo Saka,13,Arsenal,MID,7.2,88.2,0.6,0.4,0.4,0.8,...,27.16,38.8,10.1,0.54136,0.097278,0.638638,1.10212,1156.0,1186.0,4
0,Bukayo Saka,13,Arsenal,MID,6.2,75.4,0.6,0.2,0.2,0.8,...,26.96,34.2,9.2,0.52692,0.09476,0.62168,1.0042,1150.0,1180.0,5
0,Bukayo Saka,13,Arsenal,MID,6.8,75.4,0.6,0.4,0.4,0.6,...,21.86,28.6,8.18,0.49376,0.080496,0.574256,0.74766,1148.0,1172.0,3
0,Bukayo Saka,13,Arsenal,MID,4.4,75.4,0.2,0.4,0.6,0.2,...,30.6,10.0,5.92,0.0855,0.118818,0.204318,0.68108,1114.0,1116.0,9


In [32]:
def last_5_all_gw(df):
    ''' 
    get last mean stats for all players in df over 5 consecutive fixtures
    plus points from subsequent fixture across all fixtures.

    calls last_5_player_all_gw once per unique id_player and stacks the
    results. returns an empty dataframe when df contains no players.
    '''
    # collect the per-player frames first and concatenate once, instead of
    # re-copying the growing result on every loop iteration
    per_player = [last_5_player_all_gw(df, p) for p in df['id_player'].unique()]

    # pd.concat raises on an empty list, so keep the empty-dataframe result
    if not per_player:
        return pd.DataFrame()

    return pd.concat(per_player)

In [33]:
# build the windowed summaries for every player id found in points_select
last_5_df_all_gw = last_5_all_gw(points_select)

last_5_df_all_gw.head()

Unnamed: 0,name,id,team,position,mean_points,mean_minutes,mean_goals_scored,mean_assists,mean_clean_sheets,mean_goals_conceded,...,mean_creativity,mean_threat,mean_ict,mean_xg,mean_xa,mean_xgi,mean_xgc,mean_opp_att,mean_opp_def,latest_points
0,Cédric Alves Soares,1,Arsenal,DEF,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1106.0,1098.0,0
0,Cédric Alves Soares,1,Arsenal,DEF,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1112.0,1116.0,0
0,Cédric Alves Soares,1,Arsenal,DEF,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1100.0,1118.0,0
0,Cédric Alves Soares,1,Arsenal,DEF,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1128.0,1152.0,0
0,Cédric Alves Soares,1,Arsenal,DEF,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1174.0,1198.0,0


In [41]:
# save the data (the dataframe index is written as the first CSV column)
# NOTE(review): this path starts with 'Data/' while the other paths in this
# notebook start with 'data/'; on a case-sensitive file system these are
# different directories — confirm which one is intended.
last_5_df_all_gw.to_csv('Data/5_gw_perform_upto_gw21.csv')

Combining past data with newer data

In [None]:
# get the existing data. index_col = 0 reads the first CSV column back as the
# index instead of as an extra 'Unnamed: 0' data column.
# NOTE(review): assumes the file was written with its index, like the other
# to_csv calls in this notebook — confirm.
past_data = pd.read_csv("data/5_gw_perform_upto_gw19.csv", index_col = 0)

# add the newest data to the past data: stack the new rows underneath the old
# ones (axis = 0). axis = 1 would paste the two frames side by side as extra
# columns instead of appending observations. ignore_index gives the combined
# frame a fresh 0..n-1 index.
new_data = pd.concat([past_data, last_5_df], axis = 0, ignore_index = True)

# save the new data
new_data.to_csv("data/plyr_form_data_DATE.csv")