# Fantasy PL Player Performance
Code for getting data from FPL API: https://medium.com/analytics-vidhya/getting-started-with-fantasy-premier-league-data-56d3b9be8c32

## Get player data and gameweek data

Import packages

In [1]:
import pandas as pd
import re
import random
import requests

Get data from the FPL API

In [2]:
# set url for fantasy PL API
api_url = "https://fantasy.premierleague.com/api/bootstrap-static/"

# download the webpage
data = requests.get(api_url)

json = data.json()

json.keys()

dict_keys(['events', 'game_settings', 'phases', 'teams', 'total_players', 'elements', 'element_stats', 'element_types'])

Build a Pandas dataframe from the json data

In [3]:
# build a dataframe
players = pd.DataFrame(json['elements'])

players.columns

Index(['chance_of_playing_next_round', 'chance_of_playing_this_round', 'code',
       'cost_change_event', 'cost_change_event_fall', 'cost_change_start',
       'cost_change_start_fall', 'dreamteam_count', 'element_type', 'ep_next',
       'ep_this', 'event_points', 'first_name', 'form', 'id', 'in_dreamteam',
       'news', 'news_added', 'now_cost', 'photo', 'points_per_game',
       'second_name', 'selected_by_percent', 'special', 'squad_number',
       'status', 'team', 'team_code', 'total_points', 'transfers_in',
       'transfers_in_event', 'transfers_out', 'transfers_out_event',
       'value_form', 'value_season', 'web_name', 'minutes', 'goals_scored',
       'assists', 'clean_sheets', 'goals_conceded', 'own_goals',
       'penalties_saved', 'penalties_missed', 'yellow_cards', 'red_cards',
       'saves', 'bonus', 'bps', 'influence', 'creativity', 'threat',
       'ict_index', 'starts', 'expected_goals', 'expected_assists',
       'expected_goal_involvements', 'expected_goals_con

Cleaning up the player data and selecting columns (if desired)

In [4]:
#select only relevant columns from elements_df, not all needed at this point but maybe useful in the future
#players_df_select = elements_df[['first_name','second_name','team','element_type','selected_by_percent',
#                                'now_cost','minutes','transfers_in','value_season','total_points']]

# use all columns 
players_df_select = players

# combine first and last names to get player full names
players_df_select['full_name'] = players_df_select[['first_name', 'second_name']].agg(' '.join, axis=1)

# drop first and last name columns
players_df_select = players_df_select.drop(['first_name', 'second_name'], axis = 1)

# player prices are 10x the true value. Divide the prices by 10 to get the true values
players_df_select['now_cost'] = players_df_select['now_cost']/10

players_df_select.head()

Unnamed: 0,chance_of_playing_next_round,chance_of_playing_this_round,code,cost_change_event,cost_change_event_fall,cost_change_start,cost_change_start_fall,dreamteam_count,element_type,ep_next,...,now_cost_rank_type,form_rank,form_rank_type,points_per_game_rank,points_per_game_rank_type,selected_rank,selected_rank_type,starts_per_90,clean_sheets_per_90,full_name
0,,,84450,0,0,-1,1,1,3,6.8,...,131,12,5,63,26,116,40,1.06,0.4,Granit Xhaka
1,0.0,0.0,153256,0,0,-4,4,1,3,0.0,...,323,495,177,429,188,240,77,0.81,0.0,Mohamed Elneny
2,,,156074,0,0,-3,3,0,2,2.2,...,165,185,66,431,152,416,157,0.94,0.0,Rob Holding
3,100.0,100.0,167199,0,0,-3,3,0,3,2.2,...,187,188,79,155,62,276,91,1.03,0.45,Thomas Partey
4,100.0,100.0,184029,0,0,2,-2,4,3,3.8,...,24,79,39,12,6,8,4,1.05,0.4,Martin Ødegaard


Get team info

In [5]:
# get team info
teams = pd.DataFrame(json['teams'])

teams.head()

Unnamed: 0,code,draw,form,id,loss,name,played,points,position,short_name,...,team_division,unavailable,win,strength_overall_home,strength_overall_away,strength_attack_home,strength_attack_away,strength_defence_home,strength_defence_away,pulse_id
0,3,0,,1,0,Arsenal,0,0,0,ARS,...,,False,0,1245,1285,1250,1250,1240,1320,1
1,7,0,,2,0,Aston Villa,0,0,0,AVL,...,,False,0,1070,1100,1070,1075,1070,1130,2
2,91,0,,3,0,Bournemouth,0,0,0,BOU,...,,False,0,1035,1095,1020,1110,1050,1080,127
3,94,0,,4,0,Brentford,0,0,0,BRE,...,,False,0,1115,1180,1100,1160,1130,1200,130
4,36,0,,5,0,Brighton,0,0,0,BHA,...,,False,0,1170,1175,1140,1150,1200,1200,131


Get team defensive strength for each team

In [6]:
team_strength_def = teams[['id', 'name', 'strength_defence_away', 'strength_defence_home']]

team_strength_def

Unnamed: 0,id,name,strength_defence_away,strength_defence_home
0,1,Arsenal,1320,1240
1,2,Aston Villa,1130,1070
2,3,Bournemouth,1080,1050
3,4,Brentford,1200,1130
4,5,Brighton,1200,1200
5,6,Chelsea,1220,1140
6,7,Crystal Palace,1090,1060
7,8,Everton,1090,1040
8,9,Fulham,1140,1120
9,10,Leicester,1120,1200


Get team attack strength for each team

In [7]:
team_strength_att = teams[['id', 'name', 'strength_attack_away', 'strength_attack_home']]

team_strength_att

Unnamed: 0,id,name,strength_attack_away,strength_attack_home
0,1,Arsenal,1250,1250
1,2,Aston Villa,1075,1070
2,3,Bournemouth,1110,1020
3,4,Brentford,1160,1100
4,5,Brighton,1150,1140
5,6,Chelsea,1220,1190
6,7,Crystal Palace,1110,1110
7,8,Everton,1100,1070
8,9,Fulham,1065,1070
9,10,Leicester,1110,1065


Get position info

In [8]:
# get position information from 'element_types'
positions = pd.DataFrame(json['element_types'])

positions.head()

Unnamed: 0,id,plural_name,plural_name_short,singular_name,singular_name_short,squad_select,squad_min_play,squad_max_play,ui_shirt_specific,sub_positions_locked,element_count
0,1,Goalkeepers,GKP,Goalkeeper,GKP,2,1,1,True,[12],81
1,2,Defenders,DEF,Defender,DEF,5,3,5,False,[],257
2,3,Midfielders,MID,Midfielder,MID,5,2,5,False,[],324
3,4,Forwards,FWD,Forward,FWD,3,1,3,False,[],92


Merge the players with their teams

In [9]:
# merge player data with teams and positions
player_team_merge = pd.merge(
    left = players_df_select,
    right = teams,
    left_on = 'team',
    right_on = 'id'
)

player_team_merge[['full_name', 'name']].head()

Unnamed: 0,full_name,name
0,Granit Xhaka,Arsenal
1,Mohamed Elneny,Arsenal
2,Rob Holding,Arsenal
3,Thomas Partey,Arsenal
4,Martin Ødegaard,Arsenal


Merge the players with their positions

In [10]:
player_team_pos_merge = pd.merge(
    left = player_team_merge,
    right = positions,
    left_on = 'element_type',
    right_on = 'id'
)

player_team_pos_merge[['full_name', 'name', 'singular_name_short']].head()

Unnamed: 0,full_name,name,singular_name_short
0,Granit Xhaka,Arsenal,MID
1,Mohamed Elneny,Arsenal,MID
2,Thomas Partey,Arsenal,MID
3,Martin Ødegaard,Arsenal,MID
4,Nicolas Pépé,Arsenal,MID


In [11]:
# rename columns
player_team_pos_merge = player_team_pos_merge.rename(
    columns={'name':'team_name', 'singular_name_short':'position_name'}
)

player_team_pos_merge[['full_name', 'team_name', 'position_name']].head()

Unnamed: 0,full_name,team_name,position_name
0,Granit Xhaka,Arsenal,MID
1,Mohamed Elneny,Arsenal,MID
2,Thomas Partey,Arsenal,MID
3,Martin Ødegaard,Arsenal,MID
4,Nicolas Pépé,Arsenal,MID


Get player gameweek data from https://fantasy.premierleague.com/api/element-summary/

In [12]:
# function for getting specific player gameweek history
def get_history(player_id):
    ''' get all gameweek history for a given player'''
    
    # request data from API 
    data = requests.get("https://fantasy.premierleague.com/api/element-summary/" + str(player_id) + "/")
    json = data.json()
    
    # turn data into Pandas dataframe
    df = pd.DataFrame(json['history'])
    
    return df

get_history(13) #bukayo saka id = 13 

Unnamed: 0,element,fixture,opponent_team,total_points,was_home,kickoff_time,team_h_score,team_a_score,round,minutes,...,starts,expected_goals,expected_assists,expected_goal_involvements,expected_goals_conceded,value,transfers_balance,selected,transfers_in,transfers_out
0,13,1,7,6,False,2022-08-05T19:00:00Z,0,2,1,90,...,1,0.26,0.11,0.37,1.21,80,0,1772492,0,0
1,13,11,10,2,True,2022-08-13T14:00:00Z,4,2,2,83,...,1,0.04,0.29,0.33,0.33,80,-38146,1922750,93462,131608
2,13,21,3,3,False,2022-08-20T16:30:00Z,0,3,3,87,...,1,0.02,0.13,0.15,0.26,80,-310888,1658605,67472,378360
3,13,31,9,5,True,2022-08-27T16:30:00Z,2,1,4,90,...,1,0.47,0.11,0.58,0.83,79,-350292,1344008,28075,378367
4,13,41,2,5,True,2022-08-31T18:30:00Z,2,1,5,87,...,1,0.45,0.34,0.79,0.45,79,-182936,1189232,22544,205480
5,13,56,14,7,False,2022-09-04T15:30:00Z,3,1,6,90,...,1,0.4,0.07,0.47,1.55,78,-123281,1091602,34101,157382
6,13,72,4,9,False,2022-09-18T11:00:00Z,0,3,8,90,...,1,0.23,0.13,0.36,0.53,78,56733,1222425,108114,51381
7,13,81,18,2,True,2022-10-01T11:30:00Z,3,1,9,90,...,1,0.27,0.2,0.47,1.57,78,21882,1222918,96664,74782
8,13,91,12,15,True,2022-10-09T15:30:00Z,3,2,10,90,...,1,2.04,0.02,2.06,1.01,78,-84324,1146083,43669,127993
9,13,104,11,9,False,2022-10-16T13:00:00Z,0,1,11,81,...,1,0.09,0.09,0.18,1.72,78,257682,1437635,283723,26041


In [13]:
from tqdm.auto import tqdm
tqdm.pandas()

In [14]:
# join team name
players = players.merge(
    teams[['id', 'name']],
    left_on='team',
    right_on='id',
    suffixes=['_player', None]
).drop(['team', 'id'], axis=1)

# join player positions
players = players.merge(
    positions[['id', 'singular_name_short']],
    left_on='element_type',
    right_on='id'
).drop(['element_type', 'id'], axis=1)

# rename columns
players = players.rename(
    columns={'name':'team', 'singular_name_short':'position'}
)

players.head()


Unnamed: 0,chance_of_playing_next_round,chance_of_playing_this_round,code,cost_change_event,cost_change_event_fall,cost_change_start,cost_change_start_fall,dreamteam_count,ep_next,ep_this,...,form_rank_type,points_per_game_rank,points_per_game_rank_type,selected_rank,selected_rank_type,starts_per_90,clean_sheets_per_90,full_name,team,position
0,,,84450,0,0,-1,1,1,6.8,6.3,...,5,63,26,116,40,1.06,0.4,Granit Xhaka,Arsenal,MID
1,0.0,0.0,153256,0,0,-4,4,1,0.0,0.0,...,177,429,188,240,77,0.81,0.0,Mohamed Elneny,Arsenal,MID
2,100.0,100.0,167199,0,0,-3,3,0,2.2,1.7,...,79,155,62,276,91,1.03,0.45,Thomas Partey,Arsenal,MID
3,100.0,100.0,184029,0,0,2,-2,4,3.8,3.3,...,39,12,6,8,4,1.05,0.4,Martin Ødegaard,Arsenal,MID
4,0.0,0.0,195735,0,0,-2,2,0,0.0,0.0,...,307,745,320,436,156,0.0,0.0,Nicolas Pépé,Arsenal,MID


In [15]:
# get gameweek history for all players
points = players['id_player'].progress_apply(get_history)

# combine results into one dataframe
points = pd.concat(df for df in points)

# join full_name
points = players[['id_player', 'full_name', 'team', 'position']].merge(
    points,
    left_on='id_player',
    right_on='element'
)

  0%|          | 0/754 [00:00<?, ?it/s]

In [16]:
# merge opponent defensive strength
points = pd.merge(left = points,
                  right = team_strength_def[['id', 'strength_defence_away', 
                                             'strength_defence_home']],
                  how = 'left',
                  left_on = 'opponent_team',
                  right_on = 'id'
).drop(
    'id', axis = 1
).rename(
    columns={'strength_defence_away':'opp_def_strength_away', 'strength_defence_home':'opp_def_strength_home'}
)

points.head()

Unnamed: 0,id_player,full_name,team,position,element,fixture,opponent_team,total_points,was_home,kickoff_time,...,expected_assists,expected_goal_involvements,expected_goals_conceded,value,transfers_balance,selected,transfers_in,transfers_out,opp_def_strength_away,opp_def_strength_home
0,3,Granit Xhaka,Arsenal,MID,3,1,7,2,False,2022-08-05T19:00:00Z,...,0.06,0.06,1.21,50,0,48303,0,0,1090,1060
1,3,Granit Xhaka,Arsenal,MID,3,11,10,12,True,2022-08-13T14:00:00Z,...,0.1,0.48,0.46,50,-629,65418,9001,9630,1120,1200
2,3,Granit Xhaka,Arsenal,MID,3,21,3,6,False,2022-08-20T16:30:00Z,...,0.11,0.11,0.26,50,112040,216726,137326,25286,1080,1050
3,3,Granit Xhaka,Arsenal,MID,3,31,9,2,True,2022-08-27T16:30:00Z,...,0.07,0.16,0.83,50,42760,267951,77459,34699,1140,1120
4,3,Granit Xhaka,Arsenal,MID,3,41,2,2,True,2022-08-31T18:30:00Z,...,0.04,0.04,0.45,50,10781,288460,49435,38654,1130,1070


In [17]:
# assign correct home/away opponent defensive strength for each fixture
def opp_def_strength(row):
    if row['was_home'] == False:
        return row['opp_def_strength_home']
    elif row['was_home'] == True:
        return row['opp_def_strength_away']
    else:
        return "Unknown"

points['opp_def_strength'] = points.apply(lambda row: opp_def_strength(row), axis = 1)

points = points.drop(['opp_def_strength_home','opp_def_strength_away'], axis = 1)

points.head()

Unnamed: 0,id_player,full_name,team,position,element,fixture,opponent_team,total_points,was_home,kickoff_time,...,expected_goals,expected_assists,expected_goal_involvements,expected_goals_conceded,value,transfers_balance,selected,transfers_in,transfers_out,opp_def_strength
0,3,Granit Xhaka,Arsenal,MID,3,1,7,2,False,2022-08-05T19:00:00Z,...,0.0,0.06,0.06,1.21,50,0,48303,0,0,1060
1,3,Granit Xhaka,Arsenal,MID,3,11,10,12,True,2022-08-13T14:00:00Z,...,0.38,0.1,0.48,0.46,50,-629,65418,9001,9630,1120
2,3,Granit Xhaka,Arsenal,MID,3,21,3,6,False,2022-08-20T16:30:00Z,...,0.0,0.11,0.11,0.26,50,112040,216726,137326,25286,1050
3,3,Granit Xhaka,Arsenal,MID,3,31,9,2,True,2022-08-27T16:30:00Z,...,0.09,0.07,0.16,0.83,50,42760,267951,77459,34699,1140
4,3,Granit Xhaka,Arsenal,MID,3,41,2,2,True,2022-08-31T18:30:00Z,...,0.0,0.04,0.04,0.45,50,10781,288460,49435,38654,1130


In [18]:
# merge opponent attack strength
points = pd.merge(left = points,
                  right = team_strength_att[['id', 'strength_attack_away', 
                                             'strength_attack_home']],
                  how = 'left',
                  left_on = 'opponent_team',
                  right_on = 'id'
).drop(
    'id', axis = 1
).rename(
    columns={'strength_attack_away':'opp_att_strength_away', 
             'strength_attack_home':'opp_att_strength_home'}
)

points.head()

Unnamed: 0,id_player,full_name,team,position,element,fixture,opponent_team,total_points,was_home,kickoff_time,...,expected_goal_involvements,expected_goals_conceded,value,transfers_balance,selected,transfers_in,transfers_out,opp_def_strength,opp_att_strength_away,opp_att_strength_home
0,3,Granit Xhaka,Arsenal,MID,3,1,7,2,False,2022-08-05T19:00:00Z,...,0.06,1.21,50,0,48303,0,0,1060,1110,1110
1,3,Granit Xhaka,Arsenal,MID,3,11,10,12,True,2022-08-13T14:00:00Z,...,0.48,0.46,50,-629,65418,9001,9630,1120,1110,1065
2,3,Granit Xhaka,Arsenal,MID,3,21,3,6,False,2022-08-20T16:30:00Z,...,0.11,0.26,50,112040,216726,137326,25286,1050,1110,1020
3,3,Granit Xhaka,Arsenal,MID,3,31,9,2,True,2022-08-27T16:30:00Z,...,0.16,0.83,50,42760,267951,77459,34699,1140,1065,1070
4,3,Granit Xhaka,Arsenal,MID,3,41,2,2,True,2022-08-31T18:30:00Z,...,0.04,0.45,50,10781,288460,49435,38654,1130,1075,1070


In [19]:
# assign correct home/away opponent attack strength for each fixture
def opp_att_strength(row):
    if row['was_home'] == False:
        return row['opp_att_strength_home']
    elif row['was_home'] == True:
        return row['opp_att_strength_away']
    else:
        return "Unknown"

points['opp_att_strength'] = points.apply(lambda row: opp_att_strength(row), axis = 1)

points = points.drop(['opp_att_strength_home','opp_att_strength_away'], axis = 1)

points.head()

Unnamed: 0,id_player,full_name,team,position,element,fixture,opponent_team,total_points,was_home,kickoff_time,...,expected_assists,expected_goal_involvements,expected_goals_conceded,value,transfers_balance,selected,transfers_in,transfers_out,opp_def_strength,opp_att_strength
0,3,Granit Xhaka,Arsenal,MID,3,1,7,2,False,2022-08-05T19:00:00Z,...,0.06,0.06,1.21,50,0,48303,0,0,1060,1110
1,3,Granit Xhaka,Arsenal,MID,3,11,10,12,True,2022-08-13T14:00:00Z,...,0.1,0.48,0.46,50,-629,65418,9001,9630,1120,1110
2,3,Granit Xhaka,Arsenal,MID,3,21,3,6,False,2022-08-20T16:30:00Z,...,0.11,0.11,0.26,50,112040,216726,137326,25286,1050,1020
3,3,Granit Xhaka,Arsenal,MID,3,31,9,2,True,2022-08-27T16:30:00Z,...,0.07,0.16,0.83,50,42760,267951,77459,34699,1140,1065
4,3,Granit Xhaka,Arsenal,MID,3,41,2,2,True,2022-08-31T18:30:00Z,...,0.04,0.04,0.45,50,10781,288460,49435,38654,1130,1075


In [20]:
# save the dataframe to csv
points.to_csv('data/points_20230413.csv')

Let's try to compare performance over the past 5 matches with performance in the most recent match

For a given player we need to:
- get the last 6 results from the points dataframe
- take the average of each performance stat over the first 5/6 observations
- see if any performance stats predict performance in the 6th fixture

The analysis will be different for players in different positions. Attacking players get most of their points from goal involvements, while defenders and goalkeepers get more points from clean sheets.

By running this method after each gameweek we can build a larger dataset over the course of the season.

In [50]:
# import test data
points = pd.read_csv('data/points_20230413.csv')

In [51]:
# select columns of interest
points_select = points[['id_player', 'full_name', 'team', 'position',
                        'total_points',
                        'minutes', 'goals_scored', 'assists', 'clean_sheets', 
                        'goals_conceded', 'own_goals',
                        'saves', 'bonus', 'bps', 'influence', 'creativity', 'threat', 'ict_index',
                        'expected_goals', 'expected_assists', 'expected_goal_involvements', 
                        'expected_goals_conceded', 'opp_att_strength', 'opp_def_strength']]

In [52]:
points_13 = points_select[points_select['id_player'] == 13]

points_13

Unnamed: 0,id_player,full_name,team,position,total_points,minutes,goals_scored,assists,clean_sheets,goals_conceded,...,influence,creativity,threat,ict_index,expected_goals,expected_assists,expected_goal_involvements,expected_goals_conceded,opp_att_strength,opp_def_strength
180,13,Bukayo Saka,Arsenal,MID,6,90,0,1,1,0,...,1.8,7.6,36.0,4.5,0.26,0.11,0.37,1.21,1110,1060
181,13,Bukayo Saka,Arsenal,MID,2,83,0,0,0,2,...,13.8,43.9,23.0,8.1,0.04,0.29,0.33,0.33,1110,1120
182,13,Bukayo Saka,Arsenal,MID,3,87,0,0,1,0,...,12.0,29.9,8.0,5.0,0.02,0.13,0.15,0.26,1020,1050
183,13,Bukayo Saka,Arsenal,MID,5,90,0,1,0,1,...,28.0,25.4,45.0,9.8,0.47,0.11,0.58,0.83,1065,1140
184,13,Bukayo Saka,Arsenal,MID,5,87,0,1,0,1,...,24.6,42.7,49.0,11.6,0.45,0.34,0.79,0.45,1075,1130
185,13,Bukayo Saka,Arsenal,MID,7,90,1,0,0,3,...,34.2,18.5,52.0,10.5,0.4,0.07,0.47,1.55,1140,1165
186,13,Bukayo Saka,Arsenal,MID,9,90,0,2,1,0,...,42.6,41.8,33.0,11.7,0.23,0.13,0.36,0.53,1100,1130
187,13,Bukayo Saka,Arsenal,MID,2,90,0,0,0,1,...,21.2,69.0,34.0,12.4,0.27,0.2,0.47,1.57,1210,1230
188,13,Bukayo Saka,Arsenal,MID,15,90,2,0,0,2,...,70.8,3.7,97.0,17.2,2.04,0.02,2.06,1.01,1250,1300
189,13,Bukayo Saka,Arsenal,MID,9,81,1,0,1,0,...,37.4,18.3,19.0,7.5,0.09,0.09,0.18,1.72,1070,1130


In [53]:
def last_5_player(df, player_id):
    ''' 
    get the mean stats for a given player_id over the last 5 fixtures
    prior to most recent fixture and the total points from the most 
    recent fixture. 
    
    assume dataframe is sorted from oldest to newest fixtures
    '''
    df = df[df['id_player'] == player_id]
    
    last_6 = df.tail(6) 
    
    last_5 = last_6.head(5)
    
    recent = last_6['total_points'].tail(1)
    
    d = {'name': last_5['full_name'].iloc[0],
         'id': last_5['id_player'].iloc[0],
         'team': last_5['team'].iloc[0],
        'position': last_5['position'].iloc[0],
        'mean_points': last_5['total_points'].mean(),
        'mean_minutes': last_5['minutes'].mean(),
        'mean_goals_scored': last_5['goals_scored'].mean(),
        'mean_assists': last_5['assists'].mean(),
        'mean_clean_sheets': last_5['clean_sheets'].mean(),
        'mean_goals_conceded': last_5['goals_conceded'].mean(),
        'mean_own_goals': last_5['own_goals'].mean(),
        'mean_saves': last_5['saves'].mean(),
        'mean_bonus': last_5['bonus'].mean(),
        'mean_bps': last_5['bps'].mean(),
        'mean_influence': last_5['influence'].mean(),
        'mean_creativity': last_5['creativity'].mean(),
        'mean_threat': last_5['threat'].mean(),
        'mean_ict': last_5['ict_index'].mean(),
        'mean_xg': last_5['expected_goals'].mean(),
        'mean_xa': last_5['expected_assists'].mean(),
        'mean_xgi': last_5['expected_goal_involvements'].mean(),
        'mean_xgc': last_5['expected_goals_conceded'].mean(),
        'mean_opp_att': last_5['opp_att_strength'].mean(),
        'mean_opp_def': last_5['opp_def_strength'].mean()}
    
    last_5_mean = pd.DataFrame(data = d, index = [0])
    
    last_5_mean['latest_points'] = recent.iloc[0]
    
    return last_5_mean

# test for Bukayo Saka (id = 13)
last_5_player(points_select, 13)

Unnamed: 0,name,id,team,position,mean_points,mean_minutes,mean_goals_scored,mean_assists,mean_clean_sheets,mean_goals_conceded,...,mean_creativity,mean_threat,mean_ict,mean_xg,mean_xa,mean_xgi,mean_xgc,mean_opp_att,mean_opp_def,latest_points
0,Bukayo Saka,13,Arsenal,MID,7.2,72.4,0.6,0.4,0.4,0.8,...,26.9,36.4,9.8,0.268,0.132,0.4,0.758,1093.0,1100.0,1


In [54]:
len(points_13.index) - 5

25

In [55]:
def last_5_all(df):
    ''' get last mean stats for all players in df over the last 5 fixtures
    prior to most recent fixture and the total points from the most 
    recent fixture.
    '''
    last_5_all = pd.DataFrame() # empty dataframe
    for p in df['id_player'].unique():
        player_df = last_5_player(df, p)
        last_5_all = pd.concat([last_5_all, player_df])
    return last_5_all

In [56]:
# testing
subset = points_select[points_select['id_player'].isin([12, 13])]

last_5_all(subset)

Unnamed: 0,name,id,team,position,mean_points,mean_minutes,mean_goals_scored,mean_assists,mean_clean_sheets,mean_goals_conceded,...,mean_creativity,mean_threat,mean_ict,mean_xg,mean_xa,mean_xgi,mean_xgc,mean_opp_att,mean_opp_def,latest_points
0,Emile Smith Rowe,12,Arsenal,MID,1.4,13.2,0.0,0.2,0.0,0.2,...,5.36,4.2,1.36,0.032,0.064,0.096,0.212,1093.0,1100.0,0
0,Bukayo Saka,13,Arsenal,MID,7.2,72.4,0.6,0.4,0.4,0.8,...,26.9,36.4,9.8,0.268,0.132,0.4,0.758,1093.0,1100.0,1


Let's get the last 5 games performance and most recent performance for all the players

In [57]:
last_5_df = last_5_all(points_select)

last_5_df.head()

Unnamed: 0,name,id,team,position,mean_points,mean_minutes,mean_goals_scored,mean_assists,mean_clean_sheets,mean_goals_conceded,...,mean_creativity,mean_threat,mean_ict,mean_xg,mean_xa,mean_xgi,mean_xgc,mean_opp_att,mean_opp_def,latest_points
0,Granit Xhaka,3,Arsenal,MID,5.0,68.4,0.4,0.0,0.4,0.4,...,11.72,15.4,4.78,0.136,0.084,0.22,0.592,1093.0,1100.0,1
0,Mohamed Elneny,4,Arsenal,MID,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1093.0,1100.0,0
0,Thomas Partey,6,Arsenal,MID,3.0,74.8,0.2,0.0,0.2,0.6,...,13.26,8.8,4.28,0.148,0.144,0.292,0.75,1093.0,1100.0,2
0,Martin Ødegaard,7,Arsenal,MID,5.0,88.8,0.4,0.2,0.4,0.8,...,33.64,25.4,8.96,0.332,0.246,0.578,0.98,1093.0,1100.0,2
0,Nicolas Pépé,9,Arsenal,MID,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1093.0,1100.0,0


In [58]:
# save the data to csv
#last_5_df.to_csv('data/last_5_gw14-19.csv')

In [59]:
def last_5_player_all_gw(df, player_id):
    '''
    get the mean stats for a given player_id over every group of 5 fixtures
    and the latest points from the subsequent fixture. 
    
    assume dataframe is sorted from oldest to newest fixtures
    '''
    df = df[df['id_player'] == player_id]
    
    new_df = pd.DataFrame()
    
    # iterate over fixtures for player
    for n in range(0, len(df.index)-5):
        last_5 = df.iloc[n:(n + 5)]
        recent = df.iloc[[n+5]]
        
        d = {'name': last_5['full_name'].iloc[0],
         'id': last_5['id_player'].iloc[0],
         'team': last_5['team'].iloc[0],
        'position': last_5['position'].iloc[0],
        'mean_points': last_5['total_points'].mean(),
        'mean_minutes': last_5['minutes'].mean(),
        'mean_goals_scored': last_5['goals_scored'].mean(),
        'mean_assists': last_5['assists'].mean(),
        'mean_clean_sheets': last_5['clean_sheets'].mean(),
        'mean_goals_conceded': last_5['goals_conceded'].mean(),
        'mean_own_goals': last_5['own_goals'].mean(),
        'mean_saves': last_5['saves'].mean(),
        'mean_bonus': last_5['bonus'].mean(),
        'mean_bps': last_5['bps'].mean(),
        'mean_influence': last_5['influence'].mean(),
        'mean_creativity': last_5['creativity'].mean(),
        'mean_threat': last_5['threat'].mean(),
        'mean_ict': last_5['ict_index'].mean(),
        'mean_xg': last_5['expected_goals'].mean(),
        'mean_xa': last_5['expected_assists'].mean(),
        'mean_xgi': last_5['expected_goal_involvements'].mean(),
        'mean_xgc': last_5['expected_goals_conceded'].mean(),
        'mean_opp_att': last_5['opp_att_strength'].mean(),
        'mean_opp_def': last_5['opp_def_strength'].mean()}
        
        last_5_mean = pd.DataFrame(data = d, index = [0])
        
        last_5_mean['latest_points'] = recent['total_points'].iloc[0]
        
        new_df = pd.concat([new_df, last_5_mean])
        
    return new_df

In [60]:
# test id_player = 13
last_5_player_all_gw(points_select, 13)

Unnamed: 0,name,id,team,position,mean_points,mean_minutes,mean_goals_scored,mean_assists,mean_clean_sheets,mean_goals_conceded,...,mean_creativity,mean_threat,mean_ict,mean_xg,mean_xa,mean_xgi,mean_xgc,mean_opp_att,mean_opp_def,latest_points
0,Bukayo Saka,13,Arsenal,MID,4.2,87.4,0.0,0.6,0.4,0.8,...,29.9,32.2,7.8,0.248,0.196,0.444,0.616,1076.0,1100.0,7
0,Bukayo Saka,13,Arsenal,MID,4.4,87.4,0.2,0.4,0.2,1.4,...,32.08,35.4,9.0,0.276,0.188,0.464,0.684,1082.0,1121.0,9
0,Bukayo Saka,13,Arsenal,MID,5.8,88.8,0.2,0.8,0.4,1.0,...,31.66,37.4,9.72,0.314,0.156,0.47,0.724,1080.0,1123.0,2
0,Bukayo Saka,13,Arsenal,MID,5.6,89.4,0.2,0.8,0.2,1.2,...,39.48,42.6,11.2,0.364,0.17,0.534,0.986,1118.0,1159.0,15
0,Bukayo Saka,13,Arsenal,MID,7.6,89.4,0.6,0.6,0.2,1.4,...,35.14,53.0,12.68,0.678,0.152,0.83,1.022,1155.0,1191.0,9
0,Bukayo Saka,13,Arsenal,MID,8.4,88.2,0.8,0.4,0.4,1.2,...,30.26,47.0,11.86,0.606,0.102,0.708,1.276,1154.0,1191.0,1
0,Bukayo Saka,13,Arsenal,MID,7.2,88.2,0.6,0.4,0.4,0.8,...,27.16,38.8,10.1,0.54,0.096,0.636,1.1,1140.0,1170.0,4
0,Bukayo Saka,13,Arsenal,MID,6.2,75.4,0.6,0.2,0.2,0.8,...,26.96,34.2,9.2,0.526,0.094,0.62,1.002,1140.0,1165.0,5
0,Bukayo Saka,13,Arsenal,MID,6.8,75.4,0.6,0.4,0.4,0.6,...,21.86,28.6,8.18,0.494,0.08,0.574,0.746,1136.0,1147.0,3
0,Bukayo Saka,13,Arsenal,MID,4.4,75.4,0.2,0.4,0.6,0.2,...,30.6,10.0,5.92,0.086,0.12,0.206,0.68,1106.0,1099.0,9


In [61]:
def last_5_all_gw(df):
    ''' 
    get last mean stats for all players in df over 5 consecutive fixtures
    plus points from subsequent fixture across all fixtures.
    '''
    last_5_all = pd.DataFrame() # empty dataframe
    for p in df['id_player'].unique():
        player_df = last_5_player_all_gw(df, p)
        last_5_all = pd.concat([last_5_all, player_df])
    return last_5_all

In [62]:
last_5_df_all_gw = last_5_all_gw(points_select)

last_5_df_all_gw.head()

Unnamed: 0,name,id,team,position,mean_points,mean_minutes,mean_goals_scored,mean_assists,mean_clean_sheets,mean_goals_conceded,...,mean_creativity,mean_threat,mean_ict,mean_xg,mean_xa,mean_xgi,mean_xgc,mean_opp_att,mean_opp_def,latest_points
0,Granit Xhaka,3,Arsenal,MID,4.8,89.4,0.2,0.4,0.4,0.8,...,24.9,11.2,5.98,0.094,0.076,0.17,0.642,1076.0,1100.0,2
0,Granit Xhaka,3,Arsenal,MID,4.8,89.4,0.2,0.4,0.2,1.4,...,24.88,20.0,6.82,0.126,0.09,0.216,0.71,1082.0,1121.0,5
0,Granit Xhaka,3,Arsenal,MID,3.4,89.4,0.0,0.4,0.4,1.0,...,27.02,15.0,5.94,0.058,0.12,0.178,0.724,1080.0,1123.0,9
0,Granit Xhaka,3,Arsenal,MID,4.0,90.0,0.2,0.2,0.2,1.2,...,24.26,20.0,6.36,0.124,0.132,0.256,0.986,1118.0,1159.0,2
0,Granit Xhaka,3,Arsenal,MID,4.0,90.0,0.2,0.2,0.2,1.4,...,20.72,21.2,6.3,0.122,0.168,0.29,1.046,1155.0,1191.0,3


In [63]:
# save the data
last_5_df_all_gw.to_csv('Data/5_gw_perform_upto_gw30.csv')

Combining past data with newer data

In [65]:
# get the existing data
past_data = pd.read_csv("data/5_gw_perform_upto_gw30.csv")

# add the newest data to the past data
new_data = pd.concat([past_data, last_5_df], axis = 1)

# save the new data
new_data.to_csv("data/plyr_form_data_20230413.csv")

InvalidIndexError: Reindexing only valid with uniquely valued Index objects