In [1]:
import datetime as dt
import os

import numpy as np
import pandas as pd

In [2]:
# Notebook display configuration: let pandas show up to 500 rows and 500
# columns of a DataFrame and wrap its text representation at 100 characters.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 100)

# `display` is imported from the public `IPython.display` module; importing it
# from `IPython.core.display` is deprecated.
from IPython.display import display, HTML

# Inject a CSS rule that sets elements with the `container` class to 80% width.
display(HTML("<style>.container { width:80% !important; }</style>"))

# Features overview
## Feature long list categories:

>### Player details (as of most recent week)
- ICT scores
- Selected (by other FPL players)
- Player value


>### Details of fixture - FIX
- Relative strength of teams
    - Position in league
    - Team strength stats in teams csv
- Home or away
- Forecasted minutes played

>### Player's recent performance (LW = last week)
- Points
- Goals
- Bonus points
- Red/yellow cards
- Own goals
- Saves (GK only)
- Goals conceded (GK, Def)
- Clean sheets (GK, Def, MF)

>### Player's recent performance (L4W = last 4 weeks)
- Average (and stdev) of 
    - Points
    - Goals
    - Bonus points
    - Red/yellow cards
    - Own goals
    - Saves (GK only)
    - Goals conceded (GK, Def)
    - Clean sheets (GK, Def, MF)


>### Player's performance across the season (TSS = This season)
- Average (and stdev) of 
    - Points
    - Goals
    - Bonus points
    - Red/yellow cards
    - Own goals
    - Saves (GK only)
    - Goals conceded (GK, Def)
    - Clean sheets (GK, Def, MF)



>### Player's historic performance (LSS = last season)
- Average (and stdev) of 
    - Points
    - Goals
    - Bonus points
    - Red/yellow cards
    - Own goals
    - Saves (GK only)
    - Goals conceded (GK, Def)
    - Clean sheets (GK, Def, MF)

## Target
- Total points


# Data addresses

In [3]:
# Root of the local data folder that holds one sub-folder per season.
# Set the FPL_DATA_ROOT environment variable to point at your own copy; when it
# is unset, the original hard-coded location is used so existing runs are unchanged.
_fpl_data_root = os.environ.get(
    "FPL_DATA_ROOT",
    "/Users/calumthompson/Documents/Fantasy football/GitHub_data/data/",
)
# Later cells build file paths by plain string concatenation, so the folder
# strings must end with a slash.
if not _fpl_data_root.endswith("/"):
    _fpl_data_root += "/"

this_season_folder = _fpl_data_root + "2019-20/"
last_season_folder = _fpl_data_root + "2018-19/"

In [4]:
# File name of the teams table; it is appended to a season folder when read.
team_link = 'teams.csv'

In [5]:
# File name of the raw players table; it is appended to a season folder when read.
players_raw = 'players_raw.csv'

In [6]:
# URL of the fixture list CSV that is read into `fixtures_RAW`.
fixtures_link = (
    'https://fixturedownload.com/download/'
    'epl-2019-GMTStandardTime.csv'
)

In [7]:
# URL of the league table CSV used in the team strength section.
table_link = (
    "https://www.footballwebpages.co.uk/league-table.csv"
    "?comp=1&showHa=yes"
)

# 1. Create fixtures data for each player 
## 1.1. The spine of the data is the fixtures list

In [44]:
# Load the fixture list and parse its `Date` column with an explicit
# day/month/year format.  Parsing happens after the read because `pd.datetime`
# no longer exists in current pandas and `read_csv(date_parser=...)` is
# deprecated; `pd.to_datetime(..., format=...)` works across versions.
FIXTURE_DATE_FORMAT = "%d/%m/%Y %H:%M"

# Kept under its original name in case another cell calls it.
Date_format = lambda x: dt.datetime.strptime(x, FIXTURE_DATE_FORMAT)

fixtures_RAW = pd.read_csv(fixtures_link)
fixtures_RAW['Date'] = pd.to_datetime(fixtures_RAW['Date'], format=FIXTURE_DATE_FORMAT)
fixtures_RAW.head()

Unnamed: 0,Round Number,Date,Location,Home Team,Away Team,Result
0,1,2019-08-09 20:00:00,Anfield,Liverpool,Norwich,4 - 1
1,1,2019-08-10 12:30:00,London Stadium,West Ham,Man City,0 - 5
2,1,2019-08-10 15:00:00,Vitality Stadium,Bournemouth,Sheffield Utd,1 - 1
3,1,2019-08-10 15:00:00,Turf Moor,Burnley,Southampton,3 - 0
4,1,2019-08-10 15:00:00,Selhurst Park,Crystal Palace,Everton,0 - 0


In [45]:
# Split each "H - A" result string into *numeric* home/away goal columns.
# Comparing the raw strings would order them lexicographically ('10' < '9'),
# and fixtures without a result must stay NaN rather than count as a defeat.
_score_parts = (
    fixtures_RAW['Result'].str.split(' - ', expand=True).reindex(columns=[0, 1])
)
fixtures_RAW['home_score'] = pd.to_numeric(_score_parts[0], errors='coerce')
fixtures_RAW['away_score'] = pd.to_numeric(_score_parts[1], errors='coerce')
teams = fixtures_RAW['Home Team'].unique()

team_frames = []

for team in teams:

    # Every fixture the team is involved in, in the order the CSV lists them.
    involved = (fixtures_RAW['Home Team'] == team) | (fixtures_RAW['Away Team'] == team)
    team_record = fixtures_RAW.loc[involved]
    at_home = (team_record['Home Team'] == team).to_numpy()

    col_opponent = np.where(at_home, team_record['Away Team'], team_record['Home Team'])
    col_goals = np.where(at_home, team_record['home_score'], team_record['away_score'])
    col_opp_goals = np.where(at_home, team_record['away_score'], team_record['home_score'])

    # 3 / 1 / 0 for win / draw / loss; NaN while the fixture has no result.
    not_played = np.isnan(col_goals) | np.isnan(col_opp_goals)
    col_points = np.select(
        [not_played, col_goals > col_opp_goals, col_goals == col_opp_goals],
        [np.nan, 3, 1],
        default=0,
    )

    # The gameweek is the fixture's position in the team's list (the
    # `Round Number` column is not consulted), sized from the data instead of
    # a fixed 38 so a shorter or longer fixture list does not raise.
    n_fixtures = len(team_record)

    team_frames.append(pd.DataFrame({
        'Team': team,
        'GW': np.arange(1, n_fixtures + 1),
        'Fixture_date': team_record['Date'].dt.date.to_numpy(),
        'Opponent': col_opponent,
        'FIX_Home?': at_home.astype(int),
        'FIX_team_goals': col_goals,
        'FIX_opp_goals': col_opp_goals,
        'FIX_points_gained': col_points,
    }))

fixtures_df = pd.concat(team_frames).reset_index(drop = True)
    

In [46]:
# "Incoming" team form: what the team had done *before* each fixture.
# For goals and for points this adds
#   * the value from the team's previous fixture (shift by one row per team)
#   * the running total of those previous values (expanding sum per team)
# The source column is coerced to numeric first, because score columns built
# from split strings would otherwise give text-valued features.  An expanding
# window replaces the hard-coded 38-row rolling window, so the running total
# never drops early fixtures however long the fixture list is.
for _source_col, _last_week_col, _cumulative_col in (
    ('FIX_team_goals', 'FIX_goals_last_week', 'FIX_cumulative_goals_incoming'),
    ('FIX_points_gained', 'FIX_points_last_week', 'FIX_cumulative_points_incoming'),
):
    _numeric_source = pd.to_numeric(fixtures_df[_source_col], errors='coerce')
    fixtures_df[_last_week_col] = _numeric_source.groupby(fixtures_df['Team']).shift(periods = 1)
    fixtures_df[_cumulative_col] = (
        fixtures_df.groupby('Team')[_last_week_col]
        .expanding(min_periods = 1)
        .sum()
        # Drop the group level so the result aligns with fixtures_df's own index.
        .reset_index(level = 'Team', drop = True)
    )




In [48]:
# Preview the first ten rows of the per-team fixture table.
fixtures_df.head(n = 10)

Unnamed: 0,Team,GW,Fixture_date,Opponent,FIX_Home?,FIX_team_goals,FIX_opp_goals,FIX_points_gained,FIX_goals_last_week,FIX_cumulative_goals_incoming,FIX_points_last_week,FIX_cumulative_points_incoming
0,Liverpool,1,2019-08-09,Norwich,1,4,1,3,,,,
1,Liverpool,2,2019-08-17,Southampton,0,2,1,3,4.0,4.0,3.0,3.0
2,Liverpool,3,2019-08-24,Arsenal,1,3,1,3,2.0,6.0,3.0,6.0
3,Liverpool,4,2019-08-31,Burnley,0,3,0,3,3.0,9.0,3.0,9.0
4,Liverpool,5,2019-09-14,Newcastle,1,3,1,3,3.0,12.0,3.0,12.0
5,Liverpool,6,2019-09-22,Chelsea,0,2,1,3,3.0,15.0,3.0,15.0
6,Liverpool,7,2019-09-28,Sheffield Utd,0,1,0,3,2.0,17.0,3.0,18.0
7,Liverpool,8,2019-10-05,Leicester,1,2,1,3,1.0,18.0,3.0,21.0
8,Liverpool,9,2019-10-20,Man Utd,0,1,1,1,2.0,20.0,3.0,24.0
9,Liverpool,10,2019-10-27,Spurs,1,2,1,3,1.0,21.0,1.0,25.0


## 1.2. Map to Vaastav's team ids 

In [11]:
# Read the id and name columns of this season's teams table and build a
# name -> id lookup from them.
Vaastav_teams = pd.read_csv(f"{this_season_folder}{team_link}", usecols = ['id', 'name'])
Team_keys = {
    team_name: team_id
    for team_name, team_id in zip(Vaastav_teams['name'], Vaastav_teams['id'])
}

In [12]:
# Translate the team and opponent names into ids through the Team_keys lookup.
# A name that is missing from Team_keys maps to NaN.
for _name_col, _id_col in (('Team', 'Team_id'), ('Opponent', 'Opponent_id')):
    fixtures_df[_id_col] = fixtures_df[_name_col].map(Team_keys)
fixtures_df.head()

Unnamed: 0,Team,GW,Fixture_date,Opponent,Home?,Goals,opp_goals,points_marg,Team_id,Opponent_id
0,Liverpool,1,2019-08-09,Norwich,1,4,1,3,10,14
1,Liverpool,2,2019-08-17,Southampton,0,2,1,3,10,16
2,Liverpool,3,2019-08-24,Arsenal,1,3,1,3,10,1
3,Liverpool,4,2019-08-31,Burnley,0,3,0,3,10,5
4,Liverpool,5,2019-09-14,Newcastle,1,3,1,3,10,13


In [13]:
# fixtures_df.loc[fixtures_df['Team'] == 'Wolves']

## 1.3. Match players to team
The player gameweek data has no team column, so each player row is attached to a fixture by matching the opponent's team id and the fixture date.

In [14]:
# Load this season's per-player gameweek rows, derive the calendar date of each
# kickoff, and order the rows by that date and then by player name.
merged_gw_cols = (
    pd.read_csv(this_season_folder + "gws/merged_gw.csv", parse_dates = ['kickoff_time'])
    .assign(Fixture_date = lambda gw: gw['kickoff_time'].dt.date)
    .sort_values(['Fixture_date', 'name'])
)
merged_gw_cols.head()


Unnamed: 0,name,assists,bonus,bps,clean_sheets,creativity,element,fixture,goals_conceded,goals_scored,ict_index,influence,kickoff_time,minutes,opponent_team,own_goals,penalties_missed,penalties_saved,red_cards,round,saves,selected,team_a_score,team_h_score,threat,total_points,transfers_balance,transfers_in,transfers_out,value,was_home,yellow_cards,GW,Fixture_date
7,Adam_Lallana_195,0,0,0,0,0.0,195,1,0,0,0.0,0.0,2019-08-09 19:00:00+00:00,0,14,0,0,0,0,1,0,7143,1.0,4.0,0.0,0,0,0,0,60,True,0,1,2019-08-09
13,Adrián_San Miguel del Castillo_526,0,0,7,0,0.0,526,1,1,0,1.1,10.6,2019-08-09 19:00:00+00:00,51,14,0,0,0,0,1,2,0,1.0,4.0,0.0,1,0,0,0,45,True,0,1,2019-08-09
23,Alex_Oxlade-Chamberlain_193,0,0,0,0,0.0,193,1,0,0,0.0,0.0,2019-08-09 19:00:00+00:00,0,14,0,0,0,0,1,0,28598,1.0,4.0,0.0,0,0,0,0,65,True,0,1,2019-08-09
17,Alexander_Tettey_289,0,0,0,0,0.0,289,1,0,0,0.0,0.0,2019-08-09 19:00:00+00:00,0,10,0,0,0,0,1,0,10849,1.0,4.0,0.0,0,0,0,0,45,False,0,1,2019-08-09
25,Alisson_Ramses Becker_189,0,0,7,0,0.0,189,1,0,0,0.6,5.6,2019-08-09 19:00:00+00:00,38,14,0,0,0,0,1,2,1769075,1.0,4.0,0.0,1,0,0,0,60,True,0,1,2019-08-09


In [15]:
# Keep only the columns needed to attach players to fixtures, and rename the
# opponent column so it matches the key name used in fixtures_df.
players_df = (
    merged_gw_cols
    .loc[:, ['name', 'opponent_team', 'minutes', 'Fixture_date', 'total_points']]
    .rename(columns = {'opponent_team': 'Opponent_id'})
)
players_df.head()

Unnamed: 0,name,Opponent_id,minutes,Fixture_date,total_points
7,Adam_Lallana_195,14,0,2019-08-09,0
13,Adrián_San Miguel del Castillo_526,14,51,2019-08-09,1
23,Alex_Oxlade-Chamberlain_193,14,0,2019-08-09,0
17,Alexander_Tettey_289,10,0,2019-08-09,0
25,Alisson_Ramses Becker_189,14,38,2019-08-09,1


In [16]:
# Left join: every fixture row is kept, and gains one row per player whose
# gameweek record has the same opponent id and fixture date.
dataset_to_date = fixtures_df.merge(
    players_df,
    how = 'left',
    on = ['Opponent_id', 'Fixture_date'],
)
# Column selection is currently disabled:
# dataset_to_date = dataset_to_date[['name','Fixture_date', 'GW','Team','Team_id','Opponent','Opponent_id','Home?', 'minutes','total_points']]
dataset_to_date.head()

Unnamed: 0,Team,GW,Fixture_date,Opponent,Home?,Goals,opp_goals,points_marg,Team_id,Opponent_id,name,minutes,total_points
0,Liverpool,1,2019-08-09,Norwich,1,4,1,3,10,14,Adam_Lallana_195,0.0,0.0
1,Liverpool,1,2019-08-09,Norwich,1,4,1,3,10,14,Adrián_San Miguel del Castillo_526,51.0,1.0
2,Liverpool,1,2019-08-09,Norwich,1,4,1,3,10,14,Alex_Oxlade-Chamberlain_193,0.0,0.0
3,Liverpool,1,2019-08-09,Norwich,1,4,1,3,10,14,Alisson_Ramses Becker_189,38.0,1.0
4,Liverpool,1,2019-08-09,Norwich,1,4,1,3,10,14,Andrew_Robertson_181,90.0,2.0


## 1.4. Downfill players for weeks that have yet to be completed

In [17]:
# Fixtures dated today or later get a copy of the squad seen in the team's
# most recent fixture before today.
#
# Differences from a naive per-team concat:
#   * `today` is read once, so every comparison uses the same cut-off.
#   * A team with no remaining fixtures no longer raises "No objects to
#     concatenate"; an empty, correctly-shaped frame is produced instead.
#   * Each upcoming date is used once, so a date that appears on several rows
#     does not create duplicate downfill rows.
today = dt.date.today()
downfill_frames = []

for team, team_df in dataset_to_date.groupby('Team', sort = False):
    # Names present in the team's latest fixture that is strictly before today.
    played = team_df.loc[team_df['Fixture_date'] < today]
    latest_played_date = played['Fixture_date'].max()
    squad_names = (
        played.loc[played['Fixture_date'] == latest_played_date, 'name']
        .dropna()
        .unique()
    )

    upcoming_dates = team_df.loc[team_df['Fixture_date'] >= today, 'Fixture_date'].unique()

    for fixture_date in upcoming_dates:
        downfill_frames.append(
            pd.DataFrame({'Team': team, 'Fixture_date': fixture_date, 'name': squad_names})
        )

if downfill_frames:
    downfill_df = pd.concat(downfill_frames, ignore_index = True)
else:
    # Nothing left to play: keep the expected columns so later merges still work.
    downfill_df = pd.DataFrame(columns = ['Team', 'Fixture_date', 'name'])

downfill_df.head()

Unnamed: 0,Team,Fixture_date,name
0,Liverpool,2020-06-21,Adam_Lallana_195
1,Liverpool,2020-06-21,Adrián_San Miguel del Castillo_526
2,Liverpool,2020-06-21,Alex_Oxlade-Chamberlain_193
3,Liverpool,2020-06-21,Alisson_Ramses Becker_189
4,Liverpool,2020-06-21,Andrew_Robertson_181


In [18]:
# Attach the downfilled squads to the fixture/player table.  Both inputs carry
# a `name` column, so the merge yields `name_x` (from dataset_to_date) and
# `name_y` (from downfill_df).
player_fixtures_df = pd.merge(dataset_to_date, downfill_df, how = 'left', on = ['Team', 'Fixture_date'])

# Prefer the name from dataset_to_date; fall back to the downfilled name.
player_fixtures_df['name'] = player_fixtures_df['name_x'].fillna(player_fixtures_df['name_y'])
player_fixtures_df = player_fixtures_df.drop(columns = ['name_x', 'name_y'])

# forecast = 0 for fixtures dated before today, 1 for today and later.
_today = dt.date.today()
player_fixtures_df['forecast'] = np.where(player_fixtures_df['Fixture_date'] < _today, 0, 1)

# The player id is the first run of digits in the name string.  A raw string is
# used because '\d' is not a valid escape in a normal Python string literal.
# NOTE(review): `.astype(int)` raises if any row still has no name — confirm
# that every fixture ends up with players.
player_fixtures_df['player_id'] = (
    player_fixtures_df['name'].str.extract(r'(\d+)', expand = False).astype(int)
)
player_fixtures_df.head()

Unnamed: 0,Team,GW,Fixture_date,Opponent,Home?,Goals,opp_goals,points_marg,Team_id,Opponent_id,minutes,total_points,name,forecast,player_id
0,Liverpool,1,2019-08-09,Norwich,1,4,1,3,10,14,0.0,0.0,Adam_Lallana_195,0,195
1,Liverpool,1,2019-08-09,Norwich,1,4,1,3,10,14,51.0,1.0,Adrián_San Miguel del Castillo_526,0,526
2,Liverpool,1,2019-08-09,Norwich,1,4,1,3,10,14,0.0,0.0,Alex_Oxlade-Chamberlain_193,0,193
3,Liverpool,1,2019-08-09,Norwich,1,4,1,3,10,14,38.0,1.0,Alisson_Ramses Becker_189,0,189
4,Liverpool,1,2019-08-09,Norwich,1,4,1,3,10,14,90.0,2.0,Andrew_Robertson_181,0,181


## 1.5 Player positions
Position codes: 1 = GK, 2 = DEF, 3 = MID, 4 = FWD

In [19]:
# Read each player's id and element_type from the raw players file, renamed to
# `player_id` and `position`, then left-join them onto the player/fixture
# table using the columns the two frames share.
player_positions = (
    pd.read_csv(this_season_folder + players_raw, usecols = ['id', 'element_type'])
    .rename(columns = {'id': 'player_id', 'element_type': 'position'})
)
player_fixtures_df = player_fixtures_df.merge(player_positions, how = 'left')

## 1.6 Tidy

In [21]:
# Column pruning is currently switched off, so every column is kept:
# player_fixtures_df = player_fixtures_df[['player_id','name','position','forecast','Fixture_date', 'GW'
#                          ,'Team','Team_id','Opponent', 'Opponent_id','Home?','minutes'
#                          ,'total_points']]
player_fixtures_df.head(n = 5)

Unnamed: 0,Team,GW,Fixture_date,Opponent,Home?,Goals,opp_goals,points_marg,Team_id,Opponent_id,minutes,total_points,name,forecast,player_id,position
0,Liverpool,1,2019-08-09,Norwich,1,4,1,3,10,14,0.0,0.0,Adam_Lallana_195,0,195,3
1,Liverpool,1,2019-08-09,Norwich,1,4,1,3,10,14,51.0,1.0,Adrián_San Miguel del Castillo_526,0,526,1
2,Liverpool,1,2019-08-09,Norwich,1,4,1,3,10,14,0.0,0.0,Alex_Oxlade-Chamberlain_193,0,193,3
3,Liverpool,1,2019-08-09,Norwich,1,4,1,3,10,14,38.0,1.0,Alisson_Ramses Becker_189,0,189,1
4,Liverpool,1,2019-08-09,Norwich,1,4,1,3,10,14,90.0,2.0,Andrew_Robertson_181,0,181,2


# 2. Import features 
## 2.1. Data from last week (LW)
- Points
- Goals
- Minutes played 
- Bonus points 
- Red/yellow cards 
- Own goals
- ICT
- Selected by
- Cost

To be added later: (?)
- Saves (GK only)
- Goals conceded (GK, Def) 
- Clean sheets (GK, Def, MF)


In [None]:
# Work on an independent copy so the LW_* columns are not added to merged_gw_cols.
LW_df = merged_gw_cols.copy(deep = True)

In [None]:
# (new LW_* column, source column in merged_gw_cols) pairs, listed in the order
# the columns are created.  Each LW_* value is the source value from the same
# player's previous row.
_lw_sources = (
    ('LW_total_points', 'total_points'),
    ('LW_goals_scored', 'goals_scored'),
    ('LW_minutes_played', 'minutes'),
    ('LW_bonus_points', 'bonus'),
    ('LW_yellow_cards', 'yellow_cards'),
    ('LW_red_cards', 'red_cards'),
    ('LW_own_goals', 'own_goals'),
    ('LW_creativity', 'creativity'),
    ('LW_influence', 'influence'),
    ('LW_threat', 'threat'),
    ('LW_GK_saves', 'saves'),
    ('LW_GK_DEF_goals_conceded', 'goals_conceded'),
    ('LW_GK_DEF_MID_clean_sheets', 'clean_sheets'),
    ('LW_selected_by', 'selected'),
    ('LW_cost', 'value'),
)

_rows_by_player = merged_gw_cols.groupby('name')
for _lw_col, _source_col in _lw_sources:
    LW_df[_lw_col] = _rows_by_player[_source_col].shift(periods = 1)

In [None]:
# Keep the two join keys followed by every column whose name starts with 'LW'.
_lw_feature_cols = [col for col in LW_df.columns if col.startswith('LW')]
LW_columns_to_use = pd.Index(['name', 'Fixture_date'] + _lw_feature_cols)

LW_df = LW_df.loc[:, LW_columns_to_use]
LW_df.sort_values(['name', 'Fixture_date']).head()

## 2.2. Last 4 week average 

### Player's recent performance (L4W = last 4 weeks)
Average and stdev of 
- Points
- Goals
- Minutes played 
- Bonus points 
- Red/yellow cards 
- Own goals
- ICT
- Selected by
- Cost

To be added later: (?)
- Saves (GK only)
- Goals conceded (GK, Def) 
- Clean sheets (GK, Def, MF)

In [None]:
# Start from a copy of the last-week features, ordered by player and then date
# so that the rolling windows run through each player's rows in date order.
L4W_df = LW_df.copy().sort_values(['name', 'Fixture_date'])

In [None]:
# Rolling form per player: mean and standard deviation of each LW_* column
# over a window of up to four rows.  The LW_* columns are already shifted by
# one row, so a window ending on a fixture covers the rows before it.
# One loop replaces thirty copy-pasted lines.  It also fixes the misspelt
# column `L4W_avg_GK_DEF_goals_concedede`, now `L4W_avg_GK_DEF_goals_conceded`,
# matching its stdev sibling.
L4W_WINDOW = 4

# Statistic suffixes, in the order the L4W_* columns are created.
_l4w_stats = (
    'total_points',
    'goals_scored',
    'minutes_played',
    'bonus_points',
    'yellow_cards',
    'red_cards',
    'own_goals',
    'creativity',
    'influence',
    'threat',
    'GK_saves',
    'GK_DEF_goals_conceded',
    'GK_DEF_MID_clean_sheets',
    'selected_by',
    'cost',
)

for _stat in _l4w_stats:
    _window = L4W_df.groupby('name')['LW_' + _stat].rolling(L4W_WINDOW, min_periods = 1)
    # Drop the group level so each result aligns with L4W_df's own index.
    _window_mean = _window.mean().reset_index(level = 'name', drop = True)
    _window_std = _window.std().reset_index(level = 'name', drop = True)
    L4W_df['L4W_avg_' + _stat] = _window_mean
    L4W_df['L4W_stdev_' + _stat] = _window_std


In [None]:
# Keep the two join keys followed by every column whose name starts with 'L4W'.
_l4w_feature_cols = [col for col in L4W_df.columns if col.startswith('L4W')]
L4W_columns_to_use = pd.Index(['name', 'Fixture_date'] + _l4w_feature_cols)

L4W_df = L4W_df.loc[:, L4W_columns_to_use]
L4W_df.sort_values(['name', 'Fixture_date']).head(5)

## 2.3. This season (TSS)

Average and stdev of 
- Points
- Goals
- Minutes played 
- Bonus points 
- Red/yellow cards 
- Own goals
- ICT
- Selected by
- Cost

To be added later: (?)
- Saves (GK only)
- Goals conceded (GK, Def) 
- Clean sheets (GK, Def, MF)

In [None]:
# Start from a copy of the last-week features, ordered by player and then date.
TSS_df = LW_df.copy()
TSS_df = TSS_df.sort_values(['name', 'Fixture_date'])

In [None]:
# Season-to-date form per player: mean and standard deviation of each LW_*
# column over all of the player's rows so far.  An expanding window states
# that directly; the previous fixed 38-row rolling window gives the same
# values while a player has at most 38 rows.
# One loop replaces thirty copy-pasted lines.  It also fixes the misspelt
# column `TSS_avg_GK_DEF_goals_concedede`, now `TSS_avg_GK_DEF_goals_conceded`,
# matching its stdev sibling.

# Statistic suffixes, in the order the TSS_* columns are created.
_tss_stats = (
    'total_points',
    'goals_scored',
    'minutes_played',
    'bonus_points',
    'yellow_cards',
    'red_cards',
    'own_goals',
    'creativity',
    'influence',
    'threat',
    'GK_saves',
    'GK_DEF_goals_conceded',
    'GK_DEF_MID_clean_sheets',
    'selected_by',
    'cost',
)

for _stat in _tss_stats:
    _to_date = TSS_df.groupby('name')['LW_' + _stat].expanding(min_periods = 1)
    # Drop the group level so each result aligns with TSS_df's own index.
    _to_date_mean = _to_date.mean().reset_index(level = 'name', drop = True)
    _to_date_std = _to_date.std().reset_index(level = 'name', drop = True)
    TSS_df['TSS_avg_' + _stat] = _to_date_mean
    TSS_df['TSS_stdev_' + _stat] = _to_date_std

In [None]:
# Keep the two join keys followed by every column whose name starts with 'TSS'.
_tss_feature_cols = [col for col in TSS_df.columns if col.startswith('TSS')]
TSS_columns_to_use = pd.Index(['name', 'Fixture_date'] + _tss_feature_cols)

TSS_df = TSS_df.loc[:, TSS_columns_to_use]
TSS_df.sort_values(['name', 'Fixture_date']).head()

## 2.4. Last season's data (LSS)

In [None]:
# Load last season's per-player gameweek rows, read with ISO-8859-1 encoding.
LSS_df = pd.read_csv(
    f"{last_season_folder}gws/merged_gw.csv",
    encoding = "ISO-8859-1",
)

In [None]:
# Per-player averages over last season.
# `numeric_only=True` restricts the mean to numeric columns; without it,
# current pandas raises when the frame has non-numeric columns.
LSS_maths = LSS_df.groupby('name').mean(numeric_only = True)
LSS_maths = LSS_maths.reset_index()

# Remove every run of digits from the name so it can be matched against this
# season's names with their digits removed.  `regex=True` is required: current
# pandas treats the pattern as a literal string by default, which would leave
# the names unchanged.
LSS_maths['name'] = LSS_maths['name'].str.replace(r'\d+', '', regex = True)

# Preview only; the full frame has one row per player.
LSS_maths.head()

In [None]:
# Source column in LSS_maths -> output column name, listed in output order.
# `name` (digits already removed) becomes the `name_sub_id` join key; every
# statistic gets an `LSS_avg_` prefix.
_lss_column_names = {
    'name': 'name_sub_id',
    'total_points': 'LSS_avg_total_points',
    'goals_scored': 'LSS_avg_goals_scored',
    'minutes': 'LSS_avg_minutes_played',
    'bonus': 'LSS_avg_bonus_points',
    'yellow_cards': 'LSS_avg_yellow_cards',
    'red_cards': 'LSS_avg_red_cards',
    'own_goals': 'LSS_avg_own_goals',
    'creativity': 'LSS_avg_creativity',
    'influence': 'LSS_avg_influence',
    'threat': 'LSS_avg_threat',
    'saves': 'LSS_avg_GK_saves',
    'goals_conceded': 'LSS_avg_GK_DEF_goals_conceded',
    'clean_sheets': 'LSS_avg_GK_DEF_MID_clean_sheets',
    'selected': 'LSS_avg_selected_by',
    'value': 'LSS_avg_cost',
}

LSS_df = LSS_maths[list(_lss_column_names)].rename(columns = _lss_column_names)

In [None]:
# Preview the first five rows of the last-season averages.
LSS_df.head(n = 5)

## 2.5. Team strength

In [None]:
# Download the league table and display it; the result is not stored.
pd.read_csv(filepath_or_buffer = table_link)

# 3. Merge data

In [None]:
# Last week LW
dataset_df = pd.merge(player_fixtures_df,LW_df, how = 'left').sort_values(['name','Fixture_date'])

# Last 4 weeks L4W
dataset_df = pd.merge(dataset_df, L4W_df, how = 'left').sort_values(['name','Fixture_date'])

# This season to date TSS
dataset_df = pd.merge(dataset_df, TSS_df, how = 'left').sort_values(['name','Fixture_date'])

# Last season LSS
dataset_df['name_sub_id'] = dataset_df['name'].str.replace('\d+', '')
dataset_df = pd.merge(left = dataset_df, right = LSS_df, how = 'left', left_on = 'name_sub_id', right_on = 'name_sub_id')#.sort_values(['name','Fixture_date'])

for i in dataset_df.columns:
    print(i)
dataset_df.head(50)
