# Add new features with aggregated historical data for each sample in merged seasons data

In [40]:
from src.features.data_engineering import get_merged_seasons_data

## Load merged seasons dataframe

In [41]:
data = get_merged_seasons_data()

In [42]:
data.head()

Unnamed: 0,name,assists,bonus,bps,clean_sheets,creativity,element,fixture,goals_conceded,goals_scored,...,total_points,transfers_balance,transfers_in,transfers_out,value,was_home,yellow_cards,GW,position,season
0,Leonardo_Ulloa,0,0,2,0,0.1,185,4,0,0,...,1,0,0,0,60,False,0,1,FWD,2016-17
1,Christian_Fuchs,0,0,17,0,13.8,168,4,2,0,...,0,0,0,0,55,False,1,1,DEF,2016-17
2,Ron-Robert_Zieler,0,0,0,0,0.0,166,4,0,0,...,0,0,0,0,45,False,0,1,GK,2016-17
3,Moses_Odubajo,0,0,0,0,0.0,151,4,0,0,...,0,0,0,0,45,True,0,1,DEF,2016-17
4,Robert_Snodgrass,0,3,40,0,37.7,155,4,1,1,...,10,0,0,0,55,True,0,1,MID,2016-17


## For now, work with only a single season's data

In [43]:
# Boolean-mask filter: keep only the rows whose 'season' value is '2019-20'.
data19 = data[data['season'] == '2019-20']

### And start with only one player, to make it simpler to test

In [44]:
# Single-player subset used to prototype the rolling features.
# Take an explicit copy: the following cell adds a column to this frame in place,
# and mutating a filtered selection of another frame is ambiguous (pandas may warn
# about setting values on a copy of a slice).
data_auba = data19[data19['name'] == 'Pierre-Emerick_Aubameyang_11'].copy()

In [45]:
# Prototype of one rolling feature on the single-player frame: the mean of 'bps'
# over a window of up to 3 rows per 'element' (min_periods=1, so the first rows
# average whatever is available), inserted as column 'bps-3' at position 4.
# NOTE: the window includes the current row, and re-running this cell fails because
# DataFrame.insert refuses to add a column name that already exists.
data_auba.insert(4, 'bps-3', data_auba.groupby('element', as_index=False)['bps'].rolling(3, min_periods=1).mean()['bps'])

In [46]:
data_auba.head(15)

Unnamed: 0,name,assists,bonus,bps,bps-3,clean_sheets,creativity,element,fixture,goals_conceded,...,total_points,transfers_balance,transfers_in,transfers_out,value,was_home,yellow_cards,GW,position,season
68333,Pierre-Emerick_Aubameyang_11,0,0,25,25.0,1,6.5,11,10,0,...,6,0,0,0,110,False,0,1,FWD,2019-20
68465,Pierre-Emerick_Aubameyang_11,0,1,25,25.0,0,18.9,11,11,1,...,6,113772,181990,68218,110,True,1,2,FWD,2019-20
69329,Pierre-Emerick_Aubameyang_11,1,0,17,22.333333,0,25.3,11,24,3,...,5,-248663,52971,301634,110,False,0,3,FWD,2019-20
70012,Pierre-Emerick_Aubameyang_11,0,3,35,25.666667,0,26.6,11,31,2,...,9,-109901,59121,169022,110,True,0,4,FWD,2019-20
70528,Pierre-Emerick_Aubameyang_11,0,3,54,35.333333,0,2.2,11,49,2,...,13,-59610,66384,125994,110,False,0,5,FWD,2019-20
71072,Pierre-Emerick_Aubameyang_11,0,0,26,38.333333,0,1.6,11,51,2,...,5,117466,158891,41425,110,True,1,6,FWD,2019-20
71667,Pierre-Emerick_Aubameyang_11,0,3,33,37.666667,0,25.7,11,67,1,...,9,-30817,72845,103662,110,False,0,7,FWD,2019-20
72049,Pierre-Emerick_Aubameyang_11,0,0,0,19.666667,1,12.8,11,71,0,...,2,201560,219717,18157,110,True,0,8,FWD,2019-20
72770,Pierre-Emerick_Aubameyang_11,0,0,7,13.333333,0,4.7,11,88,1,...,2,133927,185133,51206,111,False,0,9,FWD,2019-20
73192,Pierre-Emerick_Aubameyang_11,0,0,5,4.0,0,12.4,11,91,2,...,2,52748,105976,53228,111,True,0,10,FWD,2019-20


In [47]:
def update_team_score_feature(df):
    """
    Replace 'team_h_score'/'team_a_score' with scores seen from the player's side.

    Adds two columns and removes the two originals:
      * 'player_team_score'   - 'team_h_score' where 'was_home' is truthy,
                                'team_a_score' otherwise
      * 'opponent_team_score' - 'team_a_score' where 'was_home' is truthy,
                                'team_h_score' otherwise

    The frame is modified IN PLACE and also returned, so pass a copy if the
    original must stay untouched.

    :param df: DataFrame with 'team_h_score', 'team_a_score' and 'was_home' columns
    :return: the same DataFrame object, with the two new columns in place of the
             two original score columns
    """
    # astype(bool) reproduces Python truthiness of each 'was_home' value, which is
    # what the previous row-by-row `if row['was_home']` check relied on.
    played_at_home = df['was_home'].astype(bool)
    home_goals = df['team_h_score']
    away_goals = df['team_a_score']

    # Vectorised selection: one pass over the columns instead of one Python call
    # per row, and it also works on an empty frame (row-wise apply does not
    # return a Series there).
    player_team_score = home_goals.where(played_at_home, away_goals)
    opponent_team_score = away_goals.where(played_at_home, home_goals)

    # Column positions are looked up one after the other on purpose: the first
    # insert shifts 'team_h_score' when it sits to the right of 'team_a_score'.
    df.insert(list(df.columns).index('team_a_score'), 'player_team_score', player_team_score)
    df.insert(list(df.columns).index('team_h_score'), 'opponent_team_score', opponent_team_score)
    df.drop(['team_h_score', 'team_a_score'], axis=1, inplace=True)

    return df

In [48]:
def create_rolling_features(df, rolling_columns, times):
    """
    Add running-mean features per ('season', 'element') group next to each source column.

    For every entry ``t`` in ``times`` and every column ``col`` in ``rolling_columns``
    a new column is inserted directly after ``col``:
      * ``t`` is an int  -> ``col + '-' + str(t)``: mean over the last ``t`` rows of
        the group (fewer at the start of a group, because min_periods=1)
      * ``t == 'all'``   -> ``col + '-all'``: mean over every row of the group seen
        so far (expanding window)

    Previously 'all' was turned into a fixed window equal to the size of the FIRST
    group only, so any group with more rows than that got a truncated window
    instead of its full history.

    The window always includes the current row and follows the row order of ``df``
    within each group.
    NOTE(review): this presumably expects rows to be in chronological order, and
    using the current row may leak the target when predicting that same row -
    confirm against the modelling code.

    The frame is modified IN PLACE and also returned.

    :param df: DataFrame with 'season' and 'element' columns plus all of ``rolling_columns``
    :param rolling_columns: names of the columns to aggregate
    :param times: iterable of window sizes (ints) and/or the string 'all'
    :return: the same DataFrame object with the new feature columns inserted
    """
    group_keys = ['season', 'element']
    # groupby(...).rolling()/expanding() prepend the group keys to the result index;
    # dropping those levels restores the original row index so that df.insert can
    # align the values back onto the right rows.
    key_levels = list(range(len(group_keys)))

    for t in times:
        whole_history = t == 'all'
        suffix = '-all' if whole_history else '-' + str(t)
        for col in rolling_columns:
            per_group = df.groupby(group_keys)[col]
            if whole_history:
                windowed = per_group.expanding(min_periods=1)
            else:
                windowed = per_group.rolling(t, min_periods=1)
            feature = windowed.mean().reset_index(level=key_levels, drop=True)

            # Looked up on every iteration: earlier inserts shift column positions.
            insert_loc = list(df.columns).index(col) + 1
            df.insert(insert_loc, col + suffix, feature)
    return df

In [49]:
# Columns that get rolling-mean companions. 'player_team_score' and
# 'opponent_team_score' only exist after update_team_score_feature has run.
rolling_columns = [
    'assists',
    'bonus',
    'bps',
    'clean_sheets',
    'creativity',
    'goals_conceded',
    'goals_scored',
    'ict_index',
    'influence',
    'minutes',
    'own_goals',
    'penalties_missed',
    'penalties_saved',
    'red_cards',
    'saves',
    'selected',
    'player_team_score',
    'opponent_team_score',
    'threat',
    'total_points',
    'transfers_in',
    'transfers_out',
    'value',
    'yellow_cards',
]

In [50]:
# Window specs handed to create_rolling_features: integers are window lengths in
# rows; 'all' requests the '-all' variant, which is meant to cover the whole
# history of a player-season.
# Each new column is inserted directly after its source column, so the columns end
# up in the reverse of this order (e.g. 'bps', 'bps-3', 'bps-6', 'bps-all').
times = ['all', 6, 3]

In [51]:
# Work on a copy: update_team_score_feature inserts and drops columns in place,
# so this keeps `data` itself unchanged.
data_updated = update_team_score_feature(data.copy())
# create_rolling_features also inserts into the frame it is given and returns that
# same frame, so `data_rolling` and `data_updated` refer to one object.
data_rolling = create_rolling_features(data_updated, rolling_columns, times)

In [58]:
list(data_rolling.columns)

['name',
 'assists',
 'assists-3',
 'assists-6',
 'assists-all',
 'bonus',
 'bonus-3',
 'bonus-6',
 'bonus-all',
 'bps',
 'bps-3',
 'bps-6',
 'bps-all',
 'clean_sheets',
 'clean_sheets-3',
 'clean_sheets-6',
 'clean_sheets-all',
 'creativity',
 'creativity-3',
 'creativity-6',
 'creativity-all',
 'element',
 'fixture',
 'goals_conceded',
 'goals_conceded-3',
 'goals_conceded-6',
 'goals_conceded-all',
 'goals_scored',
 'goals_scored-3',
 'goals_scored-6',
 'goals_scored-all',
 'ict_index',
 'ict_index-3',
 'ict_index-6',
 'ict_index-all',
 'influence',
 'influence-3',
 'influence-6',
 'influence-all',
 'kickoff_time',
 'minutes',
 'minutes-3',
 'minutes-6',
 'minutes-all',
 'opponent_team',
 'own_goals',
 'own_goals-3',
 'own_goals-6',
 'own_goals-all',
 'penalties_missed',
 'penalties_missed-3',
 'penalties_missed-6',
 'penalties_missed-all',
 'penalties_saved',
 'penalties_saved-3',
 'penalties_saved-6',
 'penalties_saved-all',
 'red_cards',
 'red_cards-3',
 'red_cards-6',
 'red_card