In [92]:
import pandas as pd
import numpy as np
import json
import os
from tqdm import tqdm
import time
import re
from IPython.display import display
from pathlib import Path 
DATAPATH = Path(r'Data')

In [93]:
def remove_tags(string):
    """
    Return ``string`` with every ``<...>`` HTML tag removed.

    The match is non-greedy, so each tag is removed individually and the
    text between two tags is preserved.
    """
    tag_pattern = re.compile('<.*?>')
    return tag_pattern.sub('', string)

In [94]:
def new_date_features(df) :
    """
    Derive calendar features from the ``date`` column.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``date`` column that ``pd.to_datetime`` can parse.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` where ``date`` is converted to datetime and the
        columns ``year``, ``quarter``, ``month``, ``week`` (ISO week number),
        ``day``, ``weekday`` (Monday=0 ... Sunday=6) and the 0/1 flags
        ``is_monday`` ... ``is_sunday`` are added. The input frame is left
        untouched.
    """
    # Work on a copy so a frame that is itself a slice of another frame is not
    # written to (avoids SettingWithCopyWarning and hidden state).
    df = df.copy()
    df['date'] = pd.to_datetime(df['date'])
    when = df['date'].dt

    df['year'] = when.year
    df['quarter'] = when.quarter
    df['month'] = when.month
    # Series.dt.week is deprecated and no longer exists in pandas 2.x;
    # isocalendar().week is the documented replacement (same ISO week number).
    iso_week = when.isocalendar().week
    # isocalendar() returns a nullable UInt32 column: keep a plain int64 column
    # (as before) whenever there is no missing date.
    df['week'] = iso_week.astype('int64') if iso_week.notna().all() else iso_week
    df['day'] = when.day
    df['weekday'] = when.weekday

    day_names = ('monday', 'tuesday', 'wednesday', 'thursday',
                 'friday', 'saturday', 'sunday')
    for day_number, day_name in enumerate(day_names) :
        df[f'is_{day_name}'] = np.where(df['weekday'] == day_number, 1, 0)

    return df

In [95]:
def new_features(df) :
    """
    Parse the scraped match statistics into numeric columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``possession_home/away``, ``shot_on_target_home/away``,
        ``saves_home/away`` (text, possibly wrapped in HTML tags) and
        ``score`` (``"<home>–<away>"`` or ``"<home>-<away>"``).

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` where
        * ``possession_*`` is a ratio between 0 and 1,
        * ``shot_on_target_*`` / ``saves_*`` hold the first integer found in
          the cell and ``shot_total_*`` / ``saves_total_*`` the second one
          (percentages are ignored),
        * ``goal_home`` / ``goal_away`` come from ``score``,
        * ``points_result_*`` is 3 for a win, 1 for a draw and 0 for a loss.
        Any column whose name contains ``_raw`` is removed, as before.

    Raises
    ------
    IndexError
        If a shots/saves cell holds fewer than two integers, or ``score``
        has no separator.
    ValueError
        If a possession or goal value is not an integer.
    """
    def _percent_to_ratio(cell) :
        # "<strong>52%</strong>" -> 0.52
        return int(remove_tags(str(cell)).strip("%")) / 100

    def _first_two_counts(cell) :
        # Drop tags and "NN%" tokens, then keep the first two integers.
        # The cell is either "x of y — p%" or "p% — x of y"; in both cases the
        # two integers left after removing the percentage are (x, y).
        text = re.sub(r"\d+%", " ", remove_tags(str(cell)))
        found = re.findall(r"\d+", text)
        return int(found[0]), int(found[1])

    # Copy first: the caller's frame (often a slice of the raw data) is not modified.
    df = df.copy()

    for side in ("home", "away") :
        df[f"possession_{side}"] = df[f"possession_{side}"].apply(_percent_to_ratio)

    # (column holding the text, name of the "total" column derived from it)
    for stat, total in (("shot_on_target", "shot_total"), ("saves", "saves_total")) :
        for side in ("home", "away") :
            counts = [_first_two_counts(cell) for cell in df[f"{stat}_{side}"]]
            df[f"{stat}_{side}"] = [pair[0] for pair in counts]
            df[f"{total}_{side}"] = [pair[1] for pair in counts]

    # The score separator is either an en dash or a plain hyphen.
    goals = [re.split(r"–|-", score) for score in df["score"]]
    df["goal_home"] = [int(pair[0]) for pair in goals]
    df["goal_away"] = [int(pair[1]) for pair in goals]

    # Victory : 3 pts, Draw : 1 pt, Loss : 0 pt.
    # Computed from the frame being transformed (not from a notebook global).
    draw = df["goal_home"] == df["goal_away"]
    df["points_result_home"] = np.where(df["goal_home"] > df["goal_away"], 3,
                                        np.where(draw, 1, 0))
    df["points_result_away"] = np.where(df["goal_away"] > df["goal_home"], 3,
                                        np.where(draw, 1, 0))

    # Kept for backward compatibility: columns containing "_raw" are dropped.
    return df.drop(columns=df.filter(regex='_raw').columns)

In [96]:
def new_cumul_sum_features(df) :
    """
    Add running totals per (season, team) for every match statistic.

    For each statistic ``s`` a ``cumul_s`` column is added holding the
    cumulative sum of ``s`` within the team's season, in row order and
    including the current row. ``goal_difference`` is then
    ``cumul_goal - cumul_goal_against``.

    TODO : these columns leak the result of the current match, because the
    running total includes the row it is stored on. Planned fix: use the total
    *before* the current row, and np.nan when there is no previous row.
    """
    stat_names = ['points_result', 'shot_on_target', 'saves', 'shot_total',
                  'saves_total', 'goal', 'points_result_against',
                  'shot_on_target_against', 'saves_against', 'shot_total_against',
                  'saves_total_against', 'goal_against']

    # sort = False keeps the groups in order of first appearance.
    by_team_season = df.groupby(["season", "team"], sort = False)
    running_totals = {f"cumul_{name}" : by_team_season[f'{name}'].cumsum()
                      for name in stat_names}

    result = df.assign(**running_totals)
    result["goal_difference"] = result["cumul_goal"] - result["cumul_goal_against"]
    return result

In [97]:
def new_cumul_average_features(df) :
    """
    Add the expanding (season-to-date) mean of every match statistic.

    For each statistic ``s`` a ``cumul_average_s`` column is added holding
    the mean of ``s`` over all rows of the same (season, team) up to and
    including the current row.
    """
    stat_names = ['points_result', 'shot_on_target', 'saves', 'shot_total',
                  'saves_total', 'goal', 'points_result_against',
                  'shot_on_target_against', 'saves_against', 'shot_total_against',
                  'saves_total_against', 'goal_against']

    def _expanding_mean(values) :
        return values.expanding().mean()

    by_team_season = df.groupby(["season", "team"], sort = False)
    running_means = {f"cumul_average_{name}" : by_team_season[f'{name}'].transform(_expanding_mean)
                     for name in stat_names}

    return df.assign(**running_means)

### Moving average features

points_result, goal, goal_against, goal-goal_against

In [98]:
def new_moving_average_features(df, windows=(1, 2, 3, 4, 5, 6)) :
    """
    Add moving averages of every match statistic over the previous matches.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``season``, ``team`` and the twelve statistic columns
        listed below. Rows of a team are averaged in their current order.
    windows : iterable of int, optional
        Window sizes (number of previous matches). Defaults to 1..6, which
        reproduces the historical column set.

    Returns
    -------
    pandas.DataFrame
        A new frame with one ``moving_average_{window}_{feature}`` column per
        feature and window size; ``df`` itself is not modified.
    """
    features = ['points_result', 'shot_on_target', 'saves', 'shot_total',
       'saves_total', 'goal', 'points_result_against',
       'shot_on_target_against', 'saves_against', 'shot_total_against',
       'saves_total_against', 'goal_against',]

    # Group once; sort = False keeps groups in order of first appearance.
    by_team_season = df.groupby(["season", "team"], sort = False)

    moving_averages = {}
    for feature in features :
        for window in windows :
            # closed = 'left' excludes the current row from its own window, so
            # the average only uses the `window` previous matches (no leakage).
            # The value is NaN until `window` previous matches are available.
            moving_averages[f"moving_average_{window}_{feature}"] = by_team_season[f'{feature}'].transform(
                lambda values, window=window : values.rolling(window, closed = 'left').mean())

    # A single assign instead of one frame copy per generated column.
    return df.assign(**moving_averages)

### Fatigue features

In [99]:
def new_fatigues_features(df, lags=(1, 2, 3, 4, 5)) :
    """
    Add "fatigue" features: days elapsed since a team's earlier matches.

    For every lag ``k`` the column ``fatigue_{k}_match`` holds the absolute
    number of days between the date of the current row and the date of the
    row ``k`` positions earlier for the same (season, team). It is NaN when
    the team has fewer than ``k`` earlier rows in that season.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``season``, ``team`` and ``date`` (datetime, or anything
        ``pd.to_datetime`` can parse).
    lags : iterable of int, optional
        Numbers of matches to look back. Defaults to 1..5, which reproduces
        the historical ``fatigue_1_match`` ... ``fatigue_5_match`` columns.

    Returns
    -------
    pandas.DataFrame
        A new frame with the fatigue columns appended; ``df`` is not modified
        and no temporary column is left behind.
    """
    dates = pd.to_datetime(df["date"])
    # Group on plain arrays (positional) so a non-unique index cannot interfere.
    per_team = dates.groupby([df["season"].to_numpy(), df["team"].to_numpy()], sort = False)

    fatigue = {}
    for lag in lags :
        # Date of the match `lag` rows earlier for the same team and season
        # (NaT when there is none), subtracted from the current match date.
        elapsed = dates - per_team.shift(lag)
        # abs(): rows are not guaranteed to be in chronological order.
        fatigue[f"fatigue_{lag}_match"] = elapsed.dt.days.abs()

    return df.assign(**fatigue)
        

# Production

In [100]:
# File name of the raw matches CSV; it is read from DATAPATH and reused to
# name the CSV files written by this notebook.
filepath = 'Ligue-1-2015-2022.csv'

In [101]:
# Load the raw match data from DATAPATH / filepath.
df_raw = pd.read_csv(DATAPATH /filepath)

In [102]:
# Columns of the raw CSV that are kept for the feature engineering below.
features = [
    'gameweek',
    'dayofweek',
    'date',
    'start_time',
    'home_team',
    'score',
    'away_team',
    'season',
    'possession_home',
    'possession_away',
    'shot_on_target_home',
    'shot_on_target_away',
    'saves_home',
    'saves_away',
]

In [103]:
# Keep the selected columns and drop every row with a missing value.
# The explicit .copy() makes eda_df independent from df_raw, so the column
# assignment below never writes into a view of the raw frame
# (no SettingWithCopyWarning) and df_raw stays reusable when re-running.
eda_df = df_raw[features].dropna(axis = 0).copy()
# Integer code of the season (category codes follow the sorted season labels).
eda_df["SEASON_ID"] = eda_df['season'].astype('category').cat.codes
eda_df = new_date_features(eda_df)
eda_df = new_features(eda_df)

  df['week'] = df.date.dt.week


In [104]:
# Optional single-season filter, currently disabled:
#SEASON = "2015-2016"
#df_unique = eda_df.query(f"(season == '{SEASON}')").drop_duplicates()
# Remove fully duplicated rows from the engineered match table.
df_unique = eda_df.drop_duplicates()

In [105]:
# Columns identifying a match, shared by the home and the away view.
_match_keys = ["gameweek", "season", "date", "start_time"]
# Statistic stems, in the column order used by both views.
_stat_stems = ["points_result", "shot_on_target", "saves",
               "shot_total", "saves_total", "goal"]
_home_stats = [f"{stem}_home" for stem in _stat_stems]
_away_stats = [f"{stem}_away" for stem in _stat_stems]

# Home view: the team's own (home) statistics first, then the opponent's.
features_home = _match_keys + ["home_team"] + _home_stats + _away_stats

# Away view: the team's own (away) statistics first, then the opponent's.
features_away = _match_keys + ["away_team"] + _away_stats + _home_stats

In [106]:
# Build one row per (match, team): the home view and the away view of every
# match are relabelled to a common schema and stacked.
def _relabel_side(column, side, replacement) :
    """Replace the ``_<side>`` marker (or, failing that, the ``<side>_`` marker)
    of a column name by ``replacement``; an empty replacement removes it."""
    as_suffix, as_prefix = f"_{side}", f"{side}_"
    if as_suffix in column :
        return column.replace(as_suffix, f"_{replacement}" if replacement else '')
    if as_prefix in column :
        return column.replace(as_prefix, f"{replacement}_" if replacement else '')
    return column


def _team_view(matches, columns, own_side, other_side) :
    """Return the matches as seen by the ``own_side`` team: own statistics lose
    their side marker, the opponent's get ``against`` instead."""
    # Grouping on every selected column and taking the first row keeps one
    # row per distinct combination.
    view = matches[columns].groupby(columns).first().reset_index()
    view = view.rename(columns=lambda name: _relabel_side(name, own_side, ''))
    view = view.rename(columns=lambda name: _relabel_side(name, other_side, 'against'))
    return view


home_df = _team_view(df_unique, features_home, "home", "away")
away_df = _team_view(df_unique, features_away, "away", "home")

concat_team = pd.concat([home_df, away_df]).sort_values(by = ['season', "gameweek"])

In [107]:
# Run the per-team feature steps in sequence on the stacked team rows.
league_table = (
    concat_team
    .pipe(new_cumul_sum_features)
    .pipe(new_cumul_average_features)
    .pipe(new_moving_average_features)
    .pipe(new_fatigues_features)
)
# Independent snapshot of the table at this stage.
league_table_test = league_table.copy()

In [108]:
# Preview the first rows of one team to eyeball the generated features.
league_table[league_table["team"] == "Paris S-G"].head()

Unnamed: 0,gameweek,season,date,start_time,team,points_result,shot_on_target,saves,shot_total,saves_total,...,moving_average_2_goal_against,moving_average_3_goal_against,moving_average_4_goal_against,moving_average_5_goal_against,moving_average_6_goal_against,fatigue_1_match,fatigue_2_match,fatigue_3_match,fatigue_4_match,fatigue_5_match
0,1.0,2015-2016,2015-08-07,20:30,Paris S-G,3,3,2,7,2,...,,,,,,,,,,
79,2.0,2015-2016,2015-08-16,21:00,Paris S-G,3,6,2,18,2,...,,,,,,9.0,,,,
140,3.0,2015-2016,2015-08-21,20:30,Paris S-G,3,5,3,9,3,...,0.0,,,,,5.0,14.0,,,
219,4.0,2015-2016,2015-08-30,21:00,Paris S-G,3,8,2,19,2,...,0.0,0.0,,,,9.0,14.0,23.0,,
280,5.0,2015-2016,2015-09-11,20:30,Paris S-G,1,5,1,19,3,...,0.0,0.0,0.0,,,12.0,21.0,26.0,35.0,


In [109]:
# Save the per-team table next to the raw data, without the index column.
league_table.to_csv(DATAPATH / f"league_table_{filepath}",index=False)

## Update original CSV with transformed data

In [110]:
def new_unique_id(df) :
    """
    Add a numeric ``GAME_ID`` column to the match table.

    ``GAME_ID`` is the value of ``year`` followed by the 1-based position of
    the row within its ``season`` (in current row order), zero-padded to at
    least three digits, e.g. year 2015 and 21st row of the season -> 2015021.

    NOTE(review): the prefix is the ``year`` column of the row, while the
    counter restarts per ``season``; uniqueness across seasons is not
    enforced here - confirm this is the intended identifier.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``season`` and an integer ``year`` column.

    Returns
    -------
    pandas.DataFrame
        A new frame with ``GAME_ID`` appended; ``df`` is not modified and no
        helper column is left behind.
    """
    # 1-based running position of each row inside its season.
    match_number = df.groupby('season').cumcount() + 1
    # Vectorised equivalent of f"{year}{match_number:03d}" for every row.
    game_id = df['year'].astype(str) + match_number.astype(str).str.zfill(3)

    return df.assign(GAME_ID=game_id.astype('int64'))
    

In [111]:
# Give every match a GAME_ID and every team row an integer TEAM_ID
# (the category code of the team name).
eda_df = new_unique_id(eda_df)
league_table = league_table.assign(
    TEAM_ID=league_table['team'].astype('category').cat.codes
)

In [112]:
# Show every column (no truncation) for the first rows of one team.
with pd.option_context('display.max_columns', None) :
    display(league_table[league_table['team']== 'Paris S-G'].head())

Unnamed: 0,gameweek,season,date,start_time,team,points_result,shot_on_target,saves,shot_total,saves_total,goal,points_result_against,shot_on_target_against,saves_against,shot_total_against,saves_total_against,goal_against,cumul_points_result,cumul_shot_on_target,cumul_saves,cumul_shot_total,cumul_saves_total,cumul_goal,cumul_points_result_against,cumul_shot_on_target_against,cumul_saves_against,cumul_shot_total_against,cumul_saves_total_against,cumul_goal_against,goal_difference,cumul_average_points_result,cumul_average_shot_on_target,cumul_average_saves,cumul_average_shot_total,cumul_average_saves_total,cumul_average_goal,cumul_average_points_result_against,cumul_average_shot_on_target_against,cumul_average_saves_against,cumul_average_shot_total_against,cumul_average_saves_total_against,cumul_average_goal_against,moving_average_1_points_result,moving_average_2_points_result,moving_average_3_points_result,moving_average_4_points_result,moving_average_5_points_result,moving_average_6_points_result,moving_average_1_shot_on_target,moving_average_2_shot_on_target,moving_average_3_shot_on_target,moving_average_4_shot_on_target,moving_average_5_shot_on_target,moving_average_6_shot_on_target,moving_average_1_saves,moving_average_2_saves,moving_average_3_saves,moving_average_4_saves,moving_average_5_saves,moving_average_6_saves,moving_average_1_shot_total,moving_average_2_shot_total,moving_average_3_shot_total,moving_average_4_shot_total,moving_average_5_shot_total,moving_average_6_shot_total,moving_average_1_saves_total,moving_average_2_saves_total,moving_average_3_saves_total,moving_average_4_saves_total,moving_average_5_saves_total,moving_average_6_saves_total,moving_average_1_goal,moving_average_2_goal,moving_average_3_goal,moving_average_4_goal,moving_average_5_goal,moving_average_6_goal,moving_average_1_points_result_against,moving_average_2_points_result_against,moving_average_3_points_result_against,moving_average_4_points_result_against,moving_average_5_points_resu
lt_against,moving_average_6_points_result_against,moving_average_1_shot_on_target_against,moving_average_2_shot_on_target_against,moving_average_3_shot_on_target_against,moving_average_4_shot_on_target_against,moving_average_5_shot_on_target_against,moving_average_6_shot_on_target_against,moving_average_1_saves_against,moving_average_2_saves_against,moving_average_3_saves_against,moving_average_4_saves_against,moving_average_5_saves_against,moving_average_6_saves_against,moving_average_1_shot_total_against,moving_average_2_shot_total_against,moving_average_3_shot_total_against,moving_average_4_shot_total_against,moving_average_5_shot_total_against,moving_average_6_shot_total_against,moving_average_1_saves_total_against,moving_average_2_saves_total_against,moving_average_3_saves_total_against,moving_average_4_saves_total_against,moving_average_5_saves_total_against,moving_average_6_saves_total_against,moving_average_1_goal_against,moving_average_2_goal_against,moving_average_3_goal_against,moving_average_4_goal_against,moving_average_5_goal_against,moving_average_6_goal_against,fatigue_1_match,fatigue_2_match,fatigue_3_match,fatigue_4_match,fatigue_5_match,TEAM_ID
0,1.0,2015-2016,2015-08-07,20:30,Paris S-G,3,3,2,7,2,1,0,2,1,12,3,0,3,3,2,7,2,1,0,2,1,12,3,0,1,3.0,3.0,2.0,7.0,2.0,1.0,0.0,2.0,1.0,12.0,3.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,22
79,2.0,2015-2016,2015-08-16,21:00,Paris S-G,3,6,2,18,2,2,0,2,4,8,6,0,6,9,4,25,4,3,0,4,5,20,9,0,3,3.0,4.5,2.0,12.5,2.0,1.5,0.0,2.0,2.5,10.0,4.5,0.0,3.0,,,,,,3.0,,,,,,2.0,,,,,,7.0,,,,,,2.0,,,,,,1.0,,,,,,0.0,,,,,,2.0,,,,,,1.0,,,,,,12.0,,,,,,3.0,,,,,,0.0,,,,,,9.0,,,,,22
140,3.0,2015-2016,2015-08-21,20:30,Paris S-G,3,5,3,9,3,1,0,3,4,10,5,0,9,14,7,34,7,4,0,7,9,30,14,0,4,3.0,4.666667,2.333333,11.333333,2.333333,1.333333,0.0,2.333333,3.0,10.0,4.666667,0.0,3.0,3.0,,,,,6.0,4.5,,,,,2.0,2.0,,,,,18.0,12.5,,,,,2.0,2.0,,,,,2.0,1.5,,,,,0.0,0.0,,,,,2.0,2.0,,,,,4.0,2.5,,,,,8.0,10.0,,,,,6.0,4.5,,,,,0.0,0.0,,,,,5.0,14.0,,,,22
219,4.0,2015-2016,2015-08-30,21:00,Paris S-G,3,8,2,19,2,3,0,2,5,3,8,0,12,22,9,53,9,7,0,9,14,33,22,0,7,3.0,5.5,2.25,13.25,2.25,1.75,0.0,2.25,3.5,8.25,5.5,0.0,3.0,3.0,3.0,,,,5.0,5.5,4.666667,,,,3.0,2.5,2.333333,,,,9.0,13.5,11.333333,,,,3.0,2.5,2.333333,,,,1.0,1.5,1.333333,,,,0.0,0.0,0.0,,,,3.0,2.5,2.333333,,,,4.0,4.0,3.0,,,,10.0,9.0,10.0,,,,5.0,5.5,4.666667,,,,0.0,0.0,0.0,,,,9.0,14.0,23.0,,,22
280,5.0,2015-2016,2015-09-11,20:30,Paris S-G,1,5,1,19,3,2,1,3,3,10,5,2,13,27,10,72,12,9,1,12,17,43,27,2,7,2.6,5.4,2.0,14.4,2.4,1.8,0.2,2.4,3.4,8.6,5.4,0.4,3.0,3.0,3.0,3.0,,,8.0,6.5,6.333333,5.5,,,2.0,2.5,2.333333,2.25,,,19.0,14.0,15.333333,13.25,,,2.0,2.5,2.333333,2.25,,,3.0,2.0,2.0,1.75,,,0.0,0.0,0.0,0.0,,,2.0,2.5,2.333333,2.25,,,5.0,4.5,4.333333,3.5,,,3.0,6.5,7.0,8.25,,,8.0,6.5,6.333333,5.5,,,0.0,0.0,0.0,0.0,,,12.0,21.0,26.0,35.0,,22


In [113]:
# Attach to each match the league-table row of both teams: first the home
# team (columns suffixed _HOME), then the away team (columns suffixed _AWAY).
games = eda_df
for suffix, team_column in (("HOME", "home_team"), ("AWAY", "away_team")) :
    side_table = league_table.add_suffix(f"_{suffix}")
    games = games.merge(
        side_table,
        how = 'left',
        left_on = ['gameweek', team_column, 'season'],
        right_on = [f"gameweek_{suffix}", f"team_{suffix}", f"season_{suffix}"],
    )

In [125]:
# Merge keys and descriptive columns duplicated by the two merges.
# NOTE(review): "gameweek_HOME" is not listed and therefore stays in `games`,
# unlike "gameweek_AWAY" - confirm whether this is intentional.
features_to_supress = [
    "season_HOME", "date_HOME", "start_time_HOME", "team_HOME",
    "gameweek_AWAY", "season_AWAY", "date_AWAY", "start_time_AWAY", "team_AWAY",
]
games = games.drop(columns=features_to_supress)

In [126]:
# Show every column for a few matches of one away team.
# 22 is a TEAM_ID category code; the codes depend on the set of team names in the data.
with pd.option_context('display.max_columns', None) :
    display(games[games["TEAM_ID_AWAY"] == 22].head(3))

Unnamed: 0,GAME_ID,TEAM_ID_HOME,TEAM_ID_AWAY,gameweek,season,possession_home,possession_away,shot_on_target_home,shot_on_target_away,saves_home,saves_away,year,quarter,month,week,day,weekday,is_monday,is_tuesday,is_wednesday,is_thursday,is_friday,is_saturday,is_sunday,shot_total_home,shot_total_away,saves_total_home,saves_total_away,goal_home,goal_away,points_result_home,points_result_away,dayofweek,date,start_time,home_team,score,away_team,SEASON_ID,gameweek_HOME,points_result_HOME,shot_on_target_HOME,saves_HOME,shot_total_HOME,saves_total_HOME,goal_HOME,points_result_against_HOME,shot_on_target_against_HOME,saves_against_HOME,shot_total_against_HOME,saves_total_against_HOME,goal_against_HOME,cumul_points_result_HOME,cumul_shot_on_target_HOME,cumul_saves_HOME,cumul_shot_total_HOME,cumul_saves_total_HOME,cumul_goal_HOME,cumul_points_result_against_HOME,cumul_shot_on_target_against_HOME,cumul_saves_against_HOME,cumul_shot_total_against_HOME,cumul_saves_total_against_HOME,cumul_goal_against_HOME,goal_difference_HOME,cumul_average_points_result_HOME,cumul_average_shot_on_target_HOME,cumul_average_saves_HOME,cumul_average_shot_total_HOME,cumul_average_saves_total_HOME,cumul_average_goal_HOME,cumul_average_points_result_against_HOME,cumul_average_shot_on_target_against_HOME,cumul_average_saves_against_HOME,cumul_average_shot_total_against_HOME,cumul_average_saves_total_against_HOME,cumul_average_goal_against_HOME,moving_average_1_points_result_HOME,moving_average_2_points_result_HOME,moving_average_3_points_result_HOME,moving_average_4_points_result_HOME,moving_average_5_points_result_HOME,moving_average_6_points_result_HOME,moving_average_1_shot_on_target_HOME,moving_average_2_shot_on_target_HOME,moving_average_3_shot_on_target_HOME,moving_average_4_shot_on_target_HOME,moving_average_5_shot_on_target_HOME,moving_average_6_shot_on_target_HOME,moving_average_1_saves_HOME,moving_average_2_saves_HOME,moving_average_3_saves_HOME,moving_average_4_saves_HOME,moving_average_5_s
aves_HOME,moving_average_6_saves_HOME,moving_average_1_shot_total_HOME,moving_average_2_shot_total_HOME,moving_average_3_shot_total_HOME,moving_average_4_shot_total_HOME,moving_average_5_shot_total_HOME,moving_average_6_shot_total_HOME,moving_average_1_saves_total_HOME,moving_average_2_saves_total_HOME,moving_average_3_saves_total_HOME,moving_average_4_saves_total_HOME,moving_average_5_saves_total_HOME,moving_average_6_saves_total_HOME,moving_average_1_goal_HOME,moving_average_2_goal_HOME,moving_average_3_goal_HOME,moving_average_4_goal_HOME,moving_average_5_goal_HOME,moving_average_6_goal_HOME,moving_average_1_points_result_against_HOME,moving_average_2_points_result_against_HOME,moving_average_3_points_result_against_HOME,moving_average_4_points_result_against_HOME,moving_average_5_points_result_against_HOME,moving_average_6_points_result_against_HOME,moving_average_1_shot_on_target_against_HOME,moving_average_2_shot_on_target_against_HOME,moving_average_3_shot_on_target_against_HOME,moving_average_4_shot_on_target_against_HOME,moving_average_5_shot_on_target_against_HOME,moving_average_6_shot_on_target_against_HOME,moving_average_1_saves_against_HOME,moving_average_2_saves_against_HOME,moving_average_3_saves_against_HOME,moving_average_4_saves_against_HOME,moving_average_5_saves_against_HOME,moving_average_6_saves_against_HOME,moving_average_1_shot_total_against_HOME,moving_average_2_shot_total_against_HOME,moving_average_3_shot_total_against_HOME,moving_average_4_shot_total_against_HOME,moving_average_5_shot_total_against_HOME,moving_average_6_shot_total_against_HOME,moving_average_1_saves_total_against_HOME,moving_average_2_saves_total_against_HOME,moving_average_3_saves_total_against_HOME,moving_average_4_saves_total_against_HOME,moving_average_5_saves_total_against_HOME,moving_average_6_saves_total_against_HOME,moving_average_1_goal_against_HOME,moving_average_2_goal_against_HOME,moving_average_3_goal_against_HOME,moving_average_4_goal_against_HOME,moving_ave
rage_5_goal_against_HOME,moving_average_6_goal_against_HOME,fatigue_1_match_HOME,fatigue_2_match_HOME,fatigue_3_match_HOME,fatigue_4_match_HOME,fatigue_5_match_HOME,points_result_AWAY,shot_on_target_AWAY,saves_AWAY,shot_total_AWAY,saves_total_AWAY,goal_AWAY,points_result_against_AWAY,shot_on_target_against_AWAY,saves_against_AWAY,shot_total_against_AWAY,saves_total_against_AWAY,goal_against_AWAY,cumul_points_result_AWAY,cumul_shot_on_target_AWAY,cumul_saves_AWAY,cumul_shot_total_AWAY,cumul_saves_total_AWAY,cumul_goal_AWAY,cumul_points_result_against_AWAY,cumul_shot_on_target_against_AWAY,cumul_saves_against_AWAY,cumul_shot_total_against_AWAY,cumul_saves_total_against_AWAY,cumul_goal_against_AWAY,goal_difference_AWAY,cumul_average_points_result_AWAY,cumul_average_shot_on_target_AWAY,cumul_average_saves_AWAY,cumul_average_shot_total_AWAY,cumul_average_saves_total_AWAY,cumul_average_goal_AWAY,cumul_average_points_result_against_AWAY,cumul_average_shot_on_target_against_AWAY,cumul_average_saves_against_AWAY,cumul_average_shot_total_against_AWAY,cumul_average_saves_total_against_AWAY,cumul_average_goal_against_AWAY,moving_average_1_points_result_AWAY,moving_average_2_points_result_AWAY,moving_average_3_points_result_AWAY,moving_average_4_points_result_AWAY,moving_average_5_points_result_AWAY,moving_average_6_points_result_AWAY,moving_average_1_shot_on_target_AWAY,moving_average_2_shot_on_target_AWAY,moving_average_3_shot_on_target_AWAY,moving_average_4_shot_on_target_AWAY,moving_average_5_shot_on_target_AWAY,moving_average_6_shot_on_target_AWAY,moving_average_1_saves_AWAY,moving_average_2_saves_AWAY,moving_average_3_saves_AWAY,moving_average_4_saves_AWAY,moving_average_5_saves_AWAY,moving_average_6_saves_AWAY,moving_average_1_shot_total_AWAY,moving_average_2_shot_total_AWAY,moving_average_3_shot_total_AWAY,moving_average_4_shot_total_AWAY,moving_average_5_shot_total_AWAY,moving_average_6_shot_total_AWAY,moving_average_1_saves_total_AWAY,moving_average_2_saves_total_AWAY,
moving_average_3_saves_total_AWAY,moving_average_4_saves_total_AWAY,moving_average_5_saves_total_AWAY,moving_average_6_saves_total_AWAY,moving_average_1_goal_AWAY,moving_average_2_goal_AWAY,moving_average_3_goal_AWAY,moving_average_4_goal_AWAY,moving_average_5_goal_AWAY,moving_average_6_goal_AWAY,moving_average_1_points_result_against_AWAY,moving_average_2_points_result_against_AWAY,moving_average_3_points_result_against_AWAY,moving_average_4_points_result_against_AWAY,moving_average_5_points_result_against_AWAY,moving_average_6_points_result_against_AWAY,moving_average_1_shot_on_target_against_AWAY,moving_average_2_shot_on_target_against_AWAY,moving_average_3_shot_on_target_against_AWAY,moving_average_4_shot_on_target_against_AWAY,moving_average_5_shot_on_target_against_AWAY,moving_average_6_shot_on_target_against_AWAY,moving_average_1_saves_against_AWAY,moving_average_2_saves_against_AWAY,moving_average_3_saves_against_AWAY,moving_average_4_saves_against_AWAY,moving_average_5_saves_against_AWAY,moving_average_6_saves_against_AWAY,moving_average_1_shot_total_against_AWAY,moving_average_2_shot_total_against_AWAY,moving_average_3_shot_total_against_AWAY,moving_average_4_shot_total_against_AWAY,moving_average_5_shot_total_against_AWAY,moving_average_6_shot_total_against_AWAY,moving_average_1_saves_total_against_AWAY,moving_average_2_saves_total_against_AWAY,moving_average_3_saves_total_against_AWAY,moving_average_4_saves_total_against_AWAY,moving_average_5_saves_total_against_AWAY,moving_average_6_saves_total_against_AWAY,moving_average_1_goal_against_AWAY,moving_average_2_goal_against_AWAY,moving_average_3_goal_against_AWAY,moving_average_4_goal_against_AWAY,moving_average_5_goal_against_AWAY,moving_average_6_goal_against_AWAY,fatigue_1_match_AWAY,fatigue_2_match_AWAY,fatigue_3_match_AWAY,fatigue_4_match_AWAY,fatigue_5_match_AWAY
0,2015001,11,22,1.0,2015-2016,0.52,0.48,2,3,1,2,2015,3,8,32,7,4,0,0,0,0,1,0,0,12,7,3,2,0,1,0,3,Fri,2015-08-07,20:30,Lille,0–1,Paris S-G,0,1.0,0,2,1,12,3,0,3,3,2,7,2,1,0,2,1,12,3,0,3,3,2,7,2,1,-1,0.0,2.0,1.0,12.0,3.0,0.0,3.0,3.0,2.0,7.0,2.0,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3,3,2,7,2,1,0,2,1,12,3,0,3,3,2,7,2,1,0,2,1,12,3,0,1,3.0,3.0,2.0,7.0,2.0,1.0,0.0,2.0,1.0,12.0,3.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
20,2015021,17,22,3.0,2015-2016,0.36,0.64,3,5,4,3,2015,3,8,34,21,4,0,0,0,0,1,0,0,10,9,5,3,0,1,0,3,Fri,2015-08-21,20:30,Montpellier,0–1,Paris S-G,0,3.0,0,3,4,10,5,0,3,5,3,9,3,1,0,7,10,25,13,0,9,13,7,35,7,4,-4,0.0,2.333333,3.333333,8.333333,4.333333,0.0,3.0,4.333333,2.333333,11.666667,2.333333,1.333333,0.0,0.0,,,,,0.0,2.0,,,,,2.0,3.0,,,,,4.0,7.5,,,,,2.0,4.0,,,,,0.0,0.0,,,,,3.0,3.0,,,,,2.0,4.0,,,,,0.0,2.0,,,,,10.0,13.0,,,,,0.0,2.0,,,,,1.0,1.5,,,,,6.0,13.0,,,,3,5,3,9,3,1,0,3,4,10,5,0,9,14,7,34,7,4,0,7,9,30,14,0,4,3.0,4.666667,2.333333,11.333333,2.333333,1.333333,0.0,2.333333,3.0,10.0,4.666667,0.0,3.0,3.0,,,,,6.0,4.5,,,,,2.0,2.0,,,,,18.0,12.5,,,,,2.0,2.0,,,,,2.0,1.5,,,,,0.0,0.0,,,,,2.0,2.0,,,,,4.0,2.5,,,,,8.0,10.0,,,,,6.0,4.5,,,,,0.0,0.0,,,,,5.0,14.0,,,
39,2015040,16,22,4.0,2015-2016,0.29,0.71,2,8,5,2,2015,3,8,35,30,6,0,0,0,0,0,0,1,3,19,8,2,0,3,0,3,Sun,2015-08-30,21:00,Monaco,0–3,Paris S-G,0,4.0,0,2,5,3,8,0,3,8,2,19,2,3,5,14,11,44,16,3,5,16,11,46,14,5,-2,1.25,3.5,2.75,11.0,4.0,0.75,1.25,4.0,2.75,11.5,3.5,1.25,1.0,1.0,1.666667,,,,3.0,3.0,4.0,,,,3.0,2.5,2.0,,,,14.0,11.5,13.666667,,,,4.0,3.0,2.666667,,,,1.0,0.5,1.0,,,,1.0,1.0,0.666667,,,,4.0,3.0,2.666667,,,,2.0,2.5,3.0,,,,12.0,10.5,9.0,,,,3.0,3.0,4.0,,,,1.0,0.5,0.666667,,,,8.0,16.0,22.0,,,3,8,2,19,2,3,0,2,5,3,8,0,12,22,9,53,9,7,0,9,14,33,22,0,7,3.0,5.5,2.25,13.25,2.25,1.75,0.0,2.25,3.5,8.25,5.5,0.0,3.0,3.0,3.0,,,,5.0,5.5,4.666667,,,,3.0,2.5,2.333333,,,,9.0,13.5,11.333333,,,,3.0,2.5,2.333333,,,,1.0,1.5,1.333333,,,,0.0,0.0,0.0,,,,3.0,2.5,2.333333,,,,4.0,4.0,3.0,,,,10.0,9.0,10.0,,,,5.0,5.5,4.666667,,,,0.0,0.0,0.0,,,,9.0,14.0,23.0,,


In [127]:
# Reorder columns: identifiers and match-level columns first (in the order
# listed), every other column afterwards in its current relative order.
features = [
    "GAME_ID", "TEAM_ID_HOME", "TEAM_ID_AWAY", 'gameweek', 'season',
    'possession_home', 'possession_away',
    'shot_on_target_home', 'shot_on_target_away',
    'saves_home', 'saves_away',
    'year', 'quarter', 'month', 'week', 'day', 'weekday',
    'is_monday', 'is_tuesday', 'is_wednesday', 'is_thursday',
    'is_friday', 'is_saturday', 'is_sunday',
    'shot_total_home', 'shot_total_away',
    'saves_total_home', 'saves_total_away',
    'goal_home', 'goal_away',
    'points_result_home', 'points_result_away',
]
trailing_columns = [name for name in games.columns if name not in features]
games = games[features + trailing_columns]

In [27]:
# Save the match-level table next to the raw data, without the index column.
games.to_csv(DATAPATH / f"games_{filepath}",index=False)