In [26]:
import pandas as pd
import numpy as np
import json
import seaborn as sns

from sklearn.preprocessing import MinMaxScaler, Normalizer

# accessing data library
from statsbombpy import sb

# Sbopen is imported from mplsoccer and instantiated once here.
# Presumably it is the StatsBomb open-data parser that returns data as DataFrames
# (the original note said "as DF's") — TODO confirm against the mplsoccer docs.
# NOTE(review): `parser` is not referenced by any cell visible in this notebook.
from mplsoccer import Sbopen
parser = Sbopen()

# plotting
import os
import pathlib
import warnings
import statsmodels.api as sm
import statsmodels.formula.api as smf
from mplsoccer import Pitch, VerticalPitch
import matplotlib.pyplot as plt

pd.options.mode.chained_assignment = None
warnings.filterwarnings('ignore')

In [56]:
# Season selectors. table_merges() uses a `season`-style tag in the league-table
# file name and an `ssn`-style tag in the KPI file names.
season, ssn = '2018_19', '1819'

In [57]:
# Load the KPI export for the season chosen in `ssn`, then show which
# columns the file provides (last expression of the cell is displayed).
KPIs = pd.read_csv(filepath_or_buffer=f'../data/KPIs_{ssn}.csv')
KPIs.columns

Index(['team', 'season', 'total_GKs', 'short_GKs', 'long_GKs',
       'GKs_distnce_std', 'avg_GK_dist', 'all_scsfl_GKs', 'shrt_scsfl_GKs',
       'lng_scsfl_GKs', 'shortGK_avg_xkick_lft', 'shortGK_avg_ykick_lft',
       'shortGK_avg_xkick_cntr', 'shortGK_avg_ykick_cntr',
       'shortGK_avg_xkick_rght', 'shortGK_avg_ykick_rght',
       'longGK_avg_xkick_lft', 'longGK_avg_ykick_lft', 'longGK_avg_xkick_cntr',
       'longGK_avg_ykick_cntr', 'longGK_avg_xkick_rght',
       'longGK_avg_ykick_rght', 'ttl_avg_num_passes', 'ttl_avg_chain_speed',
       'scsfl_prgpass_pcnt', 'scsfl_prgpass_avg_dist', 'ttl_prgpass_pcnt',
       'ttl_prgpass_avg_dist', 'short_avg_num_passes', 'shrt_all_prgpass_pcnt',
       'shrt_all_prgpass_avg_dist', 'shrt_scsfl_prgpass_pcnt',
       'shrt_scsfl_prgpass_avg_dist', 'long_avg_num_passes',
       'long_all_prgpass_pcnt', 'long_all_prgpass_avg_dist',
       'long_scsfl_prgpass_pcnt', 'long_scsfl_prgpass_avg_dist',
       'short_end_shot', 'short_end_goal', 'short_

In [58]:
def table_merges(season, ssn):
    """Derive goal-kick KPI columns for one season, join them to the league table and save CSVs.

    Reads ``../data/lg_table_{season}.csv`` and ``../data/KPIs_{ssn}.csv``, adds
    percentage / aggregate / label columns to the KPI frame, inner-joins the KPI
    frame (and four column subsets of it) to the league table on ``team`` and
    writes the results to ``../data``:

    * ``KPIs_tbl{ssn}.csv``  - league table + every ordered KPI column
    * ``gen_tbl{ssn}.csv``   - league table + general goal-kick counts/shares
    * ``gains_tbl{ssn}.csv`` - season, Pos, team, Pts + x-progression columns
    * ``pass_tbl{ssn}.csv``  - season, Pos, team, Pts + passing columns
    * ``shots_tbl{ssn}.csv`` - season, Pos, team, GF, GA, Pts + shot/goal columns

    Parameters
    ----------
    season : str
        Tag interpolated into the league-table file name (e.g. ``'2018_19'``).
    ssn : str
        Tag interpolated into the KPI input and all output file names (e.g. ``'1819'``).

    Returns
    -------
    None
        The function only has file side effects.

    Notes
    -----
    * The league table must provide ``Team``, ``Pos``, ``Pts``, ``GF`` and ``GA``;
      ``Team`` is renamed to ``team`` to serve as the join key.
    * The join is ``how='inner'``: teams whose names differ between the two files
      are silently dropped from every output.
    * A zero denominator (e.g. a team with no short goal kicks) yields inf/NaN
      in the corresponding percentage column rather than raising.
    """
    lg_tbl = pd.read_csv(f'../data/lg_table_{season}.csv').rename(columns={'Team': 'team'})
    kpis = pd.read_csv(f'../data/KPIs_{ssn}.csv')

    def _pct(part, whole):
        """Column `part` as a percentage of column `whole`, rounded to 2 decimals."""
        return (kpis[part] / kpis[whole] * 100).round(2)

    # Share of all goal kicks that were short / long.
    kpis['short_GK_pc'] = _pct('short_GKs', 'total_GKs')
    kpis['long_GK_pc'] = _pct('long_GKs', 'total_GKs')

    # *_end_shot / *_end_goal counts relative to the number of short / long goal kicks.
    kpis['short_shot_pc'] = _pct('short_end_shot', 'short_GKs')
    kpis['short_goal_pc'] = _pct('short_end_goal', 'short_GKs')
    kpis['long_shot_pc'] = _pct('long_end_shot', 'long_GKs')
    kpis['long_goal_pc'] = _pct('long_end_goal', 'long_GKs')

    # fllw_*_rtrn counts relative to the number of short / long goal kicks.
    kpis['fllw_shrt_shots_pc'] = _pct('fllw_shrt_shots_rtrn', 'short_GKs')
    kpis['fllw_shrt_goals_pc'] = _pct('fllw_shrt_goals_rtrn', 'short_GKs')
    kpis['fllw_long_shots_pc'] = _pct('fllw_long_shots_rtrn', 'long_GKs')
    kpis['fllw_long_goals_pc'] = _pct('fllw_long_goals_rtrn', 'long_GKs')

    # Aggregate gain = average x progression minus the fllw_*_x_rtrn value.
    kpis['short_agg_gains'] = (kpis['short_avg_x_prog'] - kpis['fllw_shrt_x_rtrn']).round(2)
    kpis['long_agg_gains'] = (kpis['long_avg_x_prog'] - kpis['fllw_long_x_rtrn']).round(2)

    # Every label column carries the same "team | season" string, so build it once
    # instead of re-running the row-wise apply for each column.
    label_cols = ['short_gains_map', 'long_gains_map',
                  'short_ntwrk_map', 'long_ntwrk_map',
                  'GK_pizza_plot', 'name_display']
    team_season = kpis.apply(lambda row: f"{row['team']} | {row['season']}", axis=1)
    for label_col in label_cols:
        kpis[label_col] = team_season

    key_cols = ['team', 'season']

    ### General
    general_cols = ['total_GKs',
                    'short_GKs', 'short_GK_pc',
                    'long_GKs', 'long_GK_pc',
                    'GKs_distnce_std']

    ### Yards Gained / Conceded
    gains_cols = ['short_avg_x_prog', 'fllw_shrt_x_rtrn', 'short_agg_gains',
                  'long_avg_x_prog', 'fllw_long_x_rtrn', 'long_agg_gains']

    ### Passes
    pass_cols = ['GKs_distnce_std', 'ttl_avg_num_passes', 'ttl_avg_chain_speed',
                 'short_avg_num_passes', 'long_avg_num_passes',
                 'shrt_all_prgpass_pcnt', 'long_all_prgpass_pcnt',
                 'shrt_scsfl_prgpass_pcnt', 'long_scsfl_prgpass_pcnt',
                 'shrt_all_prgpass_avg_dist', 'long_all_prgpass_avg_dist',
                 'shrt_scsfl_prgpass_avg_dist', 'long_scsfl_prgpass_avg_dist']

    ### Shot Creation
    shot_cols = ['short_end_shot', 'short_shot_pc', 'long_end_shot', 'long_shot_pc',
                 'short_end_goal', 'short_goal_pc', 'long_end_goal', 'long_goal_pc',
                 'short_avg_xg', 'long_avg_xg',
                 'fllw_shrt_shots_rtrn', 'fllw_shrt_shots_pc',
                 'fllw_long_shots_rtrn', 'fllw_long_shots_pc',
                 'fllw_shrt_goals_rtrn', 'fllw_shrt_goals_pc',
                 'fllw_long_goals_rtrn', 'fllw_long_goals_pc',
                 'fllw_long_avg_xg_rtrn', 'fllw_shrt_avg_xg_rtrn']

    # Column order of the full KPI table; any input column not listed here is dropped.
    # 'short_GK_pc' now sits next to 'short_GKs' and 'short_goal_pc' next to
    # 'short_end_goal' (the two were swapped before), mirroring the long_* columns.
    column_order = [
        'team', 'season', 'total_GKs', 'short_GKs', 'short_GK_pc', 'long_GKs', 'long_GK_pc',
        'avg_GK_dist', 'all_scsfl_GKs', 'shrt_scsfl_GKs', 'lng_scsfl_GKs',
        'short_avg_x_prog', 'fllw_shrt_x_rtrn', 'short_agg_gains',
        'long_avg_x_prog', 'fllw_long_x_rtrn', 'long_agg_gains',
        'GKs_distnce_std', 'ttl_avg_num_passes', 'ttl_avg_chain_speed',
        'scsfl_prgpass_pcnt', 'scsfl_prgpass_avg_dist', 'ttl_prgpass_pcnt', 'ttl_prgpass_avg_dist',
        'short_avg_num_passes', 'long_avg_num_passes',
        'shrt_all_prgpass_pcnt', 'long_all_prgpass_pcnt',
        'shrt_scsfl_prgpass_pcnt', 'long_scsfl_prgpass_pcnt',
        'shrt_all_prgpass_avg_dist', 'long_all_prgpass_avg_dist',
        'shrt_scsfl_prgpass_avg_dist', 'long_scsfl_prgpass_avg_dist',
        'short_end_shot', 'short_shot_pc', 'long_end_shot', 'long_shot_pc',
        'short_end_goal', 'short_goal_pc', 'long_end_goal', 'long_goal_pc',
        'short_avg_xg', 'long_avg_xg',
        'fllw_shrt_shots_rtrn', 'fllw_shrt_shots_pc', 'fllw_long_shots_rtrn', 'fllw_long_shots_pc',
        'fllw_shrt_goals_rtrn', 'fllw_shrt_goals_pc', 'fllw_long_goals_rtrn', 'fllw_long_goals_pc',
        'fllw_shrt_avg_xg_rtrn', 'fllw_long_avg_xg_rtrn',
        *label_cols,
    ]
    kpis_ordered = kpis[column_order]

    ### Merge With League Table
    def _join(kpi_cols, lead_cols=None):
        """Inner-join the league table with the given KPI columns on 'team'.

        With `lead_cols`, the result is cut down to `lead_cols` followed by `kpi_cols`;
        without it, every league-table column is kept.
        """
        joined = pd.merge(lg_tbl, kpis_ordered[key_cols + kpi_cols], how='inner', on='team')
        if lead_cols is None:
            return joined
        return joined[lead_cols + kpi_cols]

    outputs = {
        'KPIs_tbl': pd.merge(lg_tbl, kpis_ordered, how='inner', on='team'),
        'gen_tbl': _join(general_cols),
        'gains_tbl': _join(gains_cols, ['season', 'Pos', 'team', 'Pts']),
        'pass_tbl': _join(pass_cols, ['season', 'Pos', 'team', 'Pts']),
        'shots_tbl': _join(shot_cols, ['season', 'Pos', 'team', 'GF', 'GA', 'Pts']),
    }

    for stem, table in outputs.items():
        table.to_csv(f'../data/{stem}{ssn}.csv', index=False)

In [61]:
# Build and save the merged tables for the season selected in the config cell
# (`season` / `ssn`), instead of repeating the same literals here.
table_merges(season, ssn)

In [62]:
### Concat The Tables

# Load the per-season merged KPI tables saved by table_merges().
# NOTE(review): the visible cells only regenerate the 1819 file; the 1920 and
# 2021 files must already exist in ../data from earlier runs.
KPIs_tbl1819, KPIs_tbl1920, KPIs_tbl2021 = (
    pd.read_csv(f'../data/KPIs_tbl{code}.csv') for code in ('1819', '1920', '2021')
)

# ignore_index=True gives the stacked frame a fresh 0..n-1 index instead of
# repeating every season's own row labels.
KPIs_tbl_all = pd.concat([KPIs_tbl1819, KPIs_tbl1920, KPIs_tbl2021], ignore_index=True)

KPIs_tbl_all.to_csv('../data/KPIs_tbl_all.csv', index=False)

In [None]:
### Concat The Tables

# Load the per-season general tables saved by table_merges().
# NOTE(review): the visible cells only regenerate the 1819 file; the 1920 and
# 2021 files must already exist in ../data from earlier runs.
gen_tbl1819, gen_tbl1920, gen_tbl2021 = (
    pd.read_csv(f'../data/gen_tbl{code}.csv') for code in ('1819', '1920', '2021')
)

# ignore_index=True gives the stacked frame a fresh 0..n-1 index instead of
# repeating every season's own row labels.
gen_tbl_all = pd.concat([gen_tbl1819, gen_tbl1920, gen_tbl2021], ignore_index=True)

gen_tbl_all.to_csv('../data/gen_tbl_all.csv', index=False)

In [None]:
# Load the per-season gains tables saved by table_merges().
# NOTE(review): the visible cells only regenerate the 1819 file; the 1920 and
# 2021 files must already exist in ../data from earlier runs.
gains_tbl1819, gains_tbl1920, gains_tbl2021 = (
    pd.read_csv(f'../data/gains_tbl{code}.csv') for code in ('1819', '1920', '2021')
)

# ignore_index=True gives the stacked frame a fresh 0..n-1 index instead of
# repeating every season's own row labels.
gains_tbl_all = pd.concat([gains_tbl1819, gains_tbl1920, gains_tbl2021], ignore_index=True)

gains_tbl_all.to_csv('../data/gains_tbl_all.csv', index=False)

In [None]:
# Load the per-season passing tables saved by table_merges().
# NOTE(review): the visible cells only regenerate the 1819 file; the 1920 and
# 2021 files must already exist in ../data from earlier runs.
pass_tbl1819, pass_tbl1920, pass_tbl2021 = (
    pd.read_csv(f'../data/pass_tbl{code}.csv') for code in ('1819', '1920', '2021')
)

# ignore_index=True gives the stacked frame a fresh 0..n-1 index instead of
# repeating every season's own row labels.
pass_tbl_all = pd.concat([pass_tbl1819, pass_tbl1920, pass_tbl2021], ignore_index=True)

pass_tbl_all.to_csv('../data/pass_tbl_all.csv', index=False)

In [None]:
# Load the per-season shot tables saved by table_merges().
# NOTE(review): the visible cells only regenerate the 1819 file; the 1920 and
# 2021 files must already exist in ../data from earlier runs.
shots_tbl1819, shots_tbl1920, shots_tbl2021 = (
    pd.read_csv(f'../data/shots_tbl{code}.csv') for code in ('1819', '1920', '2021')
)

# ignore_index=True gives the stacked frame a fresh 0..n-1 index instead of
# repeating every season's own row labels.
shots_tbl_all = pd.concat([shots_tbl1819, shots_tbl1920, shots_tbl2021], ignore_index=True)

shots_tbl_all.to_csv('../data/shots_tbl_all.csv', index=False)

In [None]:
# Display the general table read back from ../data/gen_tbl1819.csv.
gen_tbl1819

# Split the data short / long

In [None]:
# Short goal-kick slice of the KPI frame. 'short_avg_xg' is selected once only
# (it used to be listed twice, which produced a duplicate column).
# NOTE(review): the *_pc columns are created inside table_merges() on its own
# local frame — confirm the module-level `KPIs` (loaded from the raw CSV) really
# carries them, otherwise this selection raises KeyError.
short_KPIs = KPIs[['team', 'season', 'total_GKs', 'short_GKs', 'short_GK_pc',
                   'GKs_distnce_std',
                   'short_avg_x_prog', 'fllw_shrt_x_rtrn',
                   'short_avg_num_passes',
                   'shrt_all_prgpass_pcnt', 'shrt_scsfl_prgpass_pcnt',
                   'shrt_all_prgpass_avg_dist', 'shrt_scsfl_prgpass_avg_dist',
                   'short_end_shot', 'short_shot_pc',
                   'fllw_shrt_shots_rtrn', 'fllw_shrt_shots_pc',
                   'short_end_goal', 'short_goal_pc',
                   'fllw_shrt_goals_rtrn', 'fllw_shrt_goals_pc',
                   'short_avg_xg', 'fllw_shrt_avg_xg_rtrn']]

In [None]:
# Display the short goal-kick column subset.
short_KPIs

In [None]:
# Long goal-kick slice of the KPI frame: identifiers first, then progression,
# passing, shot, goal and xG columns.
# NOTE(review): the *_pc columns are created inside table_merges() on its own
# local frame — confirm the module-level `KPIs` really carries them.
long_KPIs = KPIs.loc[:, [
    'team', 'season', 'total_GKs', 'long_GKs', 'long_GK_pc',
    'GKs_distnce_std',
    'long_avg_x_prog', 'fllw_long_x_rtrn',
    'long_avg_num_passes',
    'long_all_prgpass_pcnt', 'long_scsfl_prgpass_pcnt',
    'long_all_prgpass_avg_dist', 'long_scsfl_prgpass_avg_dist',
    'long_end_shot', 'long_shot_pc',
    'fllw_long_shots_rtrn', 'fllw_long_shots_pc',
    'long_end_goal', 'long_goal_pc',
    'fllw_long_goals_rtrn', 'fllw_long_goals_pc',
    'long_avg_xg', 'fllw_long_avg_xg_rtrn',
]]

In [None]:
# Display the long goal-kick column subset.
long_KPIs