In [364]:
import pandas as pd
import numpy as np
import json
import seaborn as sns

# accessing data library
from statsbombpy import sb

# opening the libraries as DF's
from mplsoccer import Sbopen
parser = Sbopen()

# plotting
import os
import pathlib
import warnings
import statsmodels.api as sm
import statsmodels.formula.api as smf
from mplsoccer import Pitch, VerticalPitch
import matplotlib.pyplot as plt

# Turn off pandas' chained-assignment check (no SettingWithCopyWarning is emitted).
pd.options.mode.chained_assignment = None
# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides pandas' index-alignment and deprecation warnings,
# which makes indexing mistakes harder to spot - consider narrowing the filter.
warnings.filterwarnings('ignore')

# 2018/19 Season

In [386]:
# Column names of the 2018/19 KPI table, in the order they appear in the output.
kpi_columns_1819 = (
    # identification
    'team', 'season',
    # goal-kick counts, distance and success
    'total_GKs', 'short_GKs', 'long_GKs',
    'GKs_distnce_std', 'avg_GK_dist',
    'all_scsfl_GKs', 'shrt_scsfl_GKs', 'lng_scsfl_GKs',
    # average end location of short goal kicks, split by lft / cntr / rght
    'shortGK_avg_xkick_lft', 'shortGK_avg_ykick_lft',
    'shortGK_avg_xkick_cntr', 'shortGK_avg_ykick_cntr',
    'shortGK_avg_xkick_rght', 'shortGK_avg_ykick_rght',
    # average end location of long goal kicks, split by lft / cntr / rght
    'longGK_avg_xkick_lft', 'longGK_avg_ykick_lft',
    'longGK_avg_xkick_cntr', 'longGK_avg_ykick_cntr',
    'longGK_avg_xkick_rght', 'longGK_avg_ykick_rght',
    # whole-chain figures
    'ttl_avg_num_passes', 'ttl_avg_chain_speed',
    # progressive passes over all chains
    'scsfl_prgpass_pcnt', 'scsfl_prgpass_avg_dist',
    'ttl_prgpass_pcnt', 'ttl_prgpass_avg_dist',
    # passes in chains that start with a short goal kick
    'short_avg_num_passes',
    'shrt_all_prgpass_pcnt', 'shrt_all_prgpass_avg_dist',
    'shrt_scsfl_prgpass_pcnt', 'shrt_scsfl_prgpass_avg_dist',
    # passes in chains that start with a long goal kick
    'long_avg_num_passes',
    'long_all_prgpass_pcnt', 'long_all_prgpass_avg_dist',
    'long_scsfl_prgpass_pcnt', 'long_scsfl_prgpass_avg_dist',
    # shots, goals and xG inside the chains
    'short_end_shot', 'short_end_goal', 'short_avg_xg',
    'long_end_shot', 'long_end_goal', 'long_avg_xg',
    # progression of short chains and the possession that follows them
    'short_avg_x_prog',
    'fllw_shrt_x_rtrn', 'fllw_shrt_shots_rtrn',
    'fllw_shrt_goals_rtrn', 'fllw_shrt_avg_xg_rtrn',
    # progression of long chains and the possession that follows them
    'long_avg_x_prog',
    'fllw_long_x_rtrn', 'fllw_long_shots_rtrn',
    'fllw_long_goals_rtrn', 'fllw_long_avg_xg_rtrn',
)

# One independent, initially empty list per KPI column.
start_KPIs_1819 = {column: [] for column in kpi_columns_1819}

KPIs_1819_df = pd.DataFrame(start_KPIs_1819)

###### Retrieving and preparing the 2018/19 data ######
ssn1819 = pd.read_csv('../data/matches_ssn1819.csv')

## 2018/19 Team List (read before any filtering, so every team in the file is kept)
ssn1819_teams = ssn1819['team'].unique().tolist()


## Preparing the data: keep only the event types used by the goal-kick KPIs
kept_event_types = ['Pass', 'Carry', 'Shot', 'Goal Keeper',
                    'Starting XI', 'Half Start', 'Injury Stoppage', 'Half End']
# .copy() so the column assignments below write to an independent frame
ssn1819 = ssn1819.loc[ssn1819['type'].isin(kept_event_types)].copy()

# CONVERTING TIMESTAMP TO MINUTE, SECOND, MILLISECOND COLUMNS
# The vectorised .dt accessors replace a row-by-row apply.
timestamp_parts = pd.to_datetime(ssn1819['timestamp'])

ssn1819['minute'] = timestamp_parts.dt.minute
ssn1819['second'] = timestamp_parts.dt.second
ssn1819['millisecond'] = timestamp_parts.dt.microsecond // 1000

# Keep the timestamp itself as a timedelta so durations can be subtracted later
ssn1819['timestamp'] = pd.to_timedelta(ssn1819['timestamp'])

ssn1819 = ssn1819[['match','home_team','away_team','timestamp','minute','second','millisecond',
                   'duration','possession','possession_team','defending_team',
                   'play_pattern','type','team','player','position',
                   'pass_length','pass_angle','pass_height','pass_outcome','pass_body_part','pass_shot_assist',
                   'shot_statsbomb_xg','shot_outcome',
                   'x_start', 'y_start','x_end','y_end','z_end_shot']]

# NOTE(review): the sort keys carry no hour or period component. If `timestamp`
# restarts at each half, or can reach one hour, events from different parts of
# a match will interleave here - confirm against the data.
ssn1819 = ssn1819.sort_values(by=['match',
                                  'minute','second','millisecond',
                                  'possession']).reset_index(drop=True)


## Build a per-chain key that is only set for 'From Goal Kick' events
def create_GKmatch_possession(row):
    """Return the "<match>,<possession>" key of a goal-kick event, else None.

    Parameters
    ----------
    row : mapping
        One event; must provide 'play_pattern' and, for goal-kick events,
        'match' and 'possession'.

    Returns
    -------
    str or None
        The match and possession values joined by a comma when the play
        pattern is 'From Goal Kick'; None for every other play pattern.
    """
    if row['play_pattern'] != 'From Goal Kick':
        return None
    match_id = row['match']
    possession_no = row['possession']
    return f"{match_id},{possession_no}"

## Tag every event with its goal-kick chain key (None outside goal-kick play)
ssn1819['GK_match_possession'] = ssn1819.apply(create_GKmatch_possession, axis=1)

# KEEP ONLY THE EVENTS THAT BELONG TO A GOAL-KICK CHAIN
GKonly_1819 = ssn1819.dropna(subset=['GK_match_possession'])

In [387]:
# Goal kicks whose first event ends short of this x coordinate count as "short".
_SHORT_GK_MAX_X = 25.1
# A pass is progressive when it ends at less than this fraction of its starting
# distance from the point (120, 40).
_PROG_PASS_RATIO = 0.75


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or NaN when the denominator is zero."""
    return numerator / denominator if denominator else np.nan


def _prog_pass_stats(passes):
    """Return (% of progressive passes, mean distance gained by them) for `passes`."""
    dist_before = np.sqrt(np.square(120 - passes['x_start']) +
                          np.square(40 - passes['y_start']))
    dist_after = np.sqrt(np.square(120 - passes['x_end']) +
                         np.square(40 - passes['y_end']))
    is_progressive = (dist_after / dist_before) < _PROG_PASS_RATIO
    share = round(_safe_ratio(int(is_progressive.sum()), len(passes)) * 100, 2)
    avg_gain = round((dist_before - dist_after)[is_progressive].mean(), 2)
    return share, avg_gain


def _increment_second_number(chain):
    """Return the "<match>,<possession>" key of the possession right after `chain`."""
    # rsplit keeps working when the match label itself contains a comma
    match_id, possession_no = chain.rsplit(',', 1)
    return f"{match_id},{int(possession_no) + 1}"


def _following_chain_stats(chain_ids):
    """Return (x return, shots, goals, mean xG) of the possessions following `chain_ids`."""
    next_ids = [_increment_second_number(chain) for chain in chain_ids]
    following = ssn1819[ssn1819['new_match_possession'].isin(next_ids)]
    x_return = round(
        120 - following.groupby('new_match_possession').last()['x_end'].mean(), 3)
    shots = len(following[following['type'] == 'Shot'])
    goals = len(following[following['shot_outcome'] == 'Goal'])
    avg_xg = round(following['shot_statsbomb_xg'].mean(), 4)
    return x_return, shots, goals, avg_xg


def kpi_1819_values(team):
    """Compute the 2018/19 goal-kick KPIs of `team` and append them to `start_KPIs_1819`.

    Reads the module-level frames `GKonly_1819` (events of goal-kick chains)
    and `ssn1819` (all retained events). A chain is one value of
    'GK_match_possession'; it is "short" when the x_end of its first event is
    below 25.1 and "long" otherwise.

    Parameters
    ----------
    team : str
        Team name as it appears in the 'possession_team' and 'team' columns.

    Returns
    -------
    None
        Exactly one value is appended to each KPI list of `start_KPIs_1819`.
        All values are computed first and appended together, so an exception
        during the computation leaves the lists untouched.

    Side effects
    ------------
    Adds the column 'new_match_possession' to `ssn1819` if it is missing.

    Notes
    -----
    Ratios with an empty denominator (for example a team without short goal
    kicks) are reported as NaN instead of raising ZeroDivisionError.
    """
    # Work on a copy so the column assignments never touch GKonly_1819
    team_gk = GKonly_1819[GKonly_1819['possession_team'] == f'{team}'].copy()
    # A missing pass outcome is treated as a successful pass
    team_gk['pass_outcome'] = team_gk['pass_outcome'].fillna('Successful')

    # NOTE(review): GroupBy.first()/last() return the first/last NON-NULL value
    # of each column, which is not necessarily the first/last row of the chain.
    chains = team_gk.groupby('GK_match_possession')
    chain_first = chains.first()
    chain_last = chains.last()

    kick_x = chain_first['x_end']
    is_short = kick_x < _SHORT_GK_MAX_X
    is_long = kick_x >= _SHORT_GK_MAX_X
    is_successful = chain_first['pass_outcome'] == 'Successful'

    kpis = {
        'season': '2018/19',
        'team': team,
        'total_GKs': len(chain_first),
        'short_GKs': int(is_short.sum()),
        'long_GKs': int(is_long.sum()),
        'GKs_distnce_std': round(kick_x.std(), 3),
        'avg_GK_dist': round(kick_x.mean(), 2),
        # Counts (not percentages) of successful goal kicks
        'all_scsfl_GKs': int(is_successful.sum()),
        'shrt_scsfl_GKs': int((is_short & is_successful).sum()),
        'lng_scsfl_GKs': int((is_long & is_successful).sum()),
    }

    ## Average end location of the kick, split by kick length and y_start zone
    kick_y0 = chain_first['y_start']
    zones = {
        'lft': kick_y0 <= 38,
        'cntr': (kick_y0 >= 38.1) & (kick_y0 <= 42.9),
        'rght': kick_y0 >= 43,
    }
    for length_label, length_mask in (('shortGK', is_short), ('longGK', is_long)):
        for zone_label, zone_mask in zones.items():
            selected = chain_first[length_mask & zone_mask]
            kpis[f'{length_label}_avg_xkick_{zone_label}'] = round(selected['x_end'].mean(), 2)
            kpis[f'{length_label}_avg_ykick_{zone_label}'] = round(selected['y_end'].mean(), 2)

    ## Flag every event with whether its chain started with a short goal kick
    team_gk['short_GK_start'] = team_gk['GK_match_possession'].map(is_short).astype(bool)
    in_short = team_gk['short_GK_start']
    in_long = ~in_short

    own_pass = (team_gk['type'] == 'Pass') & (team_gk['team'] == team)
    successful = team_gk['pass_outcome'] == 'Successful'

    ## Average number of the team's own passes per chain
    n_chains = team_gk['GK_match_possession'].nunique()
    n_short_chains = team_gk.loc[in_short, 'GK_match_possession'].nunique()
    n_long_chains = team_gk.loc[in_long, 'GK_match_possession'].nunique()

    # Own passes only, so the total is consistent with the short/long figures
    kpis['ttl_avg_num_passes'] = round(
        _safe_ratio(int(own_pass.sum()), n_chains), 4)
    kpis['short_avg_num_passes'] = round(
        _safe_ratio(int((own_pass & in_short).sum()), n_short_chains), 3)
    kpis['long_avg_num_passes'] = round(
        _safe_ratio(int((own_pass & in_long).sum()), n_long_chains), 3)

    ## Progressive passes: share and mean distance gained, per group of passes.
    ## Each pair of KPIs is computed from the frame it is named after.
    pass_groups = {
        ('scsfl_prgpass_pcnt', 'scsfl_prgpass_avg_dist'): own_pass & successful,
        ('ttl_prgpass_pcnt', 'ttl_prgpass_avg_dist'): own_pass,
        ('shrt_scsfl_prgpass_pcnt', 'shrt_scsfl_prgpass_avg_dist'): own_pass & successful & in_short,
        ('shrt_all_prgpass_pcnt', 'shrt_all_prgpass_avg_dist'): own_pass & in_short,
        ('long_scsfl_prgpass_pcnt', 'long_scsfl_prgpass_avg_dist'): own_pass & successful & in_long,
        ('long_all_prgpass_pcnt', 'long_all_prgpass_avg_dist'): own_pass & in_long,
    }
    for (pcnt_key, dist_key), selector in pass_groups.items():
        kpis[pcnt_key], kpis[dist_key] = _prog_pass_stats(team_gk[selector])

    ## Chain speed: x_start gained between first and last event per elapsed second
    chain_seconds = (chain_last['timestamp'] - chain_first['timestamp']).dt.total_seconds()
    chain_x_gain = chain_last['x_start'] - chain_first['x_start']
    # A zero-duration chain would otherwise turn the whole average into inf
    chain_speeds = (chain_x_gain / chain_seconds).replace([np.inf, -np.inf], np.nan)
    kpis['ttl_avg_chain_speed'] = round(chain_speeds.mean(), 4)

    ## Shots, goals, xG and final x position reached inside the chains
    is_shot = team_gk['type'] == 'Shot'
    is_goal = team_gk['shot_outcome'] == 'Goal'
    moves_ball = team_gk['type'].isin(['Pass', 'Carry', 'Shot'])
    for label, in_chain in (('short', in_short), ('long', in_long)):
        kpis[f'{label}_end_shot'] = int((in_chain & is_shot).sum())
        kpis[f'{label}_end_goal'] = int((in_chain & is_goal).sum())
        kpis[f'{label}_avg_xg'] = round(
            team_gk.loc[in_chain & is_shot, 'shot_statsbomb_xg'].mean(), 4)
        kpis[f'{label}_avg_x_prog'] = round(
            team_gk[in_chain & moves_ball]
            .groupby('GK_match_possession').last()['x_end'].mean(), 2)

    ## Possession that directly follows each chain (possession number + 1)
    # The key column is the same for every team, so it is built only once.
    if 'new_match_possession' not in ssn1819.columns:
        ssn1819['new_match_possession'] = (ssn1819['match'].astype(str) + ',' +
                                           ssn1819['possession'].astype(str))

    for label, in_chain in (('shrt', in_short), ('long', in_long)):
        chain_ids = team_gk.loc[in_chain, 'GK_match_possession'].unique()
        x_return, shots, goals, avg_xg = _following_chain_stats(chain_ids)
        kpis[f'fllw_{label}_x_rtrn'] = x_return
        kpis[f'fllw_{label}_shots_rtrn'] = shots
        kpis[f'fllw_{label}_goals_rtrn'] = goals
        kpis[f'fllw_{label}_avg_xg_rtrn'] = avg_xg

    #################### ADDING THE VALUES TO THE KPI DICT ####################
    for key, value in kpis.items():
        start_KPIs_1819[key].append(value)
    

In [388]:
# Start from empty lists so that re-running this cell does not append a second
# copy of every team's values.
for kpi_values in start_KPIs_1819.values():
    kpi_values.clear()

# One call per team; each call appends that team's values to start_KPIs_1819.
for team in ssn1819_teams:
    kpi_1819_values(team)

In [30]:
# Display the raw KPI dict (one list per KPI, one entry per team).
start_KPIs_1819

{'team': ['Manchester City WFC',
  'Chelsea FCW',
  'Arsenal WFC',
  'Liverpool WFC',
  'Brighton & Hove Albion WFC',
  'Bristol City WFC',
  'Reading WFC',
  'Yeovil Town LFC',
  'Birmingham City WFC',
  'Everton LFC',
  'West Ham United LFC'],
 'season': ['2018/19',
  '2018/19',
  '2018/19',
  '2018/19',
  '2018/19',
  '2018/19',
  '2018/19',
  '2018/19',
  '2018/19',
  '2018/19',
  '2018/19'],
 'total_GKs': [155, 114, 121, 177, 184, 254, 188, 256, 126, 183, 166],
 'short_GKs': [77, 46, 63, 25, 82, 44, 95, 3, 45, 67, 45],
 'long_GKs': [78, 68, 58, 152, 102, 210, 93, 253, 81, 116, 121],
 'GKs_distnce_std': [20.868,
  20.577,
  18.184,
  16.739,
  21.587,
  18.925,
  20.514,
  9.286,
  24.253,
  19.931,
  24.626],
 'shortGK_avg_xkick_lft': [17.3,
  15.8,
  14.26,
  13.71,
  12.0,
  17.67,
  18.36,
  6.0,
  13.43,
  11.74,
  12.78],
 'shortGK_avg_ykick_lft': [35.8,
  34.5,
  46.3,
  20.43,
  18.2,
  40.0,
  31.8,
  11.0,
  25.43,
  23.13,
  28.0],
 'shortGK_avg_xkick_cntr': [14.53,
  19

In [389]:
# Number of collected values per KPI column, to check the lists are equally long.
lengths = dict(zip(start_KPIs_1819.keys(), map(len, start_KPIs_1819.values())))
lengths

{'team': 11,
 'season': 11,
 'total_GKs': 11,
 'short_GKs': 11,
 'long_GKs': 11,
 'GKs_distnce_std': 11,
 'avg_GK_dist': 11,
 'all_scsfl_GKs': 11,
 'shrt_scsfl_GKs': 11,
 'lng_scsfl_GKs': 11,
 'shortGK_avg_xkick_lft': 11,
 'shortGK_avg_ykick_lft': 11,
 'shortGK_avg_xkick_cntr': 11,
 'shortGK_avg_ykick_cntr': 11,
 'shortGK_avg_xkick_rght': 11,
 'shortGK_avg_ykick_rght': 11,
 'longGK_avg_xkick_lft': 11,
 'longGK_avg_ykick_lft': 11,
 'longGK_avg_xkick_cntr': 11,
 'longGK_avg_ykick_cntr': 11,
 'longGK_avg_xkick_rght': 11,
 'longGK_avg_ykick_rght': 11,
 'ttl_avg_num_passes': 11,
 'ttl_avg_chain_speed': 11,
 'scsfl_prgpass_pcnt': 11,
 'scsfl_prgpass_avg_dist': 11,
 'ttl_prgpass_pcnt': 11,
 'ttl_prgpass_avg_dist': 11,
 'short_avg_num_passes': 11,
 'shrt_all_prgpass_pcnt': 11,
 'shrt_all_prgpass_avg_dist': 11,
 'shrt_scsfl_prgpass_pcnt': 11,
 'shrt_scsfl_prgpass_avg_dist': 11,
 'long_avg_num_passes': 11,
 'long_all_prgpass_pcnt': 11,
 'long_all_prgpass_avg_dist': 11,
 'long_scsfl_prgpass_pcnt'

In [390]:
# Build the 2018/19 KPI table from the dict of per-column lists
# (the lists must all have the same length).
KPIs_1819_df = pd.DataFrame(start_KPIs_1819)
#KPIs_1819_df

# 2019/20 Season

In [395]:
# Accumulator for the 2019/20 goal-kick KPIs: one (initially empty) list per
# column. The order of the names below is the column order of the KPI table.
start_KPIs_1920 = {
    column: []
    for column in (
        # identity
        'team', 'season',
        # goal-kick volume and length
        'total_GKs', 'short_GKs', 'long_GKs', 'GKs_distnce_std', 'avg_GK_dist',
        # successful goal kicks
        'all_scsfl_GKs', 'shrt_scsfl_GKs', 'lng_scsfl_GKs',
        # average end location of short goal kicks (left / centre / right)
        'shortGK_avg_xkick_lft', 'shortGK_avg_ykick_lft',
        'shortGK_avg_xkick_cntr', 'shortGK_avg_ykick_cntr',
        'shortGK_avg_xkick_rght', 'shortGK_avg_ykick_rght',
        # average end location of long goal kicks (left / centre / right)
        'longGK_avg_xkick_lft', 'longGK_avg_ykick_lft',
        'longGK_avg_xkick_cntr', 'longGK_avg_ykick_cntr',
        'longGK_avg_xkick_rght', 'longGK_avg_ykick_rght',
        # whole-chain figures
        'ttl_avg_chain_speed', 'ttl_avg_num_passes',
        # progressive passes over all chains
        'scsfl_prgpass_pcnt', 'scsfl_prgpass_avg_dist',
        'ttl_prgpass_pcnt', 'ttl_prgpass_avg_dist',
        # chains that start with a short goal kick
        'short_avg_num_passes',
        'shrt_all_prgpass_pcnt', 'shrt_all_prgpass_avg_dist',
        'shrt_scsfl_prgpass_pcnt', 'shrt_scsfl_prgpass_avg_dist',
        # chains that start with a long goal kick
        'long_avg_num_passes',
        'long_all_prgpass_pcnt', 'long_all_prgpass_avg_dist',
        'long_scsfl_prgpass_pcnt', 'long_scsfl_prgpass_avg_dist',
        # shots, goals and xG inside the chain
        'short_end_shot', 'short_end_goal', 'short_avg_xg',
        'long_end_shot', 'long_end_goal', 'long_avg_xg',
        # progression and what the following possession yields (short)
        'short_avg_x_prog',
        'fllw_shrt_x_rtrn', 'fllw_shrt_shots_rtrn',
        'fllw_shrt_goals_rtrn', 'fllw_shrt_avg_xg_rtrn',
        # progression and what the following possession yields (long)
        'long_avg_x_prog',
        'fllw_long_x_rtrn', 'fllw_long_shots_rtrn',
        'fllw_long_goals_rtrn', 'fllw_long_avg_xg_rtrn',
    )
}

# Empty frame carrying only the KPI column names.
KPIs_1920_df = pd.DataFrame(data=start_KPIs_1920)



## Load the raw 2019/20 event data
ssn1920 = pd.read_csv('../data/matches_ssn1920.csv')

## Teams present in the raw events (collected before any row filtering)
ssn1920_teams = ssn1920['team'].unique().tolist()

## Keep only the event types the goal-kick analysis works with
ssn1920 = ssn1920.loc[ssn1920['type'].isin(['Pass',
                                            'Carry',
                                            'Shot',
                                            'Goal Keeper',
                                            'Starting XI',
                                            'Half Start',
                                            'Injury Stoppage',
                                            'Half End'])]


# The timestamp text is parsed twice: as a datetime to read off the clock
# parts used for ordering, and as a timedelta for duration arithmetic.
# NOTE(review): the minute/second columns come from the timestamp only; if the
# timestamp restarts during a match, rows from different periods interleave in
# the sort below -- confirm against the data.
ssn1920['timestamp_1'] = pd.to_datetime(ssn1920['timestamp'])

ssn1920['minute'] = ssn1920['timestamp_1'].apply(lambda stamp: stamp.minute)
ssn1920['second'] = ssn1920['timestamp_1'].apply(lambda stamp: stamp.second)
ssn1920['millisecond'] = ssn1920['timestamp_1'].apply(lambda stamp: stamp.microsecond // 1000)

ssn1920['timestamp'] = pd.to_timedelta(ssn1920['timestamp'])

# Narrow to the analysis columns (this drops the helper 'timestamp_1'),
# then order the events chronologically inside each match.
ssn1920 = (
    ssn1920[[
        'match', 'home_team', 'away_team',
        'timestamp', 'minute', 'second', 'millisecond',
        'duration', 'possession', 'possession_team', 'defending_team',
        'play_pattern', 'type', 'team', 'player', 'position',
        'pass_length', 'pass_angle', 'pass_height',
        'pass_outcome', 'pass_body_part', 'pass_shot_assist',
        'shot_statsbomb_xg', 'shot_outcome',
        'x_start', 'y_start', 'x_end', 'y_end', 'z_end_shot',
    ]]
    .sort_values(by=['match', 'minute', 'second', 'millisecond', 'possession'])
    .reset_index(drop=True)
)


## Create a unique value to filter DF to ONLY 'From Goal Kick' play patterns
def create_GKmatch_possession(row):
    """Build the goal-kick chain id of one event row.

    Returns the string "<match>,<possession>" when the row's play pattern is
    'From Goal Kick', and None for every other row.
    """
    if row['play_pattern'] != 'From Goal Kick':
        return None
    return f"{row['match']},{row['possession']}"

## Tag every event with its goal-kick chain id (None outside goal-kick play)
ssn1920['GK_match_possession'] = ssn1920.apply(create_GKmatch_possession, axis='columns')

# Keep only the events that belong to a goal-kick chain
GKonly_1920 = ssn1920.loc[ssn1920['GK_match_possession'].notnull()]

In [396]:
def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator``, or NaN when the denominator is zero."""
    return numerator / denominator if denominator else float('nan')


def _next_possession_id(chain_id):
    """Return the "<match>,<possession + 1>" id that follows ``chain_id``."""
    # Split on the last comma only, so the possession number is always isolated.
    match_id, possession = chain_id.rsplit(',', 1)
    return f"{match_id},{int(possession) + 1}"


def _prog_pass_summary(passes):
    """Return ``(progressive share, mean progressive distance)`` for ``passes``.

    A pass counts as progressive when its end point is less than 75% as far
    from the point (120, 40) as its start point. The share is a 0-1 fraction
    (NaN when ``passes`` is empty); the distance is the mean reduction in
    distance to (120, 40) over the progressive passes only.
    """
    table = passes[['team', 'player',
                    'x_start', 'y_start',
                    'x_end', 'y_end']].reset_index(drop=True)
    table['prog_pass_start'] = np.sqrt(np.square(120 - table['x_start']) +
                                       np.square(40 - table['y_start']))
    table['prog_pass_end'] = np.sqrt(np.square(120 - table['x_end']) +
                                     np.square(40 - table['y_end']))
    table['prog_pass'] = (table['prog_pass_end'] / table['prog_pass_start']) < 0.75
    table['prog_pass_dist'] = table['prog_pass_start'] - table['prog_pass_end']

    progressive = table[table['prog_pass']]
    return _safe_ratio(len(progressive), len(table)), progressive['prog_pass_dist'].mean()


def _following_chain_returns(season_events, chain_ids):
    """Summarise the possessions that directly follow the given goal-kick chains.

    Returns ``(x_return, shots, goals, avg_xg)`` where ``x_return`` is 120 minus
    the mean last ``x_end`` of the following possessions.
    """
    following_ids = [_next_possession_id(chain_id) for chain_id in chain_ids]
    following = season_events.loc[season_events['new_match_possession'].isin(following_ids)]

    x_return = round(120 - following.groupby('new_match_possession').last()['x_end'].mean(), 3)
    shots = len(following.loc[following['type'] == 'Shot'])
    goals = len(following.loc[following['shot_outcome'] == 'Goal'])
    avg_xg = round(following['shot_statsbomb_xg'].mean(), 4)
    return x_return, shots, goals, avg_xg


def kpi_1920_values(team):
    """Append one row of 2019/20 goal-kick KPIs for ``team`` to ``start_KPIs_1920``.

    Reads the module-level ``GKonly_1920`` (goal-kick chain events) and
    ``ssn1920`` (all prepared events). Exactly one value is appended to every
    list in ``start_KPIs_1920``. On the first call a ``new_match_possession``
    column ("<match>,<possession>") is added to ``ssn1920``.

    A chain is "short" when the ``x_end`` of its first event is below 25.1 and
    "long" otherwise. Ratios whose denominator is zero (e.g. a team without
    any short chain) are stored as NaN instead of raising ZeroDivisionError.

    Note: ``groupby(...).first()`` / ``.last()`` return the first / last
    non-null value of each column within a chain, not a whole row.

    Parameters
    ----------
    team : value compared against the ``possession_team`` and ``team`` columns.

    Returns
    -------
    None
    """
    kpis = start_KPIs_1920

    # .copy(): helper columns are added below and must not leak into GKonly_1920.
    team_gk = GKonly_1920[GKonly_1920['possession_team'] == f'{team}'].copy()

    # Opening values of each of the team's goal-kick chains.
    team_gk_starts = team_gk.groupby('GK_match_possession').first()
    is_short_start = team_gk_starts['x_end'] < 25.1
    is_long_start = team_gk_starts['x_end'] >= 25.1

    kpis['season'].append('2019/20')
    kpis['team'].append(team)

    ## Goal-kick counts and length spread
    kpis['total_GKs'].append(len(team_gk_starts))
    kpis['short_GKs'].append(len(team_gk_starts[is_short_start]))
    kpis['long_GKs'].append(len(team_gk_starts[is_long_start]))
    kpis['GKs_distnce_std'].append(round(team_gk_starts['x_end'].std(), 3))
    kpis['avg_GK_dist'].append(round(team_gk_starts['x_end'].mean(), 2))

    ## Successful goal kicks (stored as counts, not percentages).
    ## A missing pass_outcome is treated as 'Successful'.
    team_gk['pass_outcome'] = team_gk['pass_outcome'].fillna('Successful')
    gk_first = team_gk.groupby('GK_match_possession').first()
    gk_successful = gk_first['pass_outcome'] == 'Successful'
    kpis['all_scsfl_GKs'].append(gk_first[gk_successful]['pass_outcome'].count())
    kpis['shrt_scsfl_GKs'].append(
        gk_first[(gk_first['x_end'] < 25.1) & gk_successful]['pass_outcome'].count())
    kpis['lng_scsfl_GKs'].append(
        gk_first[(gk_first['x_end'] >= 25.1) & gk_successful]['pass_outcome'].count())

    ## Average end location per kick length and starting zone. Fills the keys
    ## '{shortGK,longGK}_avg_{xkick,ykick}_{lft,cntr,rght}'.
    ## The zone bounds leave y_start in (38, 38.1) and (42.9, 43) unassigned.
    y_start = team_gk_starts['y_start']
    kick_zones = {
        'lft': y_start <= 38,
        'cntr': (y_start >= 38.1) & (y_start <= 42.9),
        'rght': y_start >= 43,
    }
    for length_label, length_mask in (('shortGK', is_short_start), ('longGK', is_long_start)):
        for zone_label, zone_mask in kick_zones.items():
            kicks = team_gk_starts[length_mask & zone_mask]
            kpis[f'{length_label}_avg_xkick_{zone_label}'].append(round(kicks['x_end'].mean(), 2))
            kpis[f'{length_label}_avg_ykick_{zone_label}'].append(round(kicks['y_end'].mean(), 2))

    ## Flag every event with whether its chain opened with a short kick
    team_gk['short_GK_start'] = is_short_start[team_gk['GK_match_possession']].values

    # Row masks reused by the sections below.
    is_short = team_gk['short_GK_start'] == True
    is_long = team_gk['short_GK_start'] == False
    is_pass = team_gk['type'] == 'Pass'
    is_shot = team_gk['type'] == 'Shot'
    is_goal = team_gk['shot_outcome'] == 'Goal'
    by_team = team_gk['team'] == team
    is_successful = team_gk['pass_outcome'] == 'Successful'
    moves_ball = team_gk['type'].isin(['Pass', 'Carry', 'Shot'])

    short_chain_ids = team_gk[is_short & team_gk['GK_match_possession'].notnull()][
        'GK_match_possession'].unique()
    long_chain_ids = team_gk[is_long & team_gk['GK_match_possession'].notnull()][
        'GK_match_possession'].unique()

    ## Average number of the team's passes per short / long chain
    kpis['short_avg_num_passes'].append(
        round(_safe_ratio(len(team_gk[is_short & is_pass & by_team]), len(short_chain_ids)), 3))
    kpis['long_avg_num_passes'].append(
        round(_safe_ratio(len(team_gk[is_long & is_pass & by_team]), len(long_chain_ids)), 3))

    ########## Progressive passes, all chains ##########
    share, distance = _prog_pass_summary(team_gk[is_pass & by_team & is_successful])
    kpis['scsfl_prgpass_pcnt'].append(round(share * 100, 2))
    kpis['scsfl_prgpass_avg_dist'].append(round(distance, 2))

    # Fix: the average distance is now taken from the same "all attempted
    # passes" table as the percentage (it used the successful-pass table with
    # a mask built from this one).
    share, distance = _prog_pass_summary(team_gk[is_pass & by_team])
    kpis['ttl_prgpass_pcnt'].append(round(share * 100, 2))
    kpis['ttl_prgpass_avg_dist'].append(round(distance, 2))

    ########## Progressive passes, short chains ##########
    # NOTE(review): the two short "pcnt" KPIs are stored as 0-1 fractions
    # (4 decimals) while the total and long ones are percentages (x100,
    # 2 decimals). Left unchanged; confirm which scale is intended.
    share, distance = _prog_pass_summary(team_gk[is_short & is_pass & by_team & is_successful])
    kpis['shrt_scsfl_prgpass_pcnt'].append(round(share, 4))
    kpis['shrt_scsfl_prgpass_avg_dist'].append(round(distance, 2))

    share, distance = _prog_pass_summary(team_gk[is_short & is_pass & by_team])
    kpis['shrt_all_prgpass_pcnt'].append(round(share, 4))
    kpis['shrt_all_prgpass_avg_dist'].append(round(distance, 2))

    ########## Progressive passes, long chains ##########
    share, distance = _prog_pass_summary(team_gk[is_long & is_pass & by_team & is_successful])
    kpis['long_scsfl_prgpass_pcnt'].append(round(share * 100, 2))
    kpis['long_scsfl_prgpass_avg_dist'].append(round(distance, 2))

    share, distance = _prog_pass_summary(team_gk[is_long & is_pass & by_team])
    kpis['long_all_prgpass_pcnt'].append(round(share * 100, 2))
    kpis['long_all_prgpass_avg_dist'].append(round(distance, 2))

    ## Passes per chain (every pass event in the chain, not filtered by team)
    kpis['ttl_avg_num_passes'].append(
        round(_safe_ratio(len(team_gk[is_pass]), len(team_gk['GK_match_possession'].unique())), 4))

    ## Chain speed: change in x_start from first to last event, per second
    chain_last = team_gk.groupby('GK_match_possession').last()
    chain_time = (chain_last['timestamp'] - gk_first['timestamp']).dt.total_seconds()
    chain_distance = chain_last['x_start'] - gk_first['x_start']
    kpis['ttl_avg_chain_speed'].append(round((chain_distance / chain_time).mean(), 4))

    ## Shots, goals and average xG inside short / long chains
    kpis['short_end_shot'].append(len(team_gk[is_short & is_shot]))
    kpis['short_end_goal'].append(len(team_gk[is_short & is_goal]))
    kpis['short_avg_xg'].append(round(team_gk[is_short & is_shot]['shot_statsbomb_xg'].mean(), 4))
    kpis['long_end_shot'].append(len(team_gk[is_long & is_shot]))
    kpis['long_end_goal'].append(len(team_gk[is_long & is_goal]))
    kpis['long_avg_xg'].append(round(team_gk[is_long & is_shot]['shot_statsbomb_xg'].mean(), 4))

    ## Average final x_end over the Pass/Carry/Shot events of each chain
    kpis['short_avg_x_prog'].append(round(
        team_gk[is_short & moves_ball].groupby('GK_match_possession').last()['x_end'].mean(), 2))
    kpis['long_avg_x_prog'].append(round(
        team_gk[is_long & moves_ball].groupby('GK_match_possession').last()['x_end'].mean(), 2))

    ###### Possessions that directly follow each goal-kick chain ######
    # The id column depends only on ssn1920, so it is built once (vectorised)
    # instead of with a row-wise apply on every call.
    if 'new_match_possession' not in ssn1920.columns:
        ssn1920['new_match_possession'] = (ssn1920['match'].astype(str) + ',' +
                                           ssn1920['possession'].astype(str))

    x_return, shots, goals, avg_xg = _following_chain_returns(ssn1920, short_chain_ids)
    kpis['fllw_shrt_x_rtrn'].append(x_return)
    kpis['fllw_shrt_shots_rtrn'].append(shots)
    kpis['fllw_shrt_goals_rtrn'].append(goals)
    kpis['fllw_shrt_avg_xg_rtrn'].append(avg_xg)

    x_return, shots, goals, avg_xg = _following_chain_returns(ssn1920, long_chain_ids)
    kpis['fllw_long_x_rtrn'].append(x_return)
    kpis['fllw_long_shots_rtrn'].append(shots)
    kpis['fllw_long_goals_rtrn'].append(goals)
    kpis['fllw_long_avg_xg_rtrn'].append(avg_xg)

In [397]:
# Start from empty KPI lists so that re-running this cell does not append a
# second copy of every team (which would duplicate rows in KPIs_1920_df).
for kpi_values in start_KPIs_1920.values():
    kpi_values.clear()

# One call per team appends one value to every list in start_KPIs_1920.
for team in ssn1920_teams:
    kpi_1920_values(team)

In [398]:
# Sanity check: every KPI list should hold one value per team.
lengths = {column: len(start_KPIs_1920[column]) for column in start_KPIs_1920}
lengths

{'team': 12,
 'season': 12,
 'total_GKs': 12,
 'short_GKs': 12,
 'long_GKs': 12,
 'GKs_distnce_std': 12,
 'avg_GK_dist': 12,
 'all_scsfl_GKs': 12,
 'shrt_scsfl_GKs': 12,
 'lng_scsfl_GKs': 12,
 'shortGK_avg_xkick_lft': 12,
 'shortGK_avg_ykick_lft': 12,
 'shortGK_avg_xkick_cntr': 12,
 'shortGK_avg_ykick_cntr': 12,
 'shortGK_avg_xkick_rght': 12,
 'shortGK_avg_ykick_rght': 12,
 'longGK_avg_xkick_lft': 12,
 'longGK_avg_ykick_lft': 12,
 'longGK_avg_xkick_cntr': 12,
 'longGK_avg_ykick_cntr': 12,
 'longGK_avg_xkick_rght': 12,
 'longGK_avg_ykick_rght': 12,
 'ttl_avg_chain_speed': 12,
 'ttl_avg_num_passes': 12,
 'scsfl_prgpass_pcnt': 12,
 'scsfl_prgpass_avg_dist': 12,
 'ttl_prgpass_pcnt': 12,
 'ttl_prgpass_avg_dist': 12,
 'short_avg_num_passes': 12,
 'shrt_all_prgpass_pcnt': 12,
 'shrt_all_prgpass_avg_dist': 12,
 'shrt_scsfl_prgpass_pcnt': 12,
 'shrt_scsfl_prgpass_avg_dist': 12,
 'long_avg_num_passes': 12,
 'long_all_prgpass_pcnt': 12,
 'long_all_prgpass_avg_dist': 12,
 'long_scsfl_prgpass_pcnt'

In [399]:
# Turn the per-column KPI lists into the 2019/20 table (one row per team).
KPIs_1920_df = pd.DataFrame(data=start_KPIs_1920)
# Uncomment to preview the table:
# KPIs_1920_df

# 2020/21 Season

In [400]:
# Accumulator for the 2020/21 goal-kick KPIs: one (initially empty) list per
# column. The order of the names below is the column order of the KPI table.
start_KPIs_2021 = {
    column: []
    for column in (
        # identity
        'team', 'season',
        # goal-kick volume and length
        'total_GKs', 'short_GKs', 'long_GKs', 'GKs_distnce_std', 'avg_GK_dist',
        # successful goal kicks
        'all_scsfl_GKs', 'shrt_scsfl_GKs', 'lng_scsfl_GKs',
        # average end location of short goal kicks (left / centre / right)
        'shortGK_avg_xkick_lft', 'shortGK_avg_ykick_lft',
        'shortGK_avg_xkick_cntr', 'shortGK_avg_ykick_cntr',
        'shortGK_avg_xkick_rght', 'shortGK_avg_ykick_rght',
        # average end location of long goal kicks (left / centre / right)
        'longGK_avg_xkick_lft', 'longGK_avg_ykick_lft',
        'longGK_avg_xkick_cntr', 'longGK_avg_ykick_cntr',
        'longGK_avg_xkick_rght', 'longGK_avg_ykick_rght',
        # whole-chain figures
        'ttl_avg_chain_speed', 'ttl_avg_num_passes',
        # progressive passes over all chains
        'scsfl_prgpass_pcnt', 'scsfl_prgpass_avg_dist',
        'ttl_prgpass_pcnt', 'ttl_prgpass_avg_dist',
        # chains that start with a short goal kick
        'short_avg_num_passes',
        'shrt_all_prgpass_pcnt', 'shrt_all_prgpass_avg_dist',
        'shrt_scsfl_prgpass_pcnt', 'shrt_scsfl_prgpass_avg_dist',
        # chains that start with a long goal kick
        'long_avg_num_passes',
        'long_all_prgpass_pcnt', 'long_all_prgpass_avg_dist',
        'long_scsfl_prgpass_pcnt', 'long_scsfl_prgpass_avg_dist',
        # shots, goals and xG inside the chain
        'short_end_shot', 'short_end_goal', 'short_avg_xg',
        'long_end_shot', 'long_end_goal', 'long_avg_xg',
        # progression and what the following possession yields (short)
        'short_avg_x_prog',
        'fllw_shrt_x_rtrn', 'fllw_shrt_shots_rtrn',
        'fllw_shrt_goals_rtrn', 'fllw_shrt_avg_xg_rtrn',
        # progression and what the following possession yields (long)
        'long_avg_x_prog',
        'fllw_long_x_rtrn', 'fllw_long_shots_rtrn',
        'fllw_long_goals_rtrn', 'fllw_long_avg_xg_rtrn',
    )
}

# Empty frame carrying only the KPI column names.
KPIs_2021_df = pd.DataFrame(data=start_KPIs_2021)



## Load the raw 2020/21 event data
ssn2021 = pd.read_csv('../data/matches_ssn2021.csv')

## Teams present in the raw events (collected before any row filtering)
ssn2021_teams = ssn2021['team'].unique().tolist()

## Keep only the event types the goal-kick analysis works with
ssn2021 = ssn2021.loc[ssn2021['type'].isin(['Pass',
                                            'Carry',
                                            'Shot',
                                            'Goal Keeper',
                                            'Starting XI',
                                            'Half Start',
                                            'Injury Stoppage',
                                            'Half End'])]

# The timestamp text is parsed twice: as a datetime to read off the clock
# parts used for ordering, and as a timedelta for duration arithmetic.
# NOTE(review): the minute/second columns come from the timestamp only; if the
# timestamp restarts during a match, rows from different periods interleave in
# the sort below -- confirm against the data.
ssn2021['timestamp_1'] = pd.to_datetime(ssn2021['timestamp'])

ssn2021['minute'] = ssn2021['timestamp_1'].apply(lambda stamp: stamp.minute)
ssn2021['second'] = ssn2021['timestamp_1'].apply(lambda stamp: stamp.second)
ssn2021['millisecond'] = ssn2021['timestamp_1'].apply(lambda stamp: stamp.microsecond // 1000)

ssn2021['timestamp'] = pd.to_timedelta(ssn2021['timestamp'])

# Narrow to the analysis columns (this drops the helper 'timestamp_1'),
# then order the events chronologically inside each match.
ssn2021 = (
    ssn2021[[
        'match', 'home_team', 'away_team',
        'timestamp', 'minute', 'second', 'millisecond',
        'duration', 'possession', 'possession_team', 'defending_team',
        'play_pattern', 'type', 'team', 'player', 'position',
        'pass_length', 'pass_angle', 'pass_height',
        'pass_outcome', 'pass_body_part', 'pass_shot_assist',
        'shot_statsbomb_xg', 'shot_outcome',
        'x_start', 'y_start', 'x_end', 'y_end', 'z_end_shot',
    ]]
    .sort_values(by=['match', 'minute', 'second', 'millisecond', 'possession'])
    .reset_index(drop=True)
)


## Create a unique value to filter DF to ONLY 'From Goal Kick' play patterns
def create_GKmatch_possession(row):
    """Return the "<match>,<possession>" key for a goal-kick event row, else None.

    Only rows whose 'play_pattern' is exactly 'From Goal Kick' get a key.
    """
    is_goal_kick_play = row['play_pattern'] == 'From Goal Kick'
    return f"{row['match']},{row['possession']}" if is_goal_kick_play else None

## Tag every event with its goal-kick chain key (None outside 'From Goal Kick' play)
ssn2021['GK_match_possession'] = ssn2021.apply(create_GKmatch_possession, axis=1)

# Keep only the events that carry a goal-kick chain key
GKonly_2021 = ssn2021.loc[ssn2021['GK_match_possession'].notna()]

In [401]:
# ---------------------------------------------------------------------------
# Private helpers for kpi_2021_values
# ---------------------------------------------------------------------------

def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or NaN when the denominator is zero."""
    return numerator / denominator if denominator else np.nan


def _progressive_pass_stats(passes):
    """Return (share, mean distance gained) of the progressive passes in `passes`.

    Distances are measured from the pass start/end to the point (120, 40)
    (presumably the centre of the attacking goal -- confirm against the pitch
    coordinates in use). A pass counts as progressive when its end distance is
    below 75% of its start distance. `share` is a fraction in [0, 1], or NaN
    when `passes` is empty; the mean gain is NaN when no pass is progressive.
    """
    dist_before = np.sqrt(np.square(120 - passes['x_start']) +
                          np.square(40 - passes['y_start']))
    dist_after = np.sqrt(np.square(120 - passes['x_end']) +
                         np.square(40 - passes['y_end']))
    progressive = (dist_after / dist_before) < 0.75

    share = _safe_ratio(int(progressive.sum()), len(passes))
    mean_gain = (dist_before - dist_after)[progressive].mean()
    return share, mean_gain


def _next_possession_key(chain_key):
    """Turn a "<match>,<possession>" key into the key of the next possession."""
    # rsplit keeps match labels that themselves contain a comma intact.
    match_label, possession = chain_key.rsplit(',', 1)
    return f"{match_label},{int(possession) + 1}"


def _following_possession_stats(events, chain_keys):
    """Summarise the possessions that directly follow the given goal-kick chains.

    Returns (x_return, shots, goals, avg_xg) where x_return is 120 minus the
    mean of the last non-null 'x_end' of each following possession.
    """
    next_keys = [_next_possession_key(key) for key in chain_keys]
    following = events[events['new_match_possession'].isin(next_keys)]

    x_return = round(120 - following.groupby('new_match_possession').last()['x_end'].mean(), 3)
    shots = len(following[following['type'] == 'Shot'])
    goals = len(following[following['shot_outcome'] == 'Goal'])
    avg_xg = round(following['shot_statsbomb_xg'].mean(), 4)
    return x_return, shots, goals, avg_xg


def kpi_2021_values(team):
    """Compute the 2020/21 goal-kick KPIs for one team and append them to start_KPIs_2021.

    Parameters
    ----------
    team : str
        Team name as it appears in the 'possession_team' / 'team' columns.

    Side effects
    ------------
    * Appends exactly one value to each KPI list in the module-level
      ``start_KPIs_2021`` dict. All values are computed first and appended
      together, so a failure part-way through leaves the lists untouched.
    * Adds a 'new_match_possession' column to the module-level ``ssn2021``
      frame the first time it is needed.

    Raises
    ------
    KeyError
        If ``start_KPIs_2021`` lacks one of the KPI names produced here.

    Notes
    -----
    * A chain is "short" when the first non-null 'x_end' in it is below 25.1,
      otherwise "long".
    * 'shrt_scsfl_prgpass_pcnt' and 'shrt_all_prgpass_pcnt' are stored as
      fractions (4 decimals); every other '*_prgpass_pcnt' KPI is a percentage
      (2 decimals). This is kept as-is so exported values stay comparable.
    * Ratios with a zero denominator (e.g. a team without short chains) are
      reported as NaN.
    """
    # .copy() so the helper columns added below never write into GKonly_2021.
    team_gk = GKonly_2021[GKonly_2021['possession_team'] == str(team)].copy()
    # Events without a recorded pass outcome are treated as successful.
    team_gk['pass_outcome'] = team_gk['pass_outcome'].fillna('Successful')

    # One row per chain: groupby().first()/.last() take the first/last
    # non-null value of every column inside each chain.
    chains = team_gk.groupby('GK_match_possession')
    chain_first = chains.first()
    chain_last = chains.last()
    n_chains = team_gk['GK_match_possession'].nunique()

    #################### GOAL-KICK COUNTS AND LENGTH ####################
    kick_x = chain_first['x_end']
    is_short_kick = kick_x < 25.1
    is_long_kick = kick_x >= 25.1
    kick_succeeded = chain_first['pass_outcome'] == 'Successful'

    kpis = {
        'season': '2020/21',
        'team': team,
        'total_GKs': len(chain_first),
        'short_GKs': int(is_short_kick.sum()),
        'long_GKs': int(is_long_kick.sum()),
        'GKs_distnce_std': round(kick_x.std(), 3),
        'avg_GK_dist': round(kick_x.mean(), 2),
        # Counts (not percentages) of successful goal kicks.
        'all_scsfl_GKs': int(kick_succeeded.sum()),
        'shrt_scsfl_GKs': int((is_short_kick & kick_succeeded).sum()),
        'lng_scsfl_GKs': int((is_long_kick & kick_succeeded).sum()),
    }

    #################### AVERAGE LANDING SPOT PER KICK ZONE ####################
    # Zones are defined on the y coordinate the kick was taken from.
    kick_origin_y = chain_first['y_start']
    zones = {
        'lft': kick_origin_y <= 38,
        'cntr': (kick_origin_y >= 38.1) & (kick_origin_y <= 42.9),
        'rght': kick_origin_y >= 43,
    }
    for length_label, length_mask in (('shortGK', is_short_kick), ('longGK', is_long_kick)):
        for zone_label, zone_mask in zones.items():
            landing = chain_first[length_mask & zone_mask]
            kpis[f'{length_label}_avg_xkick_{zone_label}'] = round(landing['x_end'].mean(), 2)
            kpis[f'{length_label}_avg_ykick_{zone_label}'] = round(landing['y_end'].mean(), 2)

    #################### EVENT-LEVEL MASKS ####################
    # Broadcast the per-chain short/long flag onto every event of the chain.
    team_gk['short_GK_start'] = is_short_kick.loc[team_gk['GK_match_possession']].to_numpy()

    in_short_chain = team_gk['short_GK_start']
    in_long_chain = ~in_short_chain
    is_pass = team_gk['type'] == 'Pass'
    is_shot = team_gk['type'] == 'Shot'
    is_goal = team_gk['shot_outcome'] == 'Goal'
    by_team = team_gk['team'] == team
    completed = team_gk['pass_outcome'] == 'Successful'
    team_pass = is_pass & by_team

    #################### PASSES PER CHAIN ####################
    n_short_chains = team_gk.loc[in_short_chain, 'GK_match_possession'].nunique()
    n_long_chains = team_gk.loc[in_long_chain, 'GK_match_possession'].nunique()

    kpis['short_avg_num_passes'] = round(
        _safe_ratio(int((in_short_chain & team_pass).sum()), n_short_chains), 3)
    kpis['long_avg_num_passes'] = round(
        _safe_ratio(int((in_long_chain & team_pass).sum()), n_long_chains), 3)
    # The overall figure counts every pass event in the chains (no team filter).
    kpis['ttl_avg_num_passes'] = round(_safe_ratio(int(is_pass.sum()), n_chains), 4)

    #################### CHAIN SPEED ####################
    # x distance gained between the first and last event of a chain, per second.
    chain_seconds = (chain_last['timestamp'] - chain_first['timestamp']).dt.total_seconds()
    chain_x_gain = chain_last['x_start'] - chain_first['x_start']
    kpis['ttl_avg_chain_speed'] = round((chain_x_gain / chain_seconds).mean(), 4)

    #################### PROGRESSIVE PASSES ####################
    # (KPI prefix, event mask, report share as a percentage?)
    pass_groups = (
        ('scsfl', team_pass & completed, True),
        ('ttl', team_pass, True),
        ('shrt_scsfl', in_short_chain & team_pass & completed, False),
        ('shrt_all', in_short_chain & team_pass, False),
        ('long_scsfl', in_long_chain & team_pass & completed, True),
        ('long_all', in_long_chain & team_pass, True),
    )
    for prefix, group_mask, as_percent in pass_groups:
        share, mean_gain = _progressive_pass_stats(team_gk[group_mask])
        # The average distance is always taken from the same set of passes as
        # the share (the 'ttl' distance used to be read from the
        # successful-only frame by mistake).
        kpis[f'{prefix}_prgpass_pcnt'] = round(share * 100, 2) if as_percent else round(share, 4)
        kpis[f'{prefix}_prgpass_avg_dist'] = round(mean_gain, 2)

    #################### SHOTS, GOALS, xG AND FINAL x OF THE CHAINS ####################
    on_ball = (team_gk['type'] == 'Pass') | (team_gk['type'] == 'Carry') | is_shot
    for label, chain_mask in (('short', in_short_chain), ('long', in_long_chain)):
        kpis[f'{label}_end_shot'] = int((chain_mask & is_shot).sum())
        kpis[f'{label}_end_goal'] = int((chain_mask & is_goal).sum())
        kpis[f'{label}_avg_xg'] = round(
            team_gk.loc[chain_mask & is_shot, 'shot_statsbomb_xg'].mean(), 4)
        # Mean of the last non-null x_end reached by a pass, carry or shot.
        kpis[f'{label}_avg_x_prog'] = round(
            team_gk[chain_mask & on_ball].groupby('GK_match_possession').last()['x_end'].mean(), 2)

    #################### POSSESSIONS FOLLOWING THE GOAL-KICK CHAINS ####################
    # The key column does not depend on the team, so build it once instead of
    # on every call. It is formatted row by row with the same f-string as
    # GK_match_possession so the two kinds of key compare equal.
    if 'new_match_possession' not in ssn2021.columns:
        ssn2021['new_match_possession'] = ssn2021.apply(
            lambda row: f"{row['match']},{row['possession']}", axis=1)

    short_keys = team_gk.loc[in_short_chain, 'GK_match_possession'].unique()
    long_keys = team_gk.loc[in_long_chain, 'GK_match_possession'].unique()
    for label, chain_keys in (('shrt', short_keys), ('long', long_keys)):
        x_return, shots, goals, avg_xg = _following_possession_stats(ssn2021, chain_keys)
        kpis[f'fllw_{label}_x_rtrn'] = x_return
        kpis[f'fllw_{label}_shots_rtrn'] = shots
        kpis[f'fllw_{label}_goals_rtrn'] = goals
        kpis[f'fllw_{label}_avg_xg_rtrn'] = avg_xg

    #################### ADDING THE VALUES TO THE KPI DICT ####################
    # Validate first, then append everything together, so the KPI lists can
    # never end up with different lengths.
    missing = [name for name in kpis if name not in start_KPIs_2021]
    if missing:
        raise KeyError(f"start_KPIs_2021 has no entry for: {missing}")
    for name, value in kpis.items():
        start_KPIs_2021[name].append(value)

In [402]:
# Start from empty KPI lists so that re-running this cell does not append
# every team a second time.
for collected_values in start_KPIs_2021.values():
    collected_values.clear()

# One call per team; each call appends one value to every KPI list.
for team in ssn2021_teams:
    kpi_2021_values(team)

In [403]:
# Sanity check: number of values collected per KPI (all lists should be equally long)
lengths = dict((kpi_name, len(kpi_values)) for kpi_name, kpi_values in start_KPIs_2021.items())
lengths

{'team': 12,
 'season': 12,
 'total_GKs': 12,
 'short_GKs': 12,
 'long_GKs': 12,
 'GKs_distnce_std': 12,
 'avg_GK_dist': 12,
 'all_scsfl_GKs': 12,
 'shrt_scsfl_GKs': 12,
 'lng_scsfl_GKs': 12,
 'shortGK_avg_xkick_lft': 12,
 'shortGK_avg_ykick_lft': 12,
 'shortGK_avg_xkick_cntr': 12,
 'shortGK_avg_ykick_cntr': 12,
 'shortGK_avg_xkick_rght': 12,
 'shortGK_avg_ykick_rght': 12,
 'longGK_avg_xkick_lft': 12,
 'longGK_avg_ykick_lft': 12,
 'longGK_avg_xkick_cntr': 12,
 'longGK_avg_ykick_cntr': 12,
 'longGK_avg_xkick_rght': 12,
 'longGK_avg_ykick_rght': 12,
 'ttl_avg_chain_speed': 12,
 'ttl_avg_num_passes': 12,
 'scsfl_prgpass_pcnt': 12,
 'scsfl_prgpass_avg_dist': 12,
 'ttl_prgpass_pcnt': 12,
 'ttl_prgpass_avg_dist': 12,
 'short_avg_num_passes': 12,
 'shrt_all_prgpass_pcnt': 12,
 'shrt_all_prgpass_avg_dist': 12,
 'shrt_scsfl_prgpass_pcnt': 12,
 'shrt_scsfl_prgpass_avg_dist': 12,
 'long_avg_num_passes': 12,
 'long_all_prgpass_pcnt': 12,
 'long_all_prgpass_avg_dist': 12,
 'long_scsfl_prgpass_pcnt'

In [None]:
# Display the raw KPI dict: one list of collected values per KPI name.
start_KPIs_2021

In [404]:
# Assemble the collected KPI lists into a table: one column per KPI name
KPIs_2021_df = pd.DataFrame(data=start_KPIs_2021)

In [405]:
# Write each season's KPI table to ../data, without the index column
kpi_exports = (
    (KPIs_2021_df, '../data/KPIs_2021.csv'),
    (KPIs_1920_df, '../data/KPIs_1920.csv'),
    (KPIs_1819_df, '../data/KPIs_1819.csv'),
)
for season_kpis, csv_path in kpi_exports:
    season_kpis.to_csv(csv_path, index=False)