**8. Calculating metrics at the player level**

In the subsequent notebooks, we attempt to drill down further to the player level to understand how different LCBs tend to prefer different regions for making progressive passes, and how much threat they add in each region, based on their dominant foot.

The following tasks have been taken into account in this notebook:
1. Compute passing metrics (similar to match level as before) and group them by individual players

# Imports 

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
from matplotlib.patches import Ellipse
import seaborn as sns
from math import *
import matplotlib.pylab as pyl
import pickle
import swifter
import warnings
import plotly.express as px
from itertools import chain
import scipy.stats as sps
from tqdm import tqdm
from unidecode import unidecode
import re
from pathlib import Path
from tqdm.notebook import tqdm
import pandas as pd
import itertools


In [3]:
import warnings
warnings.filterwarnings('ignore', category=pd.io.pytables.PerformanceWarning)

In [4]:
# Raise pandas' display limits so wide and long frames are rendered in full.
# The column-width limit ('max_colwidth') is deliberately left at its default.
pd.options.display.max_columns = 1000
pd.options.display.max_rows = 3000

# Data 

In [5]:
# Read every cluster pickle (with VAEP values) into a list of dataframes,
# one per entry of cluster_names and in the same order
cluster_names = [
    'rll', 'rlll', 'rlr', 'rrl', 'rrll', 'rrlll', 'rrlr',
    'rrr', 'rrrl', 'rrrll', 'rrrlr', 'rrrr', 'rrrrl',
]
df_clusters = [
    pd.read_pickle(f'../data/clusters/clusters_vaep/cluster_{name}.pkl')
    for name in cluster_names
]

In [6]:
# Resetting the index of every dataframe in df_clusters: each frame is
# renumbered in place and its previous index is discarded
for cluster_df in df_clusters:
    cluster_df.reset_index(inplace=True, drop=True)

# Creating Player-Metrics Dataset

In [7]:
# Function to reorganise the match level metrics dataframe to player level
def player_metrics_func(l, show_progress=True):
    """Flatten backline-level rows into one row per player.

    For every row of every dataframe in ``l`` the ``backline`` value (3, 4 or
    5) selects the list of positions to extract. For each of those positions
    one output row is produced from: the player name stored in the column
    named after the position, the row's ``team``, ``footedness`` and
    ``backline`` values, the position label itself, and the values of all
    columns whose name starts with ``'<position>_'`` (in dataframe column
    order). Rows whose ``backline`` is not 3, 4 or 5 are skipped.

    Parameters
    ----------
    l : iterable of pandas.DataFrame
        Frames to flatten. Rows are addressed by the labels
        ``0 .. len(df) - 1``, so each frame needs a default integer index.
    show_progress : bool, default True
        Wrap the per-row loop of each frame in ``tqdm``. Pass ``False`` to
        run silently.

    Returns
    -------
    pandas.DataFrame
        Object-dtype frame with the 15 player-level columns. The index is a
        single 0..n-1 range (no repeated labels).

    Raises
    ------
    ValueError
        If a position does not have exactly ten ``'<position>_'`` columns.
    """
    columns = [
        'player_name', 'team', 'backline_footedness', 'backline', 'position',
        'passes', 'accpass', 'accpassloc', 'inaccpassloc', 'accpassvaep',
        'inaccpassvaep', 'accpassoff', 'inaccpassoff', 'accpassdef',
        'inaccpassdef',
    ]
    # Positions to extract for each supported backline size
    positions_by_backline = {
        3: ['RCB', 'CB', 'LCB'],
        4: ['RB', 'R_CB', 'L_CB', 'LB'],
        5: ['RWB', 'RCB', 'CB', 'LCB', 'LWB'],
    }
    n_metrics = len(columns) - 5  # everything after the five identity columns

    # Collect plain records and build the frame once at the end; concatenating
    # inside the loop copies the whole result on every iteration.
    records = []
    for df in l:
        metric_cols = {}  # prefix lookup is loop-invariant per frame: cache it
        row_labels = range(len(df))
        if show_progress:
            row_labels = tqdm(row_labels)
        for i in row_labels:
            positions = positions_by_backline.get(df['backline'][i])
            if positions is None:
                continue
            for p in positions:
                if p not in metric_cols:
                    metric_cols[p] = list(
                        df.columns[df.columns.str.startswith(p + '_')])
                values = df.loc[i, metric_cols[p]].values
                if len(values) != n_metrics:
                    raise ValueError(
                        f'expected {n_metrics} metric columns for position '
                        f'{p!r}, found {len(values)}')
                records.append([
                    df[p][i], df['team'][i], df['footedness'][i],
                    df['backline'][i], p, *values,
                ])

    # dtype=object keeps list-valued cells untouched and avoids dtype inference
    return pd.DataFrame(records, columns=columns, dtype=object)


In [8]:
# Build the player-level table from all clusters with rows renumbered from 0,
# then report its (rows, columns) size
player_metrics = player_metrics_func(df_clusters).reset_index(drop=True)
print(player_metrics.shape)

HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=70.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=164.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=26.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=82.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=316.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=8.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=46.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))


(2885, 15)


In [9]:
# Preview the last 20 rows of the player-level table
player_metrics.tail(20)

Unnamed: 0,player_name,team,backline_footedness,backline,position,passes,accpass,accpassloc,inaccpassloc,accpassvaep,inaccpassvaep,accpassoff,inaccpassoff,accpassdef,inaccpassdef
2865,KyleNaughton,Swansea,right-right-right-right-left,5,RWB,39,27,"[[[30.16, 6.12], [14.56, 8.84]], [[34.32, 2.04...","[[[32.24, 4.76], [35.36, 6.12]], [[33.28, 6.8]...","[-0.00030438019894063473, 0.001873926026746630...","[-0.00579429604113102, -0.010925043374300003, ...","[-0.00043917191214859486, 0.001523824874311685...","[-0.002266661264002323, -0.002798699773848057,...","[0.00013479171320796013, 0.0003501011524349451...","[-0.0035276347771286964, -0.008126343600451946..."
2866,MikevanderHoorn,Swansea,right-right-right-right-left,5,RCB,27,18,"[[[30.16, 21.76], [40.56, 14.96]], [[30.16, 16...","[[[60.32, 5.44], [70.72, 21.76]], [[21.84, 12....","[2.3149419575929642e-05, 0.01246498804539442, ...","[-0.0032844608649611473, 0.0024564226623624563...","[0.00012397533282637596, 0.0022160641383379698...","[-0.001406063325703144, 0.000289801275357604, ...","[-0.00010082591325044632, 0.010248923674225807...","[-0.0018783975392580032, 0.0021666213870048523..."
2867,FedericoFernandez,Swansea,right-right-right-right-left,5,CB,17,14,"[[[31.2, 33.32], [30.16, 21.76]], [[41.6, 16.3...","[[[52.0, 32.64], [71.76, 38.76]], [[27.04, 33....","[0.0012881478760391474, 0.004821080714464188, ...","[-0.0016094238962978125, 0.010347407311201096,...","[0.0010476121678948402, 0.003763394895941019, ...","[0.0009637204930186272, 0.0012512507382780313,...","[0.00024053570814430714, 0.0010576860513538122...","[-0.0025731443893164396, 0.009096156805753708,..."
2868,AlfieMawson,Swansea,right-right-right-right-left,5,LCB,21,19,"[[[32.24, 41.48], [31.2, 33.32]], [[26.0, 34.0...","[[[36.4, 49.64], [83.2, 68.0]], [[62.4, 54.4],...","[0.0015567794907838106, 0.0005909285973757505,...","[-0.003425469622015953, -0.002675830153748393]","[0.0011232001706957817, 0.00041716545820236206...","[-0.004434072412550449, -0.002225589007139206]","[0.0004335793200880289, 0.00017376313917338848...","[0.0010086027905344963, -0.0004502411466091871]"
2869,MartinOlsson,Swansea,right-right-right-right-left,5,LWB,24,13,"[[[82.16, 63.92], [95.68, 62.56]], [[64.48, 62...","[[[35.36, 59.16], [82.16, 59.16]], [[46.8, 63....","[0.00971209816634655, 0.0008557753171771765, 0...","[-0.004868157207965851, -0.007482745684683323,...","[0.009431428276002407, 0.0010588904842734337, ...","[-0.0026926114223897457, -0.004729554057121277...","[0.00028066954109817743, -0.000203115167096257...","[-0.002175545785576105, -0.002753191627562046,..."
2870,EzequielSchelotto,Brighton,right-right-right-right-left,5,RWB,39,32,"[[[15.6, 6.12], [11.44, 18.36]], [[66.56, 17.6...","[[[101.92, 16.32], [95.68, 53.04]], [[92.56, 6...","[-0.0027250798884779215, 0.008709050714969635,...","[-0.022368621081113815, -0.0026102035772055387...","[-0.0021365147549659014, 0.008279751054942608,...","[-0.021815089508891106, -0.0029721278697252274...","[-0.0005885651335120201, 0.0004292994271963835...","[-0.000553530640900135, 0.0003619242925196886,..."
2871,ConnorGoldson,Brighton,right-right-right-right-left,5,RCB,37,29,"[[[20.8, 13.6], [15.6, 34.0]], [[31.2, 20.4], ...","[[[11.44, 18.36], [46.8, 17.68]], [[32.24, 19....","[-0.0016521192155778408, -0.000637517310678958...","[-0.005109456367790699, -0.005683832801878452,...","[-0.001610324252396822, -0.0005284212529659271...","[4.69256192445755e-05, -0.0028726253658533096,...","[-4.179496318101883e-05, -0.000109096057713031...","[-0.0051563819870352745, -0.002811207436025142..."
2872,ShaneDuffy,Brighton,right-right-right-right-left,5,CB,31,28,"[[[21.84, 47.6], [30.16, 36.72]], [[20.8, 33.3...","[[[20.8, 59.84], [28.08, 54.4]], [[26.0, 27.88...","[0.00019070273265242577, 0.0017127888277173042...","[-0.009465785697102547, -0.016421709209680557,...","[1.1037569493055344e-05, 0.0001666424795985221...","[-0.00037744222208857536, 9.625079110264778e-0...","[0.00017966516315937042, 0.001546146348118782,...","[-0.009088343009352684, -0.016517959535121918,..."
2873,LewisDunk,Brighton,right-right-right-right-left,5,LCB,37,27,"[[[26.0, 63.24], [30.16, 64.6]], [[18.72, 37.4...","[[[31.2, 44.88], [43.68, 48.96]], [[28.08, 59....","[-0.0010328111238777637, 0.0074929846450686455...","[-0.007338287308812141, -0.0001143945846706628...","[-0.0005701703485101461, 0.0010665850713849068...","[-0.002070433460175991, 6.990809924900532e-05,...","[-0.0004626407753676176, 0.006426399573683739,...","[-0.00526785384863615, -0.0001843026839196682,..."
2874,MarkusSuttner,Brighton,right-right-right-right-left,5,LWB,25,19,"[[[41.6, 51.0], [39.52, 46.92]], [[61.36, 48.9...","[[[101.92, 61.88], [89.44, 26.52]], [[91.52, 6...","[-0.001984398579224944, 0.0032602078281342983,...","[-0.012535693123936653, -0.024366769939661026,...","[-0.0011985022574663162, 0.0031371312215924263...","[-0.010827042162418365, -0.021044034510850906,...","[-0.0007858963217586279, 0.0001230766065418720...","[-0.0017086504958570004, -0.003322735195979476..."


In [10]:
# Function to extract dominant foot information
def extract_foot(a, b):
    """Return one player's foot from a backline footedness string.

    Parameters
    ----------
    a : str
        Hyphen-separated feet of the backline, e.g. ``'right-left-left'``.
        The number of entries (3, 4 or 5) determines which position list
        is used to locate the player.
    b : str
        Position label of the player within that backline.

    Returns
    -------
    str or None
        The entry of ``a`` at the position's slot, or ``None`` when ``a``
        does not have 3, 4 or 5 entries.

    Raises
    ------
    ValueError
        If ``b`` is not a position of a backline of that size.
    """
    # Slot order of the positions for each supported backline size. The
    # three-player line has its own list so that wing-back labels are rejected
    # instead of being resolved through an index offset (which silently
    # wrapped 'RWB' around to the last entry).
    positions_by_size = {
        3: ['RCB', 'CB', 'LCB'],
        4: ['RB', 'R_CB', 'L_CB', 'LB'],
        5: ['RWB', 'RCB', 'CB', 'LCB', 'LWB'],
    }
    feet = a.split('-')
    positions = positions_by_size.get(len(feet))
    if positions is None:
        return None
    if b not in positions:
        raise ValueError(
            f'position {b!r} is not valid for a backline of {len(feet)}')
    return feet[positions.index(b)]

In [11]:
# Derive each player's own foot from the backline footedness string and the
# player's position, then discard the backline-level column
player_metrics['footedness'] = player_metrics.apply(
    lambda row: extract_foot(row['backline_footedness'], row['position']),
    axis=1,
)
player_metrics = player_metrics.drop(columns=['backline_footedness'])

In [12]:
# Preview the first 5 rows, now including the per-player footedness column
player_metrics.head(5)

Unnamed: 0,player_name,team,backline,position,passes,accpass,accpassloc,inaccpassloc,accpassvaep,inaccpassvaep,accpassoff,inaccpassoff,accpassdef,inaccpassdef,footedness
0,SteveCook,Bournemouth,3,RCB,40,27,"[[[64.48, 19.72], [64.48, 4.76]], [[46.8, 11.5...","[[[27.04, 4.76], [39.52, 4.76]], [[34.32, 8.16...","[-0.0017563197761774063, 0.0015748164150863886...","[-0.006077437195926905, -0.0012522567994892597...","[-0.002439117059111595, 0.001446905080229044, ...","[-0.0024494631215929985, 0.0004311646334826946...","[0.0006827972829341888, 0.00012791133485734463...","[-0.003627974074333906, -0.0016834214329719543...",right
1,NathanAke,Bournemouth,3,CB,30,24,"[[[48.88, 36.72], [39.52, 54.4]], [[6.24, 3.4]...","[[[28.08, 22.44], [55.12, 22.44]], [[31.2, 25....","[-0.0009504464687779546, 0.013750978745520115,...","[0.001986460294574499, -0.009018474258482456, ...","[-0.0012554381974041462, 0.002747779479250312,...","[0.0005576326511800289, 0.000248798169195652, ...","[0.0003049917286261916, 0.01100319903343916, 0...","[0.0014288276433944702, -0.009267272427678108,...",left
2,TyroneMings,Bournemouth,3,LCB,45,38,"[[[32.24, 47.6], [68.64, 48.96]], [[39.52, 54....","[[[33.28, 56.44], [65.52, 48.28]], [[19.76, 36...","[0.010723656043410301, 0.00023811450228095055,...","[-0.0038944093976169825, -0.024236343801021576...","[0.010727065615355968, 9.276997298002243e-05, ...","[-0.0014773723669350147, -0.001994863385334611...","[-3.409339115023613e-06, 0.0001453445293009281...","[-0.0024170370306819677, -0.02224148064851761,...",left
3,DeclanRice,West Ham,3,RCB,35,26,"[[[41.6, 18.36], [43.68, 33.32]], [[48.88, 14....","[[[28.08, 12.92], [68.64, 28.56]], [[36.4, 5.4...","[-0.000301242689602077, 0.007525372318923473, ...","[-0.002428452018648386, -0.004736741539090872,...","[-0.00043192552402615547, 0.007223010063171387...","[-0.001414981670677662, -0.003226976376026869,...","[0.00013068283442407846, 0.000302362022921443,...","[-0.001013470347970724, -0.001509765163064003,...",right
4,AngeloOgbonna,West Ham,3,CB,25,24,"[[[24.96, 31.96], [40.56, 17.0]], [[38.48, 44....","[[[33.28, 35.36], [75.92, 46.24]]]","[0.0016360594891011715, 0.0007009587716311216,...",[-0.0014785041566938162],"[0.0011539123952388763, 0.0006793192587792873,...",[0.00024306168779730797],"[0.00048214709386229515, 2.1639512851834297e-0...",[-0.0017215658444911242],left


In [13]:
# Replace some of the misspelled player names that occurred due to unidecode conversion.
# Keys are spellings found in player_metrics['player_name']; values are the
# spellings to use instead. Entries that mapped a name onto itself were
# dropped: they changed nothing and made keys and values overlap.
name_corrections = {
    'JamaalLascelles': 'JamalLascelles',
    'ChrisLwe': 'ChrisLowe',
    'CdricSoares': 'CedricSoares',
    'HctorBellern': 'HectorBellerin',
    'NicolsOtamendi': 'NicolasOtamendi',
    'CsarAzpilicueta': 'CesarAzpilicueta',
    'AntonioRdiger': 'AntonioRudiger',
    'JosHolebas': 'JoseHolebas',
    'SamusColeman': 'SeamusColeman',
    'AllanRomoNyom': 'AllanRomeoNyom',
    'NathanAk': 'NathanAke',
    'VictorLindelf': 'VictorNilssonLindelof',
    'DavinsonSnchez': 'DavinsonSanchezMina',
    'FedericoFernndez': 'FedericoFernandez',
    'SebastianPrdl': 'SebastianProdl',
    'MollaWagu': 'MollaWague',
    'JrmyPied': 'JeremyPied',
    'CheikhouKouyat': 'CheikhouKouyate',
    'JosFonte': 'JoseFonte',
    'VictorLindelof': 'VictorNilssonLindelof',
}
player_metrics = player_metrics.replace({'player_name': name_corrections})



In [14]:
# Group the data by individual players, their team, role and dominant foot.
# The columns to aggregate are selected with a list: selecting several columns
# with a bare tuple (groupby(...)['a', 'b']) is rejected by pandas >= 2.0.
# NOTE(review): for the list-valued columns the sum presumably concatenates the
# per-row lists - confirm against the displayed output.
group_keys = ['player_name', 'team', 'position', 'footedness']
metric_columns = [
    'passes', 'accpass',
    'accpassloc', 'inaccpassloc', 'accpassvaep', 'inaccpassvaep', 'accpassoff',
    'inaccpassoff', 'accpassdef', 'inaccpassdef',
]
df = player_metrics.groupby(group_keys)[metric_columns].sum().reset_index()

In [15]:
# Count rows per player/team/position/foot group: every row of player_metrics
# contributes 1.0 to the group's matches_played total.
group_keys = ['player_name', 'team', 'position', 'footedness']
# A scalar is broadcast to every row regardless of the index; assigning a
# freshly built Series would align on index labels and leave NaN for any row
# whose label is not in 0..n-1.
player_metrics['matches_played'] = 1.0
df1 = player_metrics.groupby(group_keys)['matches_played'].sum().reset_index()
# The totals are copied into df row by row, so both grouped frames must list
# the same groups in the same order.
assert df[group_keys].equals(df1[group_keys]), 'grouped frames are not aligned'
df['matches_played'] = df1.matches_played

In [16]:
# Preview the grouped per-player table, including matches_played
df.head()

Unnamed: 0,player_name,team,position,footedness,passes,accpass,accpassloc,inaccpassloc,accpassvaep,inaccpassvaep,accpassoff,inaccpassoff,accpassdef,inaccpassdef,matches_played
0,AaronCresswell,West Ham,LB,left,275,203,"[[[11.44, 47.6], [10.4, 48.96]], [[21.84, 58.4...","[[[22.88, 65.28], [31.2, 63.24]], [[24.96, 61....","[-0.0060007767751812935, 0.014951384626328945,...","[-0.005001368001103401, 0.0014556727837771177,...","[-0.00111871468834579, 0.0030969707295298576, ...","[-0.0018319590017199516, 0.0011951092164963484...","[-0.00488206185400486, 0.011854413896799088, 0...","[-0.003169409232214093, 0.00026056356728076935...",7.0
1,AaronCresswell,West Ham,LCB,left,796,644,"[[[27.04, 51.68], [55.12, 64.6]], [[65.52, 61....","[[[62.4, 51.0], [78.0, 49.64]], [[79.04, 61.2]...","[0.0010694738011807203, 0.002371369395405054, ...","[-0.00956201646476984, -0.007710381411015987, ...","[0.0018120664171874523, 0.002711281180381775, ...","[-0.006647953763604164, -0.006097717210650444,...","[-0.0007425926742143929, -0.000339911784976720...","[-0.002914062701165676, -0.001612664433196187,...",20.0
2,AaronWanBissaka,Crystal Palace,RB,right,226,165,"[[[60.32, 11.56], [70.72, 8.84]], [[55.12, 11....","[[[86.32, 22.44], [87.36, 25.84]], [[98.8, 10....","[0.0027392818592488766, -0.0019667267333716154...","[-0.02877645380795002, -0.01624043844640255, -...","[0.002814173698425293, -0.0016991370357573032,...","[-0.027683690190315247, -0.01549511682242155, ...","[-7.489195559173822e-05, -0.000267589697614312...","[-0.0010927643161267042, -0.000745321158319711...",7.0
3,AdamSmith,Bournemouth,LB,right,146,120,"[[[15.6, 59.84], [22.88, 54.4]], [[19.76, 61.2...","[[[67.6, 13.6], [74.88, 33.32]], [[44.72, 2.72...","[-0.002463837619870901, -0.0001035716850310564...","[-0.007104361429810524, 1.648860052227974e-05,...","[-0.005229263566434383, 0.0009387347381561995,...","[-0.005188643001019955, 0.0009281975217163563,...","[0.0027654259465634823, -0.0010423064231872559...","[-0.0019157183123752475, -0.000911708921194076...",3.0
4,AdamSmith,Bournemouth,RB,right,451,387,"[[[28.08, 4.76], [40.56, 20.4]], [[31.2, 13.6]...","[[[55.12, 4.08], [62.4, 6.8]], [[35.36, 4.08],...","[0.0007454273290932178, -0.002337034558877349,...","[-0.008189908228814602, 0.0007318942807614803,...","[0.0013971277512609959, -0.0004184735007584095...","[-0.005722924135625362, 0.0007222630083560944,...","[-0.000651700422167778, -0.0019185610581189394...","[-0.0024669840931892395, 9.631272405385971e-06...",10.0


In [17]:
# Persist the grouped per-player table as a pickle file
df.to_pickle('../data/defender_clusters/defender_clustering_dataset.pkl')