In [3]:
import pandas as pd
import numpy as np

import sys
# Add the parent directory to the import path (presumably where `utils` lives).
sys.path.append('../')
# NOTE(review): the wildcard import hides which names this notebook takes from
# `utils`; consider importing the required names explicitly.
from utils import *

# Do not truncate columns when a DataFrame is displayed.
pd.set_option('display.max_columns', None)

In [4]:
# Root of the data directory, relative to this notebook; every path below is
# derived from it.
DATA_PATH = '../../data/'

# Provider-specific sub-directories.
WYSCOUT_PATH = f'{DATA_PATH}wyscout/'
SKILLCORNER_PATH = f'{DATA_PATH}skillcorner/'

# Individual input files.
PASSES_DF_PATH = f'{DATA_PATH}passes_resp_df.pkl'
MATCH_IDS_PATH = f'{DATA_PATH}matchids.csv'
XT_PLOT_PATH = f'{DATA_PATH}smoothed_xt.csv'

In [5]:
# Load the pickled passes DataFrame from PASSES_DF_PATH.
# NOTE(review): unpickling can execute arbitrary code — only load pickle files
# that come from a trusted source.
passes_resp_df = pd.read_pickle(PASSES_DF_PATH)

In [6]:
# Keep only matches that have both a Wyscout and a SkillCorner id, and store the
# SkillCorner id as an integer. The cleaning is a single chain ending in
# `.assign`, so no column is written onto the frame returned by `dropna`
# (which can trigger pandas' SettingWithCopyWarning).
match_info = (
    pd.read_csv(MATCH_IDS_PATH)
    .dropna(subset=['wyscout', 'skillcorner'])
    .assign(skillcorner=lambda df: df['skillcorner'].astype(int))
)

# Fail with an explicit message instead of a bare IndexError on `.values[0]`.
if match_info.empty:
    raise ValueError(
        "No row in " + MATCH_IDS_PATH + " has both a 'wyscout' and a 'skillcorner' id"
    )

# Only the first remaining match is used below.
wyscout_id = match_info['wyscout'].values[0]
skillcorner_id = match_info['skillcorner'].values[0]

# SkillCorner files are named '<skillcorner id>_<kind>.csv'.
metadata = pd.read_csv(SKILLCORNER_PATH + str(skillcorner_id) + "_metadata.csv")
tracking_df = pd.read_csv(SKILLCORNER_PATH + str(skillcorner_id) + "_tracking.csv")
lineup_df = pd.read_csv(SKILLCORNER_PATH + str(skillcorner_id) + "_lineup.csv")

In [7]:
# Reduce the lineup to player id + position, using the column names of the
# passes frame so the two can be merged.
# Rename first, then select by the NEW names: `rename` ignores labels that are
# not present, so re-running this cell on the already-renamed frame is a no-op
# instead of a KeyError on 'player_id'.
lineup_df = lineup_df.rename(
    columns={
        'player_id': 'tracking.player.id.skillcorner',
        'player_position': 'tracking.player_position',
    }
)[['tracking.player.id.skillcorner', 'tracking.player_position']]

# Show which position labels occur in the lineup.
lineup_df['tracking.player_position'].unique()

array(['Right Wing Back', 'Right Winger', 'Attacking Midfield',
       'Left Midfield', 'Center Forward', 'Right Midfield',
       'Defensive Midfield', 'Right Forward', 'Left Center Back',
       'Left Wing Back', 'Goalkeeper', 'Right Center Back', 'Left Winger',
       'Left Forward'], dtype=object)

In [8]:
# Attach each player's position from the lineup. The join key is spelled out so
# the merge does not depend on whichever column names the two frames share.
passes_resp_df_aug = passes_resp_df.merge(
    lineup_df,
    how='left',
    on='tracking.player.id.skillcorner',
)

# A left merge never drops rows, so equal lengths only prove that no row was
# duplicated by a repeated player id in lineup_df.
assert len(passes_resp_df) == len(passes_resp_df_aug), \
    "merge changed the number of rows (duplicate player ids in lineup_df?)"

# assert that every player has a matching entry in lineup_df: unmatched rows of
# a left merge carry NaN in the columns that come from the right frame.
rows_without_position = passes_resp_df_aug['tracking.player_position'].isna()
assert not rows_without_position.any(), \
    f"{rows_without_position.sum()} rows have no position from lineup_df"

In [9]:
# Position labels that are treated as defenders when filtering players below.
defender_positions = [
    'Right Wing Back',
    'Right Center Back',
    'Left Center Back',
    'Left Wing Back',
]

Keep only opposing-team players who are defenders and whose responsibility is greater than 0.

In [10]:
# Rows of non-teammates that carry a strictly positive responsibility.
passes_resp_df_aug = passes_resp_df_aug.loc[
    lambda rows: ~rows['tracking.is_teammate'] & (rows['responsibility'] > 0)
]

# Of those, only the players whose position is one of `defender_positions`.
df_resp_def = passes_resp_df_aug.loc[
    lambda rows: rows['tracking.player_position'].isin(defender_positions)
]

In [11]:
# Columns that identify a player; also used as the grouping key further down.
columns_subnet_group = [
    'tracking.player.id.skillcorner',
    'tracking.player_first_name',
    'tracking.player_last_name',
    'tracking.player_position',
    'tracking.team_name',
]

# Columns to keep: the per-row values followed by the player-identifying columns.
columns_subnet = ['timestamp', 'period', 'responsibility', 'dxt'] + columns_subnet_group

# Drop every other column from the defender rows.
df_resp_def = df_resp_def[columns_subnet]

In [12]:
# Display the filtered defender rows (the rendered output below is truncated by pandas).
df_resp_def

Unnamed: 0,timestamp,period,responsibility,dxt,tracking.player.id.skillcorner,tracking.player_first_name,tracking.player_last_name,tracking.player_position,tracking.team_name
14,00:00:01.900000,1,0.685988,-0.004162,24466,Jurriën,Timber Maduro,Right Wing Back,Netherland
34,00:00:05.800000,1,0.863538,-0.002211,14407,Lutsharel,Geertruida,Right Center Back,Netherland
36,00:00:05.800000,1,0.167511,-0.002211,24466,Jurriën,Timber Maduro,Right Wing Back,Netherland
56,00:00:09.000000,1,0.962982,-0.000660,14407,Lutsharel,Geertruida,Right Center Back,Netherland
57,00:00:09.000000,1,0.788364,-0.000660,10738,Virgil,van Dijk,Left Center Back,Netherland
...,...,...,...,...,...,...,...,...,...
19495,01:32:48.200000,2,0.717024,0.004008,14397,Tyrell,Malacia,Right Wing Back,Netherland
19515,01:33:13.600000,2,0.288703,0.014951,24466,Jurriën,Timber Maduro,Right Wing Back,Netherland
19517,01:33:13.600000,2,0.899308,0.014951,14397,Tyrell,Malacia,Right Wing Back,Netherland
19550,01:33:18.500000,2,0.551108,-0.003139,11630,Jules,Koundé,Right Wing Back,France


In [13]:
# One row per player (grouped by `columns_subnet_group`): the mean of
# `responsibility` and the number of rows in the group.
df_resp_def_subnet = (
    df_resp_def
    .groupby(columns_subnet_group)
    .agg(
        average_responsibility=('responsibility', 'mean'),
        group_size=('responsibility', 'size'),
    )
    .reset_index()
)

df_resp_def_subnet.head(10)

Unnamed: 0,tracking.player.id.skillcorner,tracking.player_first_name,tracking.player_last_name,tracking.player_position,tracking.team_name,average_responsibility,group_size
0,7863,Nathan,Aké,Left Wing Back,Netherland,0.547378,164
1,10738,Virgil,van Dijk,Left Center Back,Netherland,0.594165,179
2,11516,Dayot,Upamecano,Left Center Back,France,0.45086,47
3,11606,Theo,Hernández,Left Wing Back,France,0.488114,65
4,11630,Jules,Koundé,Right Wing Back,France,0.522956,70
5,14397,Tyrell,Malacia,Right Wing Back,Netherland,0.490315,8
6,14407,Lutsharel,Geertruida,Right Center Back,Netherland,0.521522,167
7,16320,Ibrahima,Konaté,Right Center Back,France,0.383661,62
8,24466,Jurriën,Timber Maduro,Right Wing Back,Netherland,0.506239,145


In [14]:
# Show the first five defender rows.
df_resp_def.head(5)

Unnamed: 0,timestamp,period,responsibility,dxt,tracking.player.id.skillcorner,tracking.player_first_name,tracking.player_last_name,tracking.player_position,tracking.team_name
14,00:00:01.900000,1,0.685988,-0.004162,24466,Jurriën,Timber Maduro,Right Wing Back,Netherland
34,00:00:05.800000,1,0.863538,-0.002211,14407,Lutsharel,Geertruida,Right Center Back,Netherland
36,00:00:05.800000,1,0.167511,-0.002211,24466,Jurriën,Timber Maduro,Right Wing Back,Netherland
56,00:00:09.000000,1,0.962982,-0.00066,14407,Lutsharel,Geertruida,Right Center Back,Netherland
57,00:00:09.000000,1,0.788364,-0.00066,10738,Virgil,van Dijk,Left Center Back,Netherland


## Defender Dyads

In [15]:
# Rows that share a (timestamp, period) key are treated as one group.
groupby_cols = ['timestamp', 'period']

# Keep only rows whose group has at least two defenders. Broadcasting the group
# size with `transform('size')` is vectorised; it replaces a Python lambda that
# `groupby(...).filter(...)` had to call once per group. The surviving rows and
# their order are the same.
filtered_df_resp_def = df_resp_def[
    df_resp_def.groupby(groupby_cols)['responsibility'].transform('size') >= 2
]
df_groups = filtered_df_resp_def.groupby(groupby_cols)

In [16]:
# Columns kept from the self-merge: the group key plus id and responsibility of
# both members of a pair.
relevant_cols = [
    'timestamp',
    'period',
    'tracking.player.id.skillcorner_x',
    'tracking.player.id.skillcorner_y',
    'responsibility_x',
    'responsibility_y',
]

# Build all defender pairs per (timestamp, period) by merging the frame with
# itself, keep each unordered pair once (id_x < id_y also removes self-pairs),
# and add the summed responsibility of the pair.
# The whole step is one chain ending in `.assign`, so `joint_resp` is never
# written onto a boolean-filtered slice (which raises SettingWithCopyWarning).
# NOTE(review): pairs are keyed on (timestamp, period) only, and the displayed
# outputs contain pairs of players from different teams (e.g. ids 7863 and
# 11516) — confirm whether the team should be part of the merge key.
df_resp_def_joint = (
    filtered_df_resp_def
    .merge(filtered_df_resp_def, on=groupby_cols)[relevant_cols]
    .loc[lambda pairs: pairs['tracking.player.id.skillcorner_x'] < pairs['tracking.player.id.skillcorner_y']]
    .assign(joint_resp=lambda pairs: pairs['responsibility_x'] + pairs['responsibility_y'])
)
df_resp_def_joint.head(10)

Unnamed: 0,timestamp,period,tracking.player.id.skillcorner_x,tracking.player.id.skillcorner_y,responsibility_x,responsibility_y,joint_resp
1,00:00:05.800000,1,14407,24466,0.863538,0.167511,1.031049
6,00:00:09.000000,1,10738,14407,0.788364,0.962982,1.751346
10,00:00:13.200000,1,10738,14407,0.449357,0.925059,1.374416
14,00:00:17.900000,1,10738,14407,0.943157,0.89114,1.834296
18,00:00:27.400000,1,10738,14407,0.853413,0.293905,1.147319
22,00:00:32.000000,1,10738,14407,0.629407,0.676123,1.305531
26,00:00:35.000000,1,7863,10738,0.492521,0.54671,1.039231
31,00:00:36.300000,1,10738,14407,0.084041,0.589306,0.673348
34,00:00:36.300000,1,7863,14407,0.682742,0.589306,1.272048
35,00:00:36.300000,1,7863,10738,0.682742,0.084041,0.766783


In [17]:
# Column list for a single pair observation (not referenced by the aggregation below).
columns_joint_group = [
    'timestamp',
    'period',
    'tracking.player.id.skillcorner_x',
    'tracking.player.id.skillcorner_y',
    'joint_resp',
]

# One row per player pair: mean and sum of the joint responsibility, and the
# number of rows in which the pair occurs.
df_resp_def_joint_group = (
    df_resp_def_joint
    .groupby(['tracking.player.id.skillcorner_x', 'tracking.player.id.skillcorner_y'])
    .agg(
        joint_responsibility_mean=('joint_resp', 'mean'),
        joint_responsibility_sum=('joint_resp', 'sum'),
        group_size=('joint_resp', 'size'),
    )
    .reset_index()
)

df_resp_def_joint_group.head()

Unnamed: 0,tracking.player.id.skillcorner_x,tracking.player.id.skillcorner_y,joint_responsibility_mean,joint_responsibility_sum,group_size
0,7863,10738,1.042314,63.581131,61
1,7863,11516,1.116042,5.580209,5
2,7863,11606,0.869456,1.738912,2
3,7863,11630,1.136597,34.097909,30
4,7863,14407,1.107318,16.609774,15
