In [1]:
import pandas as pd
import numpy as np

import sys
# Add the parent directory to the import path (presumably where the local
# `utils` module lives; verify against the repository layout).
sys.path.append('../')
# NOTE(review): the star import hides which names this notebook takes from
# `utils`; consider importing the needed names explicitly.
from utils import *

# Do not truncate columns when a DataFrame is displayed.
pd.set_option('display.max_columns', None)

In [2]:
# Root folder for every input file (relative path).
DATA_PATH = '../../data/'

# Per-provider sub-directories.
WYSCOUT_PATH = f'{DATA_PATH}wyscout/'
SKILLCORNER_PATH = f'{DATA_PATH}skillcorner/'

# Individual input files stored directly under the data root.
PASSES_DF_PATH = f'{DATA_PATH}passes_resp_df.pkl'
MATCH_IDS_PATH = f'{DATA_PATH}matchids.csv'
XT_PLOT_PATH = f'{DATA_PATH}smoothed_xt.csv'

# Passes Network

In [3]:
# Load the passes table that is stored as a pickle file.
# NOTE(review): unpickling can execute arbitrary code, so only load this file
# when it comes from a trusted source.
passes_resp_df = pd.read_pickle(PASSES_DF_PATH)

In [4]:
# Load the match-id table and keep only matches that have both a Wyscout and a
# SkillCorner id. The integer cast happens inside the chain (via `assign`)
# instead of assigning a column on the result of `dropna`, which can raise
# SettingWithCopyWarning.
match_info = (
    pd.read_csv(MATCH_IDS_PATH)
    .dropna(subset=['wyscout', 'skillcorner'])
    .assign(skillcorner=lambda df: df['skillcorner'].astype(int))
)

# Fail with a clear message instead of an opaque IndexError when no match is left.
if match_info.empty:
    raise ValueError(
        f"No row in {MATCH_IDS_PATH} has both a 'wyscout' and a 'skillcorner' id"
    )

# Only the first remaining match is analysed below.
# NOTE(review): `wyscout_id` is not cast and may be a float if the CSV column
# was parsed as float; confirm before using it as an identifier.
wyscout_id = match_info['wyscout'].iloc[0]
skillcorner_id = match_info['skillcorner'].iloc[0]

# The three SkillCorner files of a match share the '<skillcorner_id>' file-name prefix.
skillcorner_prefix = SKILLCORNER_PATH + str(skillcorner_id)
metadata = pd.read_csv(skillcorner_prefix + "_metadata.csv")
tracking_df = pd.read_csv(skillcorner_prefix + "_tracking.csv")
lineup_df = pd.read_csv(skillcorner_prefix + "_lineup.csv")

In [5]:
# Reduce the lineup to player id and position, using the `tracking.*` column
# names the passes table is joined on. Renaming first and selecting by the new
# names keeps the cell re-runnable: `rename` ignores labels that no longer
# exist, so a second execution does not fail with a KeyError.
player_id_col = 'tracking.player.id.skillcorner'
position_col = 'tracking.player_position'
lineup_df = lineup_df.rename(
    columns={'player_id': player_id_col, 'player_position': position_col}
)[[player_id_col, position_col]]

# Attach each tracked player's position to the passes table.
# - `on` makes the join key explicit instead of relying on shared column names.
# - `validate='many_to_one'` raises if a player id occurs more than once in the
#   lineup, which would otherwise duplicate pass rows.
# - `indicator` records whether a lineup row was found for each pass row.
passes_resp_df_aug = passes_resp_df.merge(
    lineup_df,
    how='left',
    on=player_id_col,
    validate='many_to_one',
    indicator='_lineup_match',
)

# assert that every player has a matching entry in lineup_df
# (a left merge never drops rows, so comparing lengths cannot detect this)
unmatched = passes_resp_df_aug['_lineup_match'] != 'both'
assert not unmatched.any(), f"{unmatched.sum()} rows have no matching entry in lineup_df"
passes_resp_df_aug = passes_resp_df_aug.drop(columns='_lineup_match')

In [6]:
# Values of `tracking.player_position` that are treated as defenders.
defender_positions = [
    'Right Wing Back',
    'Right Center Back',
    'Left Center Back',
    'Left Wing Back',
]

Keep only opposing-team players whose responsibility is greater than 0, then select the defenders among them.

In [7]:
# Rows for players who are not teammates of the passer and carry positive responsibility.
is_opponent = ~passes_resp_df_aug['tracking.is_teammate']
has_responsibility = passes_resp_df_aug['responsibility'].gt(0)
passes_resp_df_aug = passes_resp_df_aug.loc[is_opponent & has_responsibility]

# Of those rows, keep the ones whose position is one of the defender positions.
is_defender = passes_resp_df_aug['tracking.player_position'].isin(defender_positions)
df_resp_def = passes_resp_df_aug.loc[is_defender]

# Passes Network

In [8]:
# Columns that identify one pass (who, when, from where, to whom/where, xT delta).
group_column_mask = [
    'matchId', 'timestamp', 'period',
    'player.id.skillcorner', 'location.x', 'location.y',
    'pass.recipient.id.skillcorner', 'pass.endLocation.x', 'pass.endLocation.y',
    'dxt',
]
# The same columns plus the value that is summed per pass.
column_mask = group_column_mask + ['responsibility']

# One row per pass: total responsibility of all defenders for that pass.
# NOTE: `groupby` drops rows whose key columns contain NaN (pandas default
# `dropna=True`), e.g. a pass without a recipient id would not appear here.
passes_network = (
    df_resp_def[column_mask]
    .groupby(group_column_mask, as_index=False)['responsibility']
    .sum()
)

# Fixed seed so the preview is identical on every Restart & Run All.
passes_network.sample(5, random_state=0)

Unnamed: 0,matchId,timestamp,period,player.id.skillcorner,location.x,location.y,pass.recipient.id.skillcorner,pass.endLocation.x,pass.endLocation.y,dxt,responsibility
3,5414111,00:00:13.200000,1,10738,26.470588,-14.66,14407,26.470588,0.704615,0.000147,1.374416
6,5414111,00:00:23.500000,1,4777,37.833333,-7.046154,10738,30.705882,-14.048889,0.000245,0.308272
141,5414111,00:15:43.600000,1,7863,6.352941,-31.814737,33512,6.352941,-21.616842,0.001,0.897997
90,5414111,00:11:50.800000,1,16320,14.823529,14.66,5922,-6.352941,28.172632,0.005677,0.695482
491,5414111,01:31:33.900000,2,3837,-7.411765,7.750769,12654,-19.058824,15.882222,0.012049,0.278905


# Defender Responsibility Network

In [9]:
# Columns kept for the row-level defender responsibility table.
columns_subnet = [
    'matchId', 'timestamp', 'period',
    'tracking.player.id.skillcorner', 'tracking.x', 'tracking.y',
    'responsibility', 'dxt',
]

# Columns that identify a defender; used as grouping keys for the aggregate statistics.
columns_subnet_group = [
    'tracking.player.id.skillcorner',
    'tracking.player_first_name', 'tracking.player_last_name',
    'tracking.player_position', 'tracking.team_name',
]

# Restrict the defender rows to the relevant columns.
defender_responsibility_network = df_resp_def.loc[:, columns_subnet]

In [10]:
# Preview five rows; the fixed seed keeps the output identical on every Restart & Run All.
defender_responsibility_network.sample(5, random_state=0)

Unnamed: 0,matchId,timestamp,period,tracking.player.id.skillcorner,tracking.x,tracking.y,responsibility,dxt
387,5414111,00:01:02.400000,1,10738,26.84,-11.56,0.681976,0.002475
18384,5414111,01:24:08.000000,2,11630,18.77,11.24,0.591013,-0.002425
18869,5414111,01:29:06.200000,2,11606,15.85,-12.29,0.876565,0.001866
16327,5414111,01:12:54.600000,2,14407,-2.17,-7.58,0.834541,-0.002896
3235,5414111,00:13:37.100000,1,16320,-23.49,-21.88,0.043391,-0.015362


# Aggregate Statistics for each Defender

In [11]:
# One row per defender: mean responsibility and the number of rows it is based on.
per_defender = df_resp_def.groupby(columns_subnet_group)
df_resp_def_subnet = per_defender.agg(
    average_responsibility=pd.NamedAgg(column='responsibility', aggfunc='mean'),
    group_size=pd.NamedAgg(column='responsibility', aggfunc='size'),
).reset_index()

df_resp_def_subnet.head(10)

Unnamed: 0,tracking.player.id.skillcorner,tracking.player_first_name,tracking.player_last_name,tracking.player_position,tracking.team_name,average_responsibility,group_size
0,7863,Nathan,Aké,Left Wing Back,Netherland,0.547378,164
1,10738,Virgil,van Dijk,Left Center Back,Netherland,0.594165,179
2,11516,Dayot,Upamecano,Left Center Back,France,0.45086,47
3,11606,Theo,Hernández,Left Wing Back,France,0.488114,65
4,11630,Jules,Koundé,Right Wing Back,France,0.522956,70
5,14397,Tyrell,Malacia,Right Wing Back,Netherland,0.490315,8
6,14407,Lutsharel,Geertruida,Right Center Back,Netherland,0.521522,167
7,16320,Ibrahima,Konaté,Right Center Back,France,0.383661,62
8,24466,Jurriën,Timber Maduro,Right Wing Back,Netherland,0.506239,145


## Defender Dyads

In [12]:
# Key that identifies one pass. `matchId` is included so that, if the table ever
# holds more than one match, rows from different matches that share a timestamp
# and period are not grouped (and later paired) together.
groupby_cols = ['matchId', 'timestamp', 'period']

# Keep only passes that have at least two responsible defenders. A vectorised
# group-size mask selects the same rows as `filter(lambda x: len(x) >= 2)`
# without calling a Python function once per group.
defenders_per_pass = df_resp_def.groupby(groupby_cols)['responsibility'].transform('size')
filtered_df_resp_def = df_resp_def[defenders_per_pass >= 2]
df_groups = filtered_df_resp_def.groupby(groupby_cols)

In [13]:
relevant_cols = ['timestamp', 'period', 'tracking.player.id.skillcorner_x', 'tracking.player.id.skillcorner_y', 'responsibility_x', 'responsibility_y']

# Pair every defender with every defender of the same pass through a self-merge.
# Only the key columns, the player id and the responsibility are merged, so the
# intermediate frame does not carry two copies of every other column.
dyad_inputs = filtered_df_resp_def[groupby_cols + ['tracking.player.id.skillcorner', 'responsibility']]
df_resp_def_joint = dyad_inputs.merge(dyad_inputs, on=groupby_cols)[relevant_cols]

# Keep each unordered pair once (id_x < id_y): this drops self-pairs and mirrored duplicates.
is_unique_pair = (
    df_resp_def_joint['tracking.player.id.skillcorner_x']
    < df_resp_def_joint['tracking.player.id.skillcorner_y']
)

# `assign` returns a new frame, so adding the column does not trigger
# SettingWithCopyWarning on the filtered slice.
df_resp_def_joint = df_resp_def_joint.loc[is_unique_pair].assign(
    joint_resp=lambda df: df['responsibility_x'] + df['responsibility_y']
)

In [None]:
columns_joint_group = [
    'timestamp', 'period',
    'tracking.player.id.skillcorner_x', 'tracking.player.id.skillcorner_y',
    'joint_resp',
]

# One row per defender pair: mean and total joint responsibility, and the
# number of rows in which the pair appears together.
dyad_keys = ['tracking.player.id.skillcorner_x', 'tracking.player.id.skillcorner_y']
defender_dyads_network = (
    df_resp_def_joint
    .groupby(dyad_keys)['joint_resp']
    .agg(
        joint_responsibility_mean='mean',
        joint_responsibility_sum='sum',
        group_size='size',
    )
    .reset_index()
)

In [15]:
# Preview five dyads; the fixed seed keeps the output identical on every Restart & Run All.
defender_dyads_network.sample(5, random_state=0)

Unnamed: 0,tracking.player.id.skillcorner_x,tracking.player.id.skillcorner_y,joint_responsibility_mean,joint_responsibility_sum,group_size
1,7863,11516,1.116042,5.580209,5
0,7863,10738,1.042314,63.581131,61
7,10738,11516,1.066106,3.198319,3
10,10738,14397,1.704591,1.704591,1
4,7863,14407,1.107318,16.609774,15
