In [1]:
import pandas as pd

import sys
# Add the parent directory to the module search path so the `utils` import
# below can be resolved (presumably utils lives one level up — confirm).
sys.path.append('../')
# NOTE(review): the wildcard import hides which names this notebook actually
# takes from `utils`; consider importing the needed names explicitly.
from utils import *

# Display every column of a DataFrame instead of truncating wide frames.
pd.set_option('display.max_columns', None)

In [2]:
# Provider-specific identifiers of the analysed match
# (presumably the same fixture in both data sources — confirm).
WYSCOUT_ID: int = 5414111
SKILLCORNER_ID: int = 952209

In [3]:
# Root folder holding every artefact of the selected match; all other paths
# are derived from it (DATA_PATH already ends with a slash).
DATA_PATH = f'../../data/networks/match_{SKILLCORNER_ID}/'

# Files and sub-folders inside the match folder.
PASSES_DF_PATH = f'{DATA_PATH}passes_df.pkl'
WYSCOUT_PATH = f'{DATA_PATH}wyscout/'
SKILLCORNER_PATH = f'{DATA_PATH}skillcorner/'
MATCH_IDS_PATH = f'{DATA_PATH}matchids.csv'
XT_PLOT_PATH = f'{DATA_PATH}smoothed_xt.csv'

# Load Passes Data

In [4]:
# Read the passes table from disk and expose the period column under the
# dotted name `match.period` used by the rest of the notebook.
# NOTE(review): unpickling can execute arbitrary code — only load trusted files.
passes_df = (
    pd.read_pickle(PASSES_DF_PATH)
    .rename(columns={'matchPeriod': 'match.period'})
)

Keep only rows for opposing-team players (`tracking.is_teammate` is false) with a valid tracking id (`tracking.object_id != -1`) whose responsibility for the pass is greater than 0.

In [5]:
# Build the three row conditions separately, then apply them in one selection.
is_opponent = ~passes_df['tracking.is_teammate']
has_object_id = passes_df['tracking.object_id'] != -1
has_responsibility = passes_df['responsibility'] > 0

passes_df = passes_df[is_opponent & has_object_id & has_responsibility]

# Passes Network

In [6]:
# Columns describing a pass, plus the responsibility value to aggregate.
column_mask = [
    'player.id.skillcorner',
    'pass.recipient.id.skillcorner',
    'location.x',
    'location.y',
    'pass.endLocation.x',
    'pass.endLocation.y',
    'dxt',
    'responsibility',
    'match.period',
    'team.name',
]
# Every pass column except the aggregated one acts as a grouping key.
group_column_mask = [name for name in column_mask if name != 'responsibility']

# One row per distinct key combination, with the summed responsibility and
# the number of rows that contributed to it. Named aggregation yields the
# flat `responsibility.sum` / `responsibility.count` column names directly.
passes_network = (
    passes_df[column_mask]
    .groupby(group_column_mask)
    .agg(**{
        'responsibility.sum': ('responsibility', 'sum'),
        'responsibility.count': ('responsibility', 'count'),
    })
    .reset_index()
)

In [7]:
# Preview five randomly drawn rows of the passes network.
passes_network.sample(n=5)

Unnamed: 0,player.id.skillcorner,pass.recipient.id.skillcorner,location.x,location.y,pass.endLocation.x,pass.endLocation.y,dxt,match.period,team.name,responsibility.sum,responsibility.count
115,7863,7017,7.411765,-28.901053,5.294118,-14.66,0.001917,1,Netherland,0.409047,1
42,3837,7387,-28.588235,19.548889,-41.5,10.382222,0.060008,1,Netherland,0.783624,1
177,10738,14407,15.882353,-13.437778,9.529412,19.548889,0.001281,1,Netherland,0.700065,1
108,7604,8206,45.625,-1.409231,4.235294,0.0,0.005974,2,France,1.50975,4
256,11630,12190,-36.916667,-28.901053,-13.764706,-27.444211,0.002269,1,France,2.949551,4


# Defender Responsibility Network

In [8]:
# Defender whose responsibility network is extracted.
tracking_object_id = 11606

# Pass description columns kept in the defender's network.
column_mask = [
    'player.id.skillcorner',
    'pass.recipient.id.skillcorner',
    'location.x',
    'location.y',
    'pass.endLocation.x',
    'pass.endLocation.y',
    'dxt',
    'responsibility',
    'match.period',
    'team.name',
]

# All rows attributed to the selected defender.
player_passes_df = passes_df[passes_df['tracking.object_id'] == tracking_object_id]

# Rows with positive responsibility, restricted to the pass columns.
defender_responsibility_network = player_passes_df.loc[
    player_passes_df['responsibility'] > 0, column_mask
]

In [9]:
# Preview five randomly drawn rows of the selected defender's network.
defender_responsibility_network.sample(n=5)

Unnamed: 0_level_0,player.id.skillcorner,pass.recipient.id.skillcorner,location.x,location.y,pass.endLocation.x,pass.endLocation.y,dxt,responsibility,match.period,team.name
frame,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
50926,3837,12654,24.352941,-17.715556,44.25,-26.715789,0.027531,0.339803,2,Netherland
8566,10738,10082,-1.058824,-14.048889,-47.916667,31.814737,0.048573,0.885358,1,Netherland
56242,12654,10916,19.058824,-15.882222,31.764706,-15.882222,0.024102,0.217319,2,Netherland
56627,10738,14397,-27.529412,12.215556,-6.352941,-23.802105,0.003263,0.11554,2,Netherland
1117,10082,7017,14.823529,32.543158,16.941176,18.326667,0.000201,0.569141,1,Netherland


# Aggregate Statistics for each Defender

In [34]:
# Per-defender summary: output column -> (source column, aggregation).
defender_agg_spec = {
    'responsibility_mean': ('responsibility', 'mean'),
    'dxt_mean': ('dxt', 'mean'),
    'group_size': ('responsibility', 'size'),
}

# One row per `tracking.object_id` with the statistics defined above.
defender_stats = (
    passes_df
    .groupby('tracking.object_id')
    .agg(**defender_agg_spec)
    .reset_index()
)

defender_stats.sample(n=5)

Unnamed: 0,tracking.object_id,responsibility_mean,dxt_mean,group_size
15,10916,0.405366,0.010905,14
5,4777,0.161635,0.065234,7
11,7863,0.487821,0.016757,27
0,277,0.410154,0.010028,35
8,7017,0.459053,0.007955,27


# Defender Dyads

In [31]:
# Build defender dyads: pairs of defenders that both have a row for the same
# `frame`, with their combined responsibility and the dxt of that row.
passes_df_cp = passes_df.reset_index()

# Keep only frames that occur on at least two rows. `duplicated(keep=False)`
# marks every row whose frame appears more than once, which replaces the
# per-group Python lambda of `groupby('frame').filter(...)` with a vectorised
# check.
pass_filt_df = passes_df_cp[passes_df_cp.duplicated('frame', keep=False)]

relevant_cols = ['frame', 'team.name_x', 'tracking.object_id_x', 'tracking.object_id_y',  'dxt_x', 'responsibility_x', 'responsibility_y']

# Self-join on `frame` using only the columns that feed `relevant_cols`, so the
# pairwise product is not materialised for every column of the wide frame.
pair_source = pass_filt_df[['frame', 'team.name', 'tracking.object_id', 'dxt', 'responsibility']]
joint_df = pair_source.merge(pair_source, on='frame')[relevant_cols]

# `id_x < id_y` keeps each unordered pair once and drops self-pairs.
# `assign`/`rename` return new frames, avoiding both the SettingWithCopyWarning
# of assigning into a filtered slice and an in-place rename.
joint_df = (
    joint_df[joint_df['tracking.object_id_x'] < joint_df['tracking.object_id_y']]
    .assign(joint_resp=lambda pairs: pairs['responsibility_x'] + pairs['responsibility_y'])
    .rename(columns={'dxt_x': 'dxt', 'team.name_x': 'team_name'})
)

# NOTE(review): not referenced in this cell, and the two
# `tracking.player.id.skillcorner_*` names are not columns of `joint_df`;
# kept only in case a later cell reads it — confirm and remove if stale.
columns_joint_group = ['frame', 'tracking.player.id.skillcorner_x', 'tracking.player.id.skillcorner_y', 'joint_resp']

# One row per defender pair.
# NOTE(review): `team_name` is taken from the row's `team.name`; confirm whether
# that is the defenders' team or the passing team before labelling plots.
defender_dyads_network = joint_df.groupby(['tracking.object_id_x', 'tracking.object_id_y']).agg(
    joint_responsibility_mean=('joint_resp', 'mean'),
    joint_responsibility_sum=('joint_resp', 'sum'),
    joint_xt_mean=('dxt', 'mean'),
    joint_xt_sum=('dxt', 'sum'),
    team_name=('team_name', 'first'),
    group_size=('joint_resp', 'size'),
).reset_index()

In [32]:
# Preview five randomly drawn defender pairs.
defender_dyads_network.sample(n=5)

Unnamed: 0,tracking.object_id_x,tracking.object_id_y,joint_responsibility_mean,joint_responsibility_sum,joint_xt_mean,joint_xt_sum,team_name,group_size
46,4777,7387,1.298321,1.298321,0.076054,0.076054,France,1
21,946,16320,1.053264,6.319587,0.027113,0.162675,Netherland,6
135,11630,16320,0.957662,6.703634,0.018316,0.128212,Netherland,7
61,6028,8206,0.466305,0.932611,0.002655,0.005311,Netherland,2
117,10916,31930,0.562871,0.562871,-0.000747,-0.000747,France,1
