In [1]:
import pandas as pd
import numpy as np

import sys
sys.path.append('../')
from utils import *

pd.set_option('display.max_columns', None)

In [2]:
DATA_PATH = '../../data/'
PASSES_DF_PATH = DATA_PATH + 'passes_df.pkl'
WYSCOUT_PATH = DATA_PATH + 'wyscout/'
SKILLCORNER_PATH = DATA_PATH + 'skillcorner/'
MATCH_IDS_PATH = DATA_PATH + 'matchids.csv'
XT_PLOT_PATH = DATA_PATH + 'smoothed_xt.csv'

# Passes Network

In [3]:
passes_df = pd.read_pickle(PASSES_DF_PATH)

match_info = pd.read_csv(MATCH_IDS_PATH)
match_info = match_info.dropna(subset=['wyscout', 'skillcorner'])
match_info['skillcorner'] = match_info['skillcorner'].astype(int)

wyscout_id = match_info['wyscout'].values[0]
skillcorner_id = match_info['skillcorner'].values[0]

metadata = pd.read_csv(SKILLCORNER_PATH + str(skillcorner_id) + "_metadata.csv")
tracking_df = pd.read_csv(SKILLCORNER_PATH + str(skillcorner_id) + "_tracking.csv")
lineup_df = pd.read_csv(SKILLCORNER_PATH + str(skillcorner_id) + "_lineup.csv")

In [4]:
lineup_df = lineup_df[['player_id', 'player_position']].rename(columns={'player_id': 'tracking.object_id', 'player_position': 'tracking.player_position'})
lineup_df['tracking.player_position'].unique()

passes_df_new = passes_df.merge(lineup_df, how='left')

# assert that every player has a matching entry in lineup_df
assert len(passes_df_new) == len(passes_df)

passes_df = passes_df_new

In [5]:
defender_positions = ['Right Wing Back', 'Right Center Back', 'Left Center Back', 'Left Wing Back']

Keep only players of the opposing team, who are defenders with responsibility greater than 0.

In [6]:
passes_df = passes_df[~passes_df['tracking.is_teammate']]
passes_df = passes_df[passes_df['responsibility'] > 0]
passes_df = passes_df[passes_df['tracking.player_position'].isin(defender_positions)]

# Passes Network

In [7]:
column_mask = ['timestamp', 'period', 'player.id.skillcorner', 'pass.recipient.id.skillcorner', 'location.x', 'location.y' , 'pass.endLocation.x', 'pass.endLocation.y', 'dxt', 'responsibility']
group_column_mask = [col for col in column_mask if col != 'responsibility']
passes_network = passes_df[column_mask].groupby(group_column_mask).agg({'responsibility': 'sum'}).reset_index()

In [8]:
passes_network.sample(5)

Unnamed: 0,timestamp,period,player.id.skillcorner,pass.recipient.id.skillcorner,location.x,location.y,pass.endLocation.x,pass.endLocation.y,dxt,responsibility
213,01:09:57.800000,2,16320,12246,19.058824,20.888421,6.352941,25.987368,-0.000909,0.461441
206,01:05:16.900000,2,5922,12190,6.352941,14.66,-39.666667,23.802105,0.017473,1.158069
84,00:22:28.900000,1,33512,7017,-14.823529,-18.326667,-5.294118,4.932308,0.033507,0.159547
21,00:09:09.700000,1,24466,10082,-42.875,31.814737,-42.875,23.073684,-0.001698,0.82796
184,00:53:48.200000,2,11630,12190,6.352941,29.629474,-11.647059,14.66,0.070783,1.302446


# Defender Responsibility Network

In [9]:
tracking_object_id = 11606

player_passes_df = passes_df[passes_df['player.id.skillcorner'] == tracking_object_id]
defender_responsibility_network = player_passes_df[player_passes_df['responsibility'] > 0]

column_mask = ['timestamp', 'period', 'tracking.x', 'tracking.y', 'dxt', 'responsibility']
defender_responsibility_network = defender_responsibility_network[column_mask]

In [10]:
player_passes_df = passes_df[passes_df['player.id.skillcorner'] == tracking_object_id]
defender_responsibility_network = player_passes_df[player_passes_df['responsibility'] > 0]

column_mask = ['timestamp', 'period', 'tracking.x', 'tracking.y', 'dxt', 'responsibility']
defender_responsibility_network = defender_responsibility_network[column_mask]

In [11]:
defender_responsibility_network.sample(5)

Unnamed: 0,timestamp,period,tracking.x,tracking.y,dxt,responsibility
12217,00:46:25.400000,2,-26.6,-10.87,0.006289,0.167888
14540,00:58:14.900000,2,44.88,-2.4,-0.001585,0.045348
13181,00:51:06.900000,2,-30.59,-10.26,-0.012568,0.043322
12216,00:46:25.400000,2,-23.43,1.79,0.006289,0.644935
9283,00:32:51.000000,1,3.99,0.81,-0.002365,0.038753


# Aggregate Statistics for each Defender

In [12]:
defender_stats = passes_df.groupby('tracking.object_id').agg(
    average_responsibility=('responsibility', 'mean'),
    average_dxt=('dxt', 'mean'),
    group_size=('responsibility', 'size')
).reset_index()

defender_stats.sample(5)

Unnamed: 0,tracking.object_id,average_responsibility,average_dxt,group_size
6,14407,0.296308,0.002681,33
4,11630,0.405123,0.006677,70
7,16320,0.272775,0.001711,62
5,14397,0.449654,0.061248,1
8,24466,0.238814,0.009559,29


## Defender Dyads

In [13]:
groupby_cols = ['timestamp', 'period']
pass_filt_df = passes_df.groupby(groupby_cols).filter(lambda x: len(x) >= 2)
#pass_filt_df = pass_filt_df.groupby(groupby_cols)

In [14]:
relevant_cols = ['timestamp', 'period', 'tracking.object_id_x', 'tracking.object_id_y', 'responsibility_x', 'responsibility_y']
joint_df = pass_filt_df.merge(pass_filt_df, on=groupby_cols)[relevant_cols]
joint_df = joint_df[joint_df['tracking.object_id_x'] < joint_df['tracking.object_id_y']]
joint_df['joint_resp'] = joint_df['responsibility_x'] + joint_df['responsibility_y']

In [15]:
columns_joint_group = ['timestamp', 'period', 'tracking.player.id.skillcorner_x', 'tracking.player.id.skillcorner_y', 'joint_resp']

defender_dyads_network = joint_df.groupby(['tracking.object_id_x', 'tracking.object_id_y']).agg(
    joint_responsibility_mean=('joint_resp', 'mean'),
    joint_responsibility_sum=('joint_resp', 'sum'),
    group_size=('joint_resp', 'size')
    ).reset_index()

In [16]:
defender_dyads_network.sample(5)

Unnamed: 0,tracking.object_id_x,tracking.object_id_y,joint_responsibility_mean,joint_responsibility_sum,group_size
10,11630,16320,0.738829,30.292009,41
5,11516,11606,0.792386,22.186806,28
2,7863,24466,0.574925,3.449551,6
12,14407,24466,0.511314,4.601827,9
9,11606,16320,0.658418,7.242601,11
