In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

pd.set_option('display.max_columns', None)

In [2]:
DATA_PATH = '../data/'
PASSES_DF_PATH = DATA_PATH + 'passes_df.pkl'
WYSCOUT_TO_SKILLCORNER = DATA_PATH + 'wyscout2skillcorner.csv'

In [3]:
def responsibility(row, max_distance=10, pass_length_factor=1.0, end_location_factor=0.5):
    
    start_x = row['location.x']
    start_y = row['location.y']
    end_x = row['pass.endLocation.x']
    end_y = row['pass.endLocation.y']
    
    player_x = row['tracking.x']
    player_y = row['tracking.y']
    
    # Vector from start to end of the pass
    pass_vector = np.array([end_x - start_x, end_y - start_y])
    # Vector from start of the pass to the player's position
    player_vector = np.array([player_x - start_x, player_y - start_y])
    
    pass_length = np.linalg.norm(pass_vector)
    
    
    if pass_length > 0:
        pass_unit_vector = pass_vector / pass_length
        projection_length = np.dot(player_vector, pass_unit_vector)
        
        # Clamp projection_length to the range [0, pass_length] to account for the endpoints of the pass
        projection_length = max(0, min(projection_length, pass_length))
        
        # Find the point on the pass closest to the player
        closest_point = np.array([start_x, start_y]) + projection_length * pass_unit_vector
        
        # Distance from player to the closest point on the pass
        distance_to_pass = np.linalg.norm(np.array([player_x, player_y]) - closest_point)
    else:
        # If the pass length is zero, set distance to the player's distance from the start point
        distance_to_pass = np.linalg.norm(np.array([player_x, player_y]) - np.array([start_x, start_y]))
    
    # Calculate distance from the player to the end location of the pass
    distance_to_end_location = np.linalg.norm(np.array([player_x, player_y]) - np.array([end_x, end_y]))
    
    # Calculate responsibility with pass length and distance to pass scaling
    if distance_to_pass < max_distance and row['tracking.object_id'] != -1:
        """
        # Apply scaling based on the distance to the pass and pass length
        raw_responsibility = (1 - (distance_to_pass / max_distance)) * np.power(1 + pass_length_factor, pass_length)
        
        # Normalize the responsibility to be between 0 and 1
        max_possible_responsibility = np.power(1 + pass_length_factor, pass_length)
        responsibility_score = raw_responsibility / max_possible_responsibility
        
        # Adjust the responsibility based on the distance to the end location
        
        proximity_to_end = 1 - (distance_to_end_location / max_distance)
        end_location_adjustment = (1 + end_location_factor * proximity_to_end)
        responsibility_score *= end_location_adjustment
        """
        raw_responsibility = (1 - (distance_to_pass / max_distance)) * np.power(1 + pass_length_factor, pass_length)
        
        # Normalize the responsibility to be between 0 and 1
        max_possible_responsibility = np.power(1 + pass_length_factor, pass_length)
        responsibility_score = raw_responsibility / max_possible_responsibility
        
        
    else:
        # If the player is too far, they get no responsibility
        responsibility_score = 0
        
    
        
    return responsibility_score

In [4]:
passes_df = pd.read_pickle(PASSES_DF_PATH)

In [5]:
passes_df['responsibility'] = passes_df.apply(responsibility, axis=1, pass_length_factor = 1)

cols_to_display = ['player.id', 'tracking.object_id', 'location.x', 'location.y', 'pass.endLocation.x', 
                   'pass.endLocation.y','tracking.x', 'tracking.y', 'responsibility', 'dxt']
passes_df[cols_to_display].head(23)

Unnamed: 0,player.id,tracking.object_id,location.x,location.y,pass.endLocation.x,pass.endLocation.y,tracking.x,tracking.y,responsibility,dxt
0,118,7604,51,50,42,83,-41.03,-0.05,0.0,-0.004162
1,118,16320,51,50,42,83,-15.86,-7.61,0.0,-0.004162
2,118,11516,51,50,42,83,-16.08,2.42,0.0,-0.004162
3,118,11630,51,50,42,83,-14.54,-18.87,0.0,-0.004162
4,118,11606,51,50,42,83,-14.08,17.56,0.0,-0.004162
5,118,12218,51,50,42,83,-9.69,0.05,0.0,-0.004162
6,118,277,51,50,42,83,-7.61,9.58,0.0,-0.004162
7,118,946,51,50,42,83,-6.2,-6.81,0.0,-0.004162
8,118,5922,51,50,42,83,-1.21,-19.48,0.0,-0.004162
9,118,6028,51,50,42,83,-0.19,22.92,0.0,-0.004162


In [6]:
wyscout2skillcorner = pd.read_csv(WYSCOUT_TO_SKILLCORNER).drop(columns='id')

In [9]:
# First, merge id_map with joined_df on player.id (WyScout player IDs) to get the team_name_sk for player_skillcorner_id
passes_df = passes_df.merge(wyscout2skillcorner[['player_id_wy', 'player_id_sk', 'team_name_sk']],
                            left_on='player.id', 
                            right_on='player_id_wy', 
                            how='left')

# Rename the columns for clarity
passes_df.rename(columns={'player_id_sk': 'player_skillcorner_id', 'team_name_sk': 'team_name_skillcorner'}, inplace=True)

# Now merge id_map with object_id (which is SkillCorner ID) to get the team_name_sk for object_id
passes_df = passes_df.merge(wyscout2skillcorner[['player_id_sk', 'team_name_sk']],
                            left_on='tracking.object_id', 
                            right_on='player_id_sk', 
                            how='left')

# Rename the columns for clarity
passes_df.rename(columns={'team_name_sk': 'team_name_object'}, inplace=True) 

# Finally, create a new column to check if both players are on the same team
passes_df['same_team'] = passes_df['team_name_skillcorner'] == passes_df['team_name_object']

# You can drop any unnecessary columns if needed
passes_df.drop(columns=['player_id_wy', 'player_id_sk'], inplace=True)

In [10]:
passes_df.columns

Index(['matchId', 'matchTimestamp', 'team.id', 'team.name', 'player.id',
       'player.name', 'opponentTeam.id', 'opponentTeam.name',
       'pass.recipient.id', 'pass.recipient.name', 'location.x', 'location.y',
       'pass.endLocation.x', 'pass.endLocation.y', 'dxt', 'tracking.object_id',
       'tracking.x', 'tracking.y', 'tracking.z', 'tracking.x_norm',
       'tracking.y_norm', 'tracking.team_name', 'tracking.player_id',
       'tracking.player_first_name', 'tracking.player_last_name',
       'responsibility', 'player_skillcorner_id', 'team_name_skillcorner',
       'team_name_object', 'same_team'],
      dtype='object')

In [11]:
#joined_df['same_team'] = joined_df['team.name'] == joined_df['team_name_sk_y']
cols_to_display.append('team.name')
cols_to_display.append('team_name_object')
cols_to_display.append('same_team')
passes_df[cols_to_display].head(23)

Unnamed: 0,player.id,tracking.object_id,location.x,location.y,pass.endLocation.x,pass.endLocation.y,tracking.x,tracking.y,responsibility,dxt,team.name,team_name_object,same_team
0,118,7604,51,50,42,83,-41.03,-0.05,0.0,-0.004162,Netherlands,France,False
1,118,16320,51,50,42,83,-15.86,-7.61,0.0,-0.004162,Netherlands,France,False
2,118,11516,51,50,42,83,-16.08,2.42,0.0,-0.004162,Netherlands,France,False
3,118,11630,51,50,42,83,-14.54,-18.87,0.0,-0.004162,Netherlands,France,False
4,118,11606,51,50,42,83,-14.08,17.56,0.0,-0.004162,Netherlands,France,False
5,118,12218,51,50,42,83,-9.69,0.05,0.0,-0.004162,Netherlands,France,False
6,118,277,51,50,42,83,-7.61,9.58,0.0,-0.004162,Netherlands,France,False
7,118,946,51,50,42,83,-6.2,-6.81,0.0,-0.004162,Netherlands,France,False
8,118,5922,51,50,42,83,-1.21,-19.48,0.0,-0.004162,Netherlands,France,False
9,118,6028,51,50,42,83,-0.19,22.92,0.0,-0.004162,Netherlands,France,False


In [12]:
passes_df['responsibility'] = np.where(passes_df['same_team'], 0, passes_df['responsibility'])
passes_df.head(23)

Unnamed: 0,matchId,matchTimestamp,team.id,team.name,player.id,player.name,opponentTeam.id,opponentTeam.name,pass.recipient.id,pass.recipient.name,location.x,location.y,pass.endLocation.x,pass.endLocation.y,dxt,tracking.object_id,tracking.x,tracking.y,tracking.z,tracking.x_norm,tracking.y_norm,tracking.team_name,tracking.player_id,tracking.player_first_name,tracking.player_last_name,responsibility,player_skillcorner_id,team_name_skillcorner,team_name_object,same_team
0,5414111,00:00:01.912,664,Netherlands,118,M. Depay,4418,France,625,M. de Roon,51,50,42,83,-0.004162,7604,-41.03,-0.05,0.0,89.07619,49.926471,France,7604,Mike,Maignan,0.0,7387,Netherland,France,False
1,5414111,00:00:01.912,664,Netherlands,118,M. Depay,4418,France,625,M. de Roon,51,50,42,83,-0.004162,16320,-15.86,-7.61,0.0,65.104762,38.808824,France,16320,Ibrahima,Konaté,0.0,7387,Netherland,France,False
2,5414111,00:00:01.912,664,Netherlands,118,M. Depay,4418,France,625,M. de Roon,51,50,42,83,-0.004162,11516,-16.08,2.42,0.0,65.314286,53.558824,France,11516,Dayot,Upamecano,0.0,7387,Netherland,France,False
3,5414111,00:00:01.912,664,Netherlands,118,M. Depay,4418,France,625,M. de Roon,51,50,42,83,-0.004162,11630,-14.54,-18.87,0.0,63.847619,22.25,France,11630,Jules,Koundé,0.0,7387,Netherland,France,False
4,5414111,00:00:01.912,664,Netherlands,118,M. Depay,4418,France,625,M. de Roon,51,50,42,83,-0.004162,11606,-14.08,17.56,0.0,63.409524,75.823529,France,11606,Theo,Hernández,0.0,7387,Netherland,France,False
5,5414111,00:00:01.912,664,Netherlands,118,M. Depay,4418,France,625,M. de Roon,51,50,42,83,-0.004162,12218,-9.69,0.05,0.0,59.228571,50.073529,France,12218,Aurélien,Tchouaméni,0.0,7387,Netherland,France,False
6,5414111,00:00:01.912,664,Netherlands,118,M. Depay,4418,France,625,M. de Roon,51,50,42,83,-0.004162,277,-7.61,9.58,0.0,57.247619,64.088235,France,277,Adrien,Rabiot,0.0,7387,Netherland,France,False
7,5414111,00:00:01.912,664,Netherlands,118,M. Depay,4418,France,625,M. de Roon,51,50,42,83,-0.004162,946,-6.2,-6.81,0.0,55.904762,39.985294,France,946,Antoine,Griezmann,0.0,7387,Netherland,France,False
8,5414111,00:00:01.912,664,Netherlands,118,M. Depay,4418,France,625,M. de Roon,51,50,42,83,-0.004162,5922,-1.21,-19.48,0.0,51.152381,21.352941,France,5922,Kingsley,Coman,0.0,7387,Netherland,France,False
9,5414111,00:00:01.912,664,Netherlands,118,M. Depay,4418,France,625,M. de Roon,51,50,42,83,-0.004162,6028,-0.19,22.92,0.0,50.180952,83.705882,France,6028,Kylian,Mbappé Lottin,0.0,7387,Netherland,France,False


In [13]:
passes_df

Unnamed: 0,matchId,matchTimestamp,team.id,team.name,player.id,player.name,opponentTeam.id,opponentTeam.name,pass.recipient.id,pass.recipient.name,location.x,location.y,pass.endLocation.x,pass.endLocation.y,dxt,tracking.object_id,tracking.x,tracking.y,tracking.z,tracking.x_norm,tracking.y_norm,tracking.team_name,tracking.player_id,tracking.player_first_name,tracking.player_last_name,responsibility,player_skillcorner_id,team_name_skillcorner,team_name_object,same_team
0,5414111,00:00:01.912,664,Netherlands,118,M. Depay,4418,France,625,M. de Roon,51,50,42,83,-0.004162,7604,-41.03,-0.05,0.0,89.076190,49.926471,France,7604,Mike,Maignan,0.0,7387,Netherland,France,False
1,5414111,00:00:01.912,664,Netherlands,118,M. Depay,4418,France,625,M. de Roon,51,50,42,83,-0.004162,16320,-15.86,-7.61,0.0,65.104762,38.808824,France,16320,Ibrahima,Konaté,0.0,7387,Netherland,France,False
2,5414111,00:00:01.912,664,Netherlands,118,M. Depay,4418,France,625,M. de Roon,51,50,42,83,-0.004162,11516,-16.08,2.42,0.0,65.314286,53.558824,France,11516,Dayot,Upamecano,0.0,7387,Netherland,France,False
3,5414111,00:00:01.912,664,Netherlands,118,M. Depay,4418,France,625,M. de Roon,51,50,42,83,-0.004162,11630,-14.54,-18.87,0.0,63.847619,22.250000,France,11630,Jules,Koundé,0.0,7387,Netherland,France,False
4,5414111,00:00:01.912,664,Netherlands,118,M. Depay,4418,France,625,M. de Roon,51,50,42,83,-0.004162,11606,-14.08,17.56,0.0,63.409524,75.823529,France,11606,Theo,Hernández,0.0,7387,Netherland,France,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19707,5414111,01:33:38.826,664,Netherlands,118,M. Depay,4418,France,221069,W. Weghorst,80,17,89,42,0.064346,20191,31.49,-1.03,0.0,20.009524,48.485294,France,20191,Khéphren,Thuram-Ulien,0.0,7387,Netherland,France,False
19708,5414111,01:33:38.826,664,Netherlands,118,M. Depay,4418,France,221069,W. Weghorst,80,17,89,42,0.064346,12229,31.89,8.13,0.0,19.628571,61.955882,France,12229,Youssouf,Fofana,0.0,7387,Netherland,France,False
19709,5414111,01:33:38.826,664,Netherlands,118,M. Depay,4418,France,221069,W. Weghorst,80,17,89,42,0.064346,12246,35.56,12.33,0.0,16.133333,68.132353,France,12246,Moussa,Diaby,0.0,7387,Netherland,France,False
19710,5414111,01:33:38.826,664,Netherlands,118,M. Depay,4418,France,221069,W. Weghorst,80,17,89,42,0.064346,6028,5.09,-7.70,0.0,45.152381,38.676471,France,6028,Kylian,Mbappé Lottin,0.0,7387,Netherland,France,False
