In [None]:
import pandas as pd
import numpy as np
import os
from mplsoccer.pitch import Pitch
from shapely.geometry import MultiPoint, Polygon, Point

Load the data

In [None]:
# Folder holding the StatsBomb parquet files, relative to this notebook.
STATSBOMB = os.path.join('..', '..', 'data', 'statsbomb')

# Read the event data and the shot freeze frames.
event_path = os.path.join(STATSBOMB, 'event.parquet')
freeze_path = os.path.join(STATSBOMB, 'freeze.parquet')
df_statsbomb_event = pd.read_parquet(event_path)
df_statsbomb_freeze = pd.read_parquet(freeze_path)

Filter shots

In [None]:
# Keep only the rows whose event type is 'Shot'; copy so later edits do not touch the event frame.
is_shot_event = df_statsbomb_event['type_name'] == 'Shot'
df_statsbomb_shot = df_statsbomb_event.loc[is_shot_event].copy()

# Features based on StatsBomb freeze frame

Features based on the freeze frame (this takes a while, as it loops over 20k+ shots):
- space around the goalkeeper
- space around the shooter
- number of players in the shot angle to the goal

Filter out penalty shots from the freeze frames

In [None]:
# Ids of every shot that is not a penalty.
is_non_penalty = df_statsbomb_shot['shot_type_name'] != 'Penalty'
non_penalty_id = df_statsbomb_shot.loc[is_non_penalty, 'id']

# Keep only the freeze-frame rows that belong to those shots.
in_non_penalty_shot = df_statsbomb_freeze['id'].isin(non_penalty_id)
df_statsbomb_freeze = df_statsbomb_freeze.loc[in_non_penalty_shot].copy()

Add the shot taker to the freeze frame

In [None]:
cols_to_keep = ['id','player_id','player_name','position_id','position_name','x','y','match_id']
freeze_ids = df_statsbomb_freeze['id'].unique()

# One row per shot that has a freeze frame, with the position columns renamed to the
# freeze-frame names. The shot taker is flagged as a teammate and given event_freeze_id 0,
# which the feature loop uses to find them.
has_freeze_frame = df_statsbomb_shot['id'].isin(freeze_ids)
df_shot_taker = (
    df_statsbomb_shot.loc[has_freeze_frame, cols_to_keep]
    .rename(columns={'position_id': 'player_position_id',
                     'position_name': 'player_position_name'})
    .assign(player_teammate=True, event_freeze_id=0)
)

# Append the shot takers below the existing freeze-frame rows.
# NOTE(review): re-running this cell appends the shot takers again; restart from the load cell instead.
df_statsbomb_freeze = pd.concat([df_statsbomb_freeze, df_shot_taker])

Calculate features

In [None]:
statsbomb_pitch = Pitch()

# columns needed to build the per-shot features
freeze_cols = ['x', 'y', 'player_teammate', 'event_freeze_id',
               'player_position_id', 'player_position_name']

# store the results in lists (one entry per shot, in loop order)
shot_ids = []
area_goal = []
area_shot = []
n_angle = []

# Loop through the freeze frames, create a voronoi and calculate the area around the
# goalkeeper / shot taker. groupby(sort=False) hands over each shot's rows once, in order
# of first appearance, instead of re-scanning the whole frame for every shot id.
for shot_id, frame in df_statsbomb_freeze.groupby('id', sort=False):
    subset = frame[freeze_cols].copy()
    team1, team2 = statsbomb_pitch.voronoi(subset.x, subset.y, subset.player_teammate)
    # Position of each player within their own team. Used below to index team1 (shot taker)
    # and team2 (goalkeeper) - assumes voronoi returns the regions per team in row order.
    subset['rank'] = subset.groupby('player_teammate')['x'].cumcount()
    shot_ids.append(shot_id)

    # Goalkeeper voronoi: only the opposition goalkeeper counts. A goalkeeper on the shooting
    # side (e.g. a goalkeeper taking the shot) has a rank that refers to team1, not team2.
    # `== False` (rather than `~`) mirrors the goalkeeper-position cell and is safe on object dtype.
    is_opposition_keeper = ((subset.player_position_name == 'Goalkeeper') &
                            (subset.player_teammate == False))
    if is_opposition_keeper.any():
        goalkeeper_voronoi = team2[subset.loc[is_opposition_keeper, 'rank'].values[0]]
        area_goal.append(Polygon(goalkeeper_voronoi).area)
    else:
        area_goal.append(0)

    # shot voronoi (the shot taker is the row with event_freeze_id 0)
    is_shot_taker = subset.event_freeze_id == 0
    shot_taker_voronoi = team1[subset.loc[is_shot_taker, 'rank'].values[0]]
    area_shot.append(Polygon(shot_taker_voronoi).area)

    # Calculate number of players in the angle to the goal: a triangle from the shot
    # location to the two points in statsbomb_pitch.goal_right.
    verts = np.zeros((3, 2))
    # take the first matching row as plain floats; assigning a length-1 Series to a single
    # array element is deprecated in recent NumPy/pandas
    verts[0, :] = subset.loc[is_shot_taker, ['x', 'y']].to_numpy(dtype=float)[0]
    verts[1:, :] = statsbomb_pitch.goal_right
    angle = Polygon(verts).buffer(0)  # the angle to the goal polygon, buffer added as sometimes shot is on the goal line
    players = MultiPoint(subset.loc[subset.event_freeze_id != 0, ['x', 'y']].values.tolist())  # points for players
    intersection = players.intersection(angle)  # intersection between angle and players
    if intersection.is_empty:  # checked first so an empty Point result is not counted as one player
        n_players = 0
    elif isinstance(intersection, MultiPoint):
        # .geoms works on Shapely 1.8 and 2.x; len() on the geometry itself fails on 2.x
        n_players = len(intersection.geoms)
    elif isinstance(intersection, Point):
        n_players = 1
    else:
        n_players = 0
    n_angle.append(n_players)

# create a dataframe (ids come from the loop itself, so they always line up with the features)
df_freeze_features = pd.DataFrame({'id': shot_ids, 'area_shot': area_shot,
                                   'area_goal': area_goal, 'n_angle': n_angle})

Add on goalkeeper position

In [None]:
# Location of the goalkeeper who is not on the shooting side, one row per freeze-frame row that matches.
is_goalkeeper = df_statsbomb_freeze['player_position_name'] == 'Goalkeeper'
is_opposition = df_statsbomb_freeze['player_teammate'] == False
gk_position = (
    df_statsbomb_freeze.loc[is_goalkeeper & is_opposition, ['id', 'x', 'y']]
    .rename(columns={'x': 'goalkeeper_x', 'y': 'goalkeeper_y'})
)

# Left join keeps shots without such a goalkeeper; validate raises if either side repeats an id.
df_freeze_features = df_freeze_features.merge(gk_position, on='id', how='left', validate='1:1')

Save features

In [None]:
# Write the features next to the other StatsBomb parquet files, then print a summary of the frame.
features_path = os.path.join(STATSBOMB, 'freeze_features.parquet')
df_freeze_features.to_parquet(features_path)
df_freeze_features.info()