## __Import__

In [74]:
import pandas as pd
import numpy as np
import math
from statsbombpy import sb
from ast import literal_eval
from dataloader import load_data
from mplsoccer import Pitch, VerticalPitch, Standardizer


def load_event_data(match=False):
    if match:
        COMPETITION_ID = 9
        SEASON_ID = 27
        MATCH_ID = 3890561 
        df = sb.events(match_id=MATCH_ID)
        return df
    else:
        df = load_data(frac=(0.5))
        return df

MATCH=False
df = load_event_data(match=MATCH)

[2025-03-04 14:52:34] File exists
[2025-03-04 14:52:34] Loading data form local file system


  df = pd.read_csv(file_path).sample(frac=frac, random_state=42)


[2025-03-04 14:52:44] File loaded with shape: (534933, 115)


In [75]:
columns = ["player","location","type","under_pressure" ,"goalkeeper_position", "goalkeeper_technique","goalkeeper_body_part","goalkeeper_type","goalkeeper_outcome"]
df = df.loc[(df["type"]=="Goal Keeper"),columns]
df 

Unnamed: 0,player,location,type,under_pressure,goalkeeper_position,goalkeeper_technique,goalkeeper_body_part,goalkeeper_type,goalkeeper_outcome
411161,Christian Mathenia,"[7.7, 47.2]",Goal Keeper,,,,,Keeper Sweeper,Claim
494861,Roman Bürki,"[4.2, 40.5]",Goal Keeper,,Set,,,Shot Faced,
741502,Rune Almenning Jarstein,"[3.3, 34.2]",Goal Keeper,,,,,Punch,Punched out
942205,Roman Bürki,"[1.2, 38.7]",Goal Keeper,,Set,,,Shot Faced,
865408,Diego Orlando Benaglio,"[2.3, 39.7]",Goal Keeper,,Set,Diving,,Goal Conceded,No Touch
...,...,...,...,...,...,...,...,...,...
42756,Marwin Hitz,"[15.1, 37.9]",Goal Keeper,,Set,,,Shot Faced,
963278,Loris Karius,"[6.4, 41.6]",Goal Keeper,,Set,Diving,Right Hand,Shot Saved,Touched Out
505582,Ralf Fährmann,"[3.5, 36.7]",Goal Keeper,,Set,,,Shot Faced,
567982,Marwin Hitz,"[1.8, 41.6]",Goal Keeper,,Set,,,Shot Faced,


## __Setting types__

In [76]:
## __Setting types__
def convert_to_list(input_data):
    if(isinstance(input_data, str)):
        try:
            return literal_eval(input_data)
        except (ValueError, SyntaxError):
            print(f"Error: The string {input_data} could not be converted to a list.")
            return None
    
    return input_data
    
df["location"] = df["location"].apply(convert_to_list)
df[["x", "y"]] = df["location"].apply(pd.Series)
df["under_pressure"] = df["under_pressure"] == True
df

Unnamed: 0,player,location,type,under_pressure,goalkeeper_position,goalkeeper_technique,goalkeeper_body_part,goalkeeper_type,goalkeeper_outcome,x,y
411161,Christian Mathenia,"[7.7, 47.2]",Goal Keeper,False,,,,Keeper Sweeper,Claim,7.7,47.2
494861,Roman Bürki,"[4.2, 40.5]",Goal Keeper,False,Set,,,Shot Faced,,4.2,40.5
741502,Rune Almenning Jarstein,"[3.3, 34.2]",Goal Keeper,False,,,,Punch,Punched out,3.3,34.2
942205,Roman Bürki,"[1.2, 38.7]",Goal Keeper,False,Set,,,Shot Faced,,1.2,38.7
865408,Diego Orlando Benaglio,"[2.3, 39.7]",Goal Keeper,False,Set,Diving,,Goal Conceded,No Touch,2.3,39.7
...,...,...,...,...,...,...,...,...,...,...,...
42756,Marwin Hitz,"[15.1, 37.9]",Goal Keeper,False,Set,,,Shot Faced,,15.1,37.9
963278,Loris Karius,"[6.4, 41.6]",Goal Keeper,False,Set,Diving,Right Hand,Shot Saved,Touched Out,6.4,41.6
505582,Ralf Fährmann,"[3.5, 36.7]",Goal Keeper,False,Set,,,Shot Faced,,3.5,36.7
567982,Marwin Hitz,"[1.8, 41.6]",Goal Keeper,False,Set,,,Shot Faced,,1.8,41.6


## __Generate Features__

In [77]:
def analyze_defending(df):
    """
    This function Pre-compute all conditions for Vectorize operations.
    Returns results grouped by player and under_pressure  
    """
    # Pre-compute all conditions
    df_with_flags = df.copy()

    # Action flags
    df_with_flags["is_shot_on_target"] = (df["goalkeeper_type"] =="Goal Conceded") | (df["goalkeeper_type"] =="Shot Saved")
    df_with_flags["goal_conceded"] = df["goalkeeper_type"] =="Goal Conceded"
    df_with_flags["is_dive"] = df["goalkeeper_technique"] =="Diving"
    df_with_flags["is_shot_saved"] = df["goalkeeper_type"] =="Shot Saved"
    df_with_flags["is_sweeper"] = df["goalkeeper_type"] =="Keeper Sweeper"
    df_with_flags["is_standing"] = df["goalkeeper_technique"] =="Standing"
    df_with_flags["is_penalty"] = (df["goalkeeper_type"] =="Penalty Conceded") | (df["goalkeeper_type"] =="Penalty Saved") | (df["goalkeeper_type"] =="Penalty Saved To Post")
    df_with_flags["is_penalty_saved"] = (df["goalkeeper_type"] =="Penalty Saved") | (df["goalkeeper_type"] =="Penalty Saved To Post")

    # location flags
    df_with_flags["distance_to_goal"] = np.sqrt( (df["x"] - 0)**2 + (df["y"] - 40)**2)

    # combinations
    df_with_flags["is_saved_shot"] = df_with_flags["is_shot_on_target"] & df_with_flags["is_shot_saved"]
    df_with_flags["is_saved_from_dive"] = df_with_flags["is_saved_shot"] & df_with_flags["is_dive"]
    df_with_flags["is_saved_from_standing"] = df_with_flags["is_saved_shot"] & df_with_flags["is_standing"]
    df_with_flags["is_sweeper_and_clears_ball"] = (df["goalkeeper_type"] =="Keeper Sweeper") & (df["goalkeeper_outcome"] =="Clear")
    df_with_flags["is_sweeper_and_collects_ball"] = (df["goalkeeper_type"] =="Keeper Sweeper") & (df["goalkeeper_outcome"] =="Claim")

    player_under_pressure_grouping = df_with_flags.groupby(['player',"under_pressure"]).agg(
            # general actions
            actions_total=('player', 'count'),
            goals_conceded=("goal_conceded","sum"), # doesnt account own goals
            # shots and saves
            shots_on_target=("is_shot_on_target","sum"),
            saved_shots=("is_saved_shot","sum"),
            saved_shots_dive=("is_saved_from_dive","sum"),
            saved_shots_standing=("is_saved_from_standing","sum"),
            # penalties
            penalty_total=("is_penalty","sum"),
            penalty_saved=("is_penalty_saved","sum"),
            # sweeper
            sweeper_total=("is_sweeper","sum"),
            sweeper_clears_ball=("is_sweeper_and_clears_ball","sum"),
            sweeper_collects_ball=("is_sweeper_and_collects_ball","sum"),
            # distance to goal
            avg_distance_to_goal=("distance_to_goal",lambda x: (x).mean()),
            avg_distance_to_goal_saved_shots=("distance_to_goal", lambda x: x[df_with_flags["is_saved_shot"] == True].mean()),
            avg_distance_to_goal_goals_conceded=("distance_to_goal", lambda x: x[df_with_flags["goal_conceded"] == True].mean()),

    )

    total_stats = player_under_pressure_grouping.groupby('player').sum()

    return total_stats

analyze_defending(df)

Unnamed: 0_level_0,actions_total,goals_conceded,shots_on_target,saved_shots,saved_shots_dive,saved_shots_standing,penalty_total,penalty_saved,sweeper_total,sweeper_clears_ball,sweeper_collects_ball,avg_distance_to_goal,avg_distance_to_goal_saved_shots,avg_distance_to_goal_goals_conceded
player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Alexander Manninger,4,0,1,1,1,0,0,0,0,0,0,2.907528,0.6,0.0
Andreas Hirzel,3,0,1,1,0,1,1,0,0,0,0,5.176439,10.728001,0.0
Bernd Leno,211,15,58,43,2,41,2,0,24,7,17,25.61719,4.151967,4.974309
Christian Mathenia,281,23,72,49,8,41,1,0,25,6,19,25.311669,4.196075,4.755131
Dario Kresic,5,1,2,1,0,1,0,0,0,0,0,4.821701,3.605551,12.47758
David Yelldell,9,0,1,1,0,1,0,0,1,0,1,6.518589,3.7,0.0
Diego Orlando Benaglio,153,16,51,35,2,33,1,0,11,2,9,21.987767,4.264035,4.197968
Felix Wiedwald,272,25,86,61,7,54,5,0,16,6,10,30.460376,9.649692,3.555248
Jaroslav Drobný,82,7,23,16,1,15,0,0,6,1,5,5.256829,4.172658,6.071453
Jens Grahl,6,1,5,4,0,4,0,0,0,0,0,7.276991,3.419144,26.655206
