## __Import__

In [None]:
import sys
import os
import pandas as pd
from ast import literal_eval


# Make the project root importable so that `scripts` can be found.
# The notebook is expected to be run from inside the notebooks folder, so the
# root is taken to be the parent of the current working directory.
# (`__file__` is not defined inside a notebook, and `__name__` is a module
# name rather than a path, so the working directory is used explicitly.)
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if project_root not in sys.path:  # re-running the cell must not add duplicates
    sys.path.append(project_root)

from scripts.dataloader import Dataloader

# Load the event rows of the "defending" dimension and show which columns it has.
dataloader = Dataloader()
df = dataloader.get_dimension(dimension="defending")
df.columns

[2025-03-27 20:16:28] Loading data form local file system


Index(['player', 'type', 'location', 'ball_recovery_recovery_failure',
       'ball_recovery_offensive', 'bad_behaviour_card', 'foul_committed_card',
       'counterpress', 'block_offensive', 'block_deflection',
       'block_save_block', 'foul_committed_offensive',
       'foul_committed_penalty', 'foul_won_penalty', 'interception_outcome',
       'clearance_body_part', 'duel_type', 'duel_outcome', 'under_pressure'],
      dtype='object')

## __Setting types__

In [2]:
def convert_to_list(input_data):
    """
    Turn a string containing a Python literal (e.g. "[1.0, 2.0]") into the
    object it describes.

    Anything that is not a string is handed back unchanged. A string that
    cannot be parsed is reported on stdout and results in None.
    """
    # Guard clause: only strings need parsing.
    if not isinstance(input_data, str):
        return input_data

    try:
        parsed = literal_eval(input_data)
    except (ValueError, SyntaxError):
        print(f"Error: The string {input_data} could not be converted to a list.")
        return None
    return parsed
    
# Parse the location values (presumably stored as "[x, y]" strings — confirm
# against the data loader) into Python objects.
location_lists = df["location"].apply(convert_to_list)
df["location"] = location_lists

# Spread each parsed location over two separate coordinate columns.
df[["x", "y"]] = location_lists.apply(pd.Series)

# Reduce the flag to a strict boolean: only values equal to True stay True.
df["under_pressure"] = df["under_pressure"].eq(True)
df

Unnamed: 0,player,type,location,ball_recovery_recovery_failure,ball_recovery_offensive,bad_behaviour_card,foul_committed_card,counterpress,block_offensive,block_deflection,...,foul_committed_offensive,foul_committed_penalty,foul_won_penalty,interception_outcome,clearance_body_part,duel_type,duel_outcome,under_pressure,x,y
3097,Leroy Sané,Pressure,"[55.4, 73.9]",,,,,,,,...,,,,,,,,,55.4,73.9
3098,Benedikt Höwedes,Pressure,"[47.3, 73.7]",,,,,,,,...,,,,,,,,,47.3,73.7
3099,Kevin Volland,Pressure,"[96.8, 25.5]",,,,,,,,...,,,,,,,,,96.8,25.5
3100,Mark Uth,Pressure,"[109.8, 43.8]",,,,,,,,...,,,,,,,,,109.8,43.8
3101,Pirmin Schwegler,Pressure,"[81.4, 27.2]",,,,,,,,...,,,,,,,,,81.4,27.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1069719,Pierre-Michel Lasogga,Foul Committed,"[48.1, 55.0]",,,,,,,,...,,,,,,,,,48.1,55.0
1069720,Douglas Costa de Souza,Foul Committed,"[95.1, 75.0]",,,,,True,,,...,,,,,,,,,95.1,75.0
1069721,Arturo Erasmo Vidal Pardo,Foul Committed,"[45.9, 72.4]",,,,,,,,...,,,,,,,,,45.9,72.4
1069801,Matthias Ostrzolek,Shield,"[7.3, 22.7]",,,,,,,,...,,,,,,,,,7.3,22.7


## __Generate Features__

In [None]:
def is_in_defensive_penalty_area(x, y):
    """
    Tell whether the point (x, y) lies inside the rectangle
    0 <= x <= 18 and 18 <= y <= 62 (bounds inclusive).

    NOTE(review): the rectangle presumably is the own penalty area in the
    pitch coordinates of the event data — confirm the coordinate system.
    """
    box_depth = 18
    centre_y, half_width = 40, 22

    inside_depth = 0 <= x <= box_depth
    inside_width = (centre_y - half_width) <= y <= (centre_y + half_width)
    return inside_depth and inside_width

def analyze_defending(df):
    """
    This function Pre-compute all conditions for Vectorize operations.
    Returns results grouped by player and under_pressure  
    """
    # Pre-compute all conditions
    df_with_flags = df.copy()

    # Actions
    df_with_flags["is_ball_recovery"] = df["type"] == "Ball Recovery"
    df_with_flags["is_ball_recovery_successful"] = df["ball_recovery_recovery_failure"].isna()
    df_with_flags["is_offensive_ball_recovery"] = df["ball_recovery_offensive"] == True
    df_with_flags["is_ball_recovery_failed"] = df["ball_recovery_recovery_failure"] == True
    df_with_flags["is_bad_behaviour"] = df["type"]=="Bad Behaviour"
    df_with_flags["is_yellow_card_bad_behaviour"] = (df["bad_behaviour_card"] == "Yellow Card") 
    df_with_flags["is_second_yellow_card_bad_behaviour"] = df["bad_behaviour_card"] == "Second Yellow"
    df_with_flags["is_red_card_bad_behaviour"] = df["bad_behaviour_card"] == "Red Card"
    df_with_flags["is_yellow_card_foul"] = (df["foul_committed_card"] == "Yellow Card")
    df_with_flags["is_second_yellow_card_foul"] = (df["foul_committed_card"] == "Second Yellow")
    df_with_flags["is_red_card_foul"] = (df["foul_committed_card"] == "Red Card")    
    df_with_flags["is_counterpress"] = df["counterpress"].notna()
    df_with_flags["is_pressure_on_opponent"] = df["type"] == "Pressure"
    df_with_flags["is_block"] = df["type"] == "Block"
    df_with_flags["is_block_offensive"] = df["block_offensive"] == True
    df_with_flags["is_block_ball_deflection"] = df["block_deflection"] == True
    df_with_flags["is_block_shot_on_target"] = df["block_save_block"] == True
    df_with_flags["is_clearance"] = df["type"] == "Clearance"
    df_with_flags["is_duel"] = df["type"] == "Duel"
    df_with_flags["is_foul"] = df["type"] == "Foul Committed"
    df_with_flags["is_offensive_foul"] = df["foul_committed_offensive"] == True
    df_with_flags["is_foul_penalty_resulted"] = df["foul_committed_penalty"] == True
    df_with_flags["teammate_is_fouled_in_op_penalty"] = df["foul_won_penalty"] == True
    df_with_flags["is_interception"] =(df["interception_outcome"]=="Success In Play") | (df["interception_outcome"]=="Won") 
    df_with_flags["is_shield"] = df["type"] == "Shield"


    # Location-based flags
    df_with_flags['is_attacking_third'] = df['x'] >= 80
    df_with_flags['is_middle_third'] = (80 > df['x']) & (df['x'] > 40)
    df_with_flags['is_defending_third'] = df['x'] <= 40
    df_with_flags['is_in_defending_box'] = df[["x","y"]].apply(lambda row: is_in_defensive_penalty_area(row['x'], row['y']), axis=1)

    # Combinations
    df_with_flags["ball_recovery_successful"] = df_with_flags["is_ball_recovery"] & df_with_flags["is_ball_recovery_successful"]
    df_with_flags["ball_recovery_failed"] = (df_with_flags["is_ball_recovery"])  & (df_with_flags["is_ball_recovery_failed"])
    df_with_flags["ball_recovery_offensive_successful"] = df_with_flags["is_offensive_ball_recovery"] & df_with_flags["is_ball_recovery_successful"]
    df_with_flags["block_during_counterpress"] = df_with_flags["is_block"] & df_with_flags["is_block"]
    df_with_flags["is_clearance_with_head"] = (df_with_flags["is_clearance"]) & (df["clearance_body_part"] == "Head")
    df_with_flags["is_duel_tackling"] = (df_with_flags["is_duel"]) & (df["duel_type"] == "Tackle")
    df_with_flags["is_duel_tackling_won"] = (df_with_flags["is_duel"]) & (df["duel_type"] == "Tackle") & ((df["duel_outcome"] == "Won") | (df["duel_outcome"] == "Success In Play"))
    df_with_flags["is_pressure_on_opponent_defending_third"] = df_with_flags["is_pressure_on_opponent"] & df_with_flags['is_defending_third']
    df_with_flags["is_pressure_on_opponent_middle_third"] = df_with_flags["is_pressure_on_opponent"] & df_with_flags['is_middle_third']
    df_with_flags["is_pressure_on_opponent_attacking_third"] = df_with_flags["is_pressure_on_opponent"] & df_with_flags['is_attacking_third']
    df_with_flags["is_counterpress_on_opponent_defending_third"] = df_with_flags["is_counterpress"] & df_with_flags['is_defending_third']
    df_with_flags["is_counterpress_on_opponent_middle_third"] = df_with_flags["is_counterpress"] & df_with_flags['is_middle_third']
    df_with_flags["is_counterpress_on_opponent_attacking_third"] = df_with_flags["is_counterpress"] & df_with_flags['is_attacking_third']
    df_with_flags["is_tackling_in_defending_third"] = df_with_flags["is_duel_tackling"] & df_with_flags['is_defending_third']
    df_with_flags["is_tackling_in_middle_third"] = df_with_flags["is_duel_tackling"] & df_with_flags['is_middle_third']
    df_with_flags["is_tackling_in_attacking_third"] = df_with_flags["is_duel_tackling"] & df_with_flags['is_attacking_third']

    df_with_flags["is_foul_in_defending_third"] = df_with_flags["is_foul"] & df_with_flags['is_defending_third']
    df_with_flags["is_foul_in_middle_third"] = df_with_flags["is_foul"] & df_with_flags['is_middle_third']
    df_with_flags["is_foul_in_attacking_third"] = df_with_flags["is_foul"] & df_with_flags['is_attacking_third']

    df_with_flags["total_yellow_card"] = df_with_flags["is_yellow_card_foul"] | df_with_flags["is_yellow_card_foul"] | df_with_flags["is_second_yellow_card_bad_behaviour"] | df_with_flags["is_yellow_card_bad_behaviour"]
    df_with_flags["total_red_card"] = df_with_flags["is_red_card_foul"] | df_with_flags["is_red_card_bad_behaviour"]
    

    total_stats = df_with_flags.groupby('player').agg(
        # ball recovery
        ball_recovery_total=('is_ball_recovery', 'sum'),
        ball_recovery_successful=("ball_recovery_successful","sum"),
        ball_recovery_offensive_total=("is_offensive_ball_recovery", "sum"),
        ball_recovery_offensive_successful=("ball_recovery_offensive_successful", "sum"),
        ball_recovery_failed=("ball_recovery_failed", "sum"),
        # pressure 
        pressure_on_opponent=("is_pressure_on_opponent","sum"),
        pressure_on_opponent_defending_third=("is_pressure_on_opponent_defending_third","sum"),
        pressure_on_opponent_middle_third=("is_pressure_on_opponent_middle_third","sum"),
        pressure_on_opponent_attacking_third=("is_pressure_on_opponent_attacking_third","sum"),
        # counterpressing
        counterpressing_total = ("is_counterpress","sum"),
        counterpressing_opponent_defending_third = ("is_counterpress_on_opponent_defending_third","sum"),
        counterpressing_opponent_middle_third = ("is_counterpress_on_opponent_middle_third","sum"),
        counterpressing_attacking_third = ("is_counterpress_on_opponent_attacking_third","sum"),
        # shields
        shield_total=("is_shield","sum"),
        # blocking
        block_total = ("is_block", "sum"),
        block_offensive = ("is_block_offensive", "sum"),
        block_ball_deflection = ("is_block_ball_deflection", "sum"),
        block_counterpress = ("is_block_ball_deflection", "sum"),
        block_during_counterpress =("block_during_counterpress", "sum"),
        block_shot_on_target=("is_block_shot_on_target", "sum"),
        # clearance
        clearance_total=("is_clearance","sum"),
        clearance_with_head=("is_clearance_with_head","sum"),
        # interception
        interception=("is_interception","sum"),
        # duel
        tackling=("is_duel_tackling","sum"),
        tackling_won=("is_duel_tackling_won","sum"),
        tackling_in_defending_third=("is_tackling_in_defending_third","sum"),
        tackling_in_middle_third=("is_tackling_in_middle_third","sum"),
        tackling_in_attacking_third=("is_tackling_in_attacking_third","sum"),
        # fouls
        fouls_total=("is_foul","sum"),
        fouls_in_defending_third=("is_foul_in_defending_third","sum"),
        fouls_in_middle_third=("is_foul_in_middle_third","sum"),
        fouls_in_attacking_third=("is_foul_in_attacking_third","sum"),
        fouls_offensive=("is_offensive_foul","sum"),
        fouls_lead_to_penalty=("is_foul_penalty_resulted","sum"),
        fouls_wins_a_penalty=("teammate_is_fouled_in_op_penalty","sum"),
        fouls_yellow_card=("is_yellow_card_foul", "sum"),
        fouls_second_yellow_card=("is_second_yellow_card_foul", "sum"),
        fouls_red_card=("is_red_card_foul", "sum"),
        # bad_behaviour
        bad_behaviour_total=("is_bad_behaviour", "sum"),
        bad_behaviour_yellow_card=("is_yellow_card_bad_behaviour", "sum"),
        bad_behaviour_second_yellow_card=("is_second_yellow_card_bad_behaviour", "sum"),
        bad_behaviour_red_card=("is_red_card_bad_behaviour", "sum"),
        # total cards
        total_yellow = ("total_yellow_card", "sum"),
        total_red = ("total_red_card", "sum"),
    )

    calculation_pairs = [
        ("ball_recovery_successful","ball_recovery_total","ball_recovery_successful_%"),
        ("pressure_on_opponent_defending_third","pressure_on_opponent","pressure_on_opponent_defending_third_%"),
        ("pressure_on_opponent_middle_third","pressure_on_opponent","pressure_on_opponent_middle_third_%"),
        ("pressure_on_opponent_attacking_third","pressure_on_opponent","pressure_on_opponent_attacking_third_%"),
        ("counterpressing_opponent_defending_third","counterpressing_total","counterpressing_opponent_defending_third_%"),
        ("counterpressing_opponent_middle_third","counterpressing_total","counterpressing_opponent_middle_third_%"),
        ("counterpressing_attacking_third","counterpressing_total","counterpressing_opponent_attacking_third_%"),
        ("tackling_won","tackling","tackling_success_%"),
        ("tackling_in_defending_third","tackling","tackling_in_defending_third_%"),
        ("tackling_in_middle_third","tackling","tackling_in_middle_third_%"),
        ("tackling_in_attacking_third","tackling","tackling_in_attacking_third_%"),
        ("fouls_in_defending_third","fouls_total","fouls_in_defending_third_%"),
        ("fouls_in_middle_third","fouls_total","fouls_in_middle_third_%"),
        ("fouls_in_attacking_third","fouls_total","fouls_in_attacking_third_%"),
    ]

    for a, b, c in calculation_pairs:
        total_stats[c] = (total_stats[f'{a}'] / total_stats[f'{b}'])

    return total_stats

# Build the per-player defending statistics from the prepared event rows
# and display the resulting table.
result_df = analyze_defending(df)
result_df

Unnamed: 0_level_0,ball_recovery_total,ball_recovery_successful,ball_recovery_offensive_total,ball_recovery_offensive_successful,ball_recovery_failed,pressure_on_opponent,pressure_on_opponent_defending_third,pressure_on_opponent_middle_third,pressure_on_opponent_attacking_third,counterpressing_total,...,counterpressing_opponent_defending_third_%,counterpressing_opponent_middle_third_%,counterpressing_opponent_attacking_third_%,tackling_success_%,tackling_in_defending_third_%,tackling_in_middle_third_%,tackling_in_attacking_third_%,fouls_in_defending_third_%,fouls_in_middle_third_%,fouls_in_attacking_third_%
player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Aaron Hunt,108,103,0,0,5,359,65,190,104,107,...,0.074766,0.495327,0.429907,0.678571,0.250000,0.571429,0.178571,0.156250,0.531250,0.312500
Adam Hloušek,8,8,0,0,0,52,32,18,2,15,...,0.266667,0.533333,0.200000,0.666667,0.666667,0.333333,0.000000,0.142857,0.857143,0.000000
Adelino André Vieira Freitas,133,130,0,0,3,296,106,149,41,95,...,0.147368,0.600000,0.252632,0.519231,0.403846,0.519231,0.076923,0.250000,0.562500,0.187500
Admir Mehmedi,76,73,1,1,3,315,51,159,105,100,...,0.040000,0.510000,0.450000,0.541667,0.250000,0.541667,0.208333,0.093750,0.593750,0.312500
Adnan Januzaj,6,6,0,0,0,37,3,16,18,14,...,0.071429,0.428571,0.500000,1.000000,0.000000,1.000000,0.000000,0.142857,0.285714,0.571429
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Özkan Yıldırım,0,0,0,0,0,2,0,1,1,2,...,0.000000,0.500000,0.500000,,,,,,,
Ørjan Håskjold Nyland,36,36,0,0,0,1,1,0,0,0,...,,,,,,,,,,
İlkay Gündoğan,180,161,1,1,19,394,81,249,64,160,...,0.081250,0.675000,0.243750,0.814815,0.296296,0.629630,0.074074,0.129032,0.709677,0.161290
Łukasz Piszczek,63,58,0,0,5,215,91,91,33,73,...,0.123288,0.479452,0.397260,0.480000,0.600000,0.360000,0.040000,0.333333,0.250000,0.416667


## __Storing data__

In [None]:
# Write the per-player table to CSV; index=True keeps the player index as a column.
# NOTE(review): the path is relative to the working directory — confirm that
# ../../data is the intended target when run from the notebooks folder.
result_df.to_csv("../../data/defending.csv",index=True)