In [12]:
#1 IMPORTS

import numpy as np
import pandas as pd
from statsbombpy import sb
from mplsoccer import Pitch
import matplotlib.pyplot as plt

In [13]:
#2 DATA LOADING

# StatsBomb identifiers for EURO 2024, named so they can be changed in one place
COMPETITION_ID = 55
SEASON_ID = 282

# Root of the local copy of the StatsBomb open-data repository.
# NOTE: this is a machine-specific absolute path; edit this one constant to run elsewhere.
OPEN_DATA_DIR = 'C:/Users/conno/Documents/Education/GIS/Applied Research Project/open-data/data'

# Identify EURO 2024 in the statsbomb competitions dataset
competitions = sb.competitions()
competitions_24 = competitions[competitions['season_name'] == '2024']

# All matches from EURO 2024. The result is kept in a variable because a bare
# expression in the middle of a cell is neither displayed nor stored.
euro_2024_matches = sb.matches(competition_id=COMPETITION_ID, season_id=SEASON_ID)

# Select a match (Netherlands v England)
MATCH_ID = 3942819

# Create dataframe for the event data from the match
match_events_df = sb.events(match_id=MATCH_ID)

# Find the corresponding 360 data for the match and create a dataframe
match_360_df = pd.read_json(f'{OPEN_DATA_DIR}/three-sixty/{MATCH_ID}.json')

# Combine the dataframes to add the 360 data to relevant event data rows.
# Left join: every event row is kept; events without a 360 frame get NaN in the 360 columns.
df = pd.merge(left = match_events_df, right = match_360_df, left_on = 'id', right_on = 'event_uuid', how = 'left')

In [14]:
#3 DATA CLEANING

# Keep only relevant columns
columns_to_keep = [
    'match_id', 'period', 'minute', 'team', 'possession_team', 'possession_team_id',
    'player', 'type', 'location', 'play_pattern', 'possession', 'counterpress',
    'freeze_frame', 'visible_area'
]

# Columns read by the feature-extraction functions further down. Which of them exist
# depends on whether the events were loaded nested ('pass', 'shot', ...) or flattened
# ('pass_length', 'shot_outcome', ...), so they are kept only when present.
feature_source_columns = [
    'pass', 'carry', 'shot', 'duel', 'ball_receipt',
    'pass_length', 'pass_angle', 'pass_height', 'pass_outcome', 'pass_end_location',
    'carry_end_location', 'shot_statsbomb_xg', 'shot_outcome',
    'duel_type', 'duel_outcome', 'ball_receipt_outcome'
]
columns_to_keep = columns_to_keep + [
    column for column in feature_source_columns
    if column in df.columns and column not in columns_to_keep
]
df = df[columns_to_keep].copy()

# Keep only relevant event types
relevant_event_types = [
    'Pass', 'Carry', 'Pressure', 'Duel', 'Shot', 'Ball Receipt',
    'Miscontrol', 'Dispossessed', '50/50', 'Block', 'Clearance',
    'Foul Committed', 'Interception', 'Dribbled Past'
]

# 'type' is a {'id': ..., 'name': ...} dict in nested event data but a plain string in
# flattened frames; accept both so the filter does not silently drop every row.
event_type_names = df['type'].apply(
    lambda x: x.get('name') if isinstance(x, dict) else (x if isinstance(x, str) else None)
)

# .copy() so later in-place column assignments act on an independent frame
# instead of a slice (avoids SettingWithCopyWarning).
df = df[event_type_names.isin(relevant_event_types)].copy()

In [15]:
#4 FEATURE EXTRACTION

# Extract statsbomb dictionary fields
def extract_statsbomb_fields(df):
    """Pull selected nested StatsBomb attributes out into flat columns, in place.

    For each nested source column ('pass', 'carry', 'shot', 'duel',
    'ball_receipt') the listed keys are read from dict-valued cells; cells
    that are not dicts, or that lack the key at any level, yield None.

    If a nested source column is absent (for example because the events were
    loaded already flattened), an existing column with the target name is left
    untouched; otherwise the target column is created and filled with None.

    'play_pattern_name' and 'event_type' are always (re)computed from
    'play_pattern' and 'type', which may hold either {'name': ...} dicts or
    plain strings.

    Parameters
    ----------
    df : pandas.DataFrame
        Event rows. Must contain 'play_pattern' and 'type'. Modified in place.

    Returns
    -------
    None
    """
    def _dig(value, *keys):
        # Walk nested dicts key by key; give up with None as soon as a level is not a dict
        for key in keys:
            if not isinstance(value, dict):
                return None
            value = value.get(key)
        return value

    def _name_of(value):
        # Plain strings are already the name; dicts carry it under 'name'
        if isinstance(value, str):
            return value
        return _dig(value, 'name')

    # (target column, nested source column, key path inside the source dict)
    nested_fields = [
        # PASS fields
        ('pass_length', 'pass', ('length',)),
        ('pass_angle', 'pass', ('angle',)),
        ('pass_height', 'pass', ('height', 'name')),
        ('pass_outcome', 'pass', ('outcome', 'name')),
        ('pass_end_location', 'pass', ('end_location',)),
        # CARRY fields
        ('carry_end_location', 'carry', ('end_location',)),
        # SHOT fields
        ('shot_statsbomb_xg', 'shot', ('statsbomb_xg',)),
        ('shot_outcome', 'shot', ('outcome', 'name')),
        # DUEL fields
        ('duel_type', 'duel', ('type', 'name')),
        ('duel_outcome', 'duel', ('outcome', 'name')),
        # BALL RECEIPT fields
        ('ball_receipt_outcome', 'ball_receipt', ('outcome', 'name')),
    ]

    for target, source, keys in nested_fields:
        if source in df.columns:
            df[target] = df[source].apply(_dig, args=keys)
        elif target not in df.columns:
            # Neither nested nor flattened data available: keep the schema stable
            df[target] = None

    # PLAY PATTERN
    df['play_pattern_name'] = df['play_pattern'].apply(_name_of)

    # TYPE name for filtering
    df['event_type'] = df['type'].apply(_name_of)

# Infer attacking direction per team and period, and mirror x-coordinates so every team attacks left-to-right
def align_team_attacking_direction(df, pitch_length=120):
    """Add raw and direction-aligned coordinate columns to ``df`` in place.

    Columns written:
      * 'team_name'            - team label taken from 'team' (dict 'name' or plain string)
      * 'location_x/_y'        - components of 'location' (NaN unless a 2-element list)
      * 'pass_end_x/_y'        - components of 'pass_end_location' (same rule)
      * 'location_x_aligned', 'pass_end_x_aligned'
                               - x-coordinates mirrored (pitch_length - x) for every
                                 (team, period) judged to be attacking right-to-left

    Direction is inferred independently for each (team, period): the mean of
    (pass_end_x - location_x) over that team's 'Pass' events in that period.
    A negative mean means right-to-left, so the rows are mirrored. Groups with
    no usable passes (mean is NaN) are left unmirrored.

    Inferring per period rather than assuming that period 2 is the opposite of
    period 1 means data that is already aligned left-to-right in every period
    is left untouched, and periods beyond the second are handled too.
    NOTE(review): only x is mirrored; y columns are left as recorded.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'team', 'period', 'location', 'pass_end_location' and
        'event_type'. Modified in place.
    pitch_length : float, default 120
        Length of the x-axis used when mirroring.

    Returns
    -------
    None
    """
    def _team_label(value):
        if isinstance(value, dict):
            return value.get('name')
        return value if isinstance(value, str) else None

    def _component(value, axis):
        # Only well-formed [x, y] pairs contribute a coordinate
        if isinstance(value, list) and len(value) == 2:
            return value[axis]
        return np.nan

    df['team_name'] = df['team'].apply(_team_label)

    # Create base x/y from location
    df['location_x'] = df['location'].apply(_component, args=(0,))
    df['location_y'] = df['location'].apply(_component, args=(1,))
    df['pass_end_x'] = df['pass_end_location'].apply(_component, args=(0,))
    df['pass_end_y'] = df['pass_end_location'].apply(_component, args=(1,))

    # Mean forward displacement of passes for every (team, period)
    passes = df.loc[
        df['event_type'] == 'Pass',
        ['team_name', 'period', 'location_x', 'pass_end_x']
    ]
    if passes.empty:
        right_to_left = set()
    else:
        mean_dx = (
            passes.assign(dx=passes['pass_end_x'] - passes['location_x'])
            .groupby(['team_name', 'period'])['dx']
            .mean()
        )
        # NaN means compare False, so groups without usable passes are never mirrored
        right_to_left = set(mean_dx[mean_dx < 0].index)

    # Positional boolean mask (independent of the frame's index labels)
    needs_flip = np.array(
        [(team, period) in right_to_left for team, period in zip(df['team_name'], df['period'])],
        dtype=bool,
    )

    # Mirror where needed; NaN coordinates stay NaN either way
    df['location_x_aligned'] = np.where(needs_flip, pitch_length - df['location_x'], df['location_x'])
    df['pass_end_x_aligned'] = np.where(needs_flip, pitch_length - df['pass_end_x'], df['pass_end_x'])

# Assign pitch zones
def assign_pitch_zones(df):
    """Label every row with the zone that contains its pass end point.

    Writes an 'end_zone' column into ``df`` in place, derived from
    'pass_end_x_aligned' and 'pass_end_y'. The label is None when either
    coordinate is missing or x lies outside 0..120.

    Zones by x band:
      * 0 <= x < 40    : 'Defensive central' (18 <= y <= 62) or 'Defensive wide'
      * 40 <= x < 80   : 'Midfield central' (18 <= y <= 62) or 'Midfield wide'
      * 80 <= x <= 120 : 'Attacking wide' (y < 18 or y > 62),
                         'Half space' (18 <= y < 30 or 50 < y <= 62),
                         'Attacking central low' (30 <= y <= 50, x < 102),
                         'Attacking central high' (30 <= y <= 50, x >= 102)
    """
    def get_zone(x, y):
        # Missing coordinates or an x off the pitch cannot be classified
        if pd.isna(x) or pd.isna(y):
            return None
        if not (0 <= x <= 120):
            return None

        inside_central_band = 18 <= y <= 62

        # Defensive third
        if x < 40:
            return 'Defensive central' if inside_central_band else 'Defensive wide'

        # Middle third
        if x < 80:
            return 'Midfield central' if inside_central_band else 'Midfield wide'

        # Attacking third
        if not inside_central_band:
            return 'Attacking wide'
        if 30 <= y <= 50:
            return 'Attacking central low' if x < 102 else 'Attacking central high'
        return 'Half space'

    def zone_for_row(row):
        return get_zone(row['pass_end_x_aligned'], row['pass_end_y'])

    df['end_zone'] = df.apply(zone_for_row, axis=1)

# Assign the pass start and end 1/3 zones
def assign_pitch_thirds(df):
    """Classify start and end x-coordinates into thirds of the pitch, in place.

    Adds 'start_third' (from 'location_x_aligned') and 'end_third' (from
    'pass_end_x_aligned'). Missing x gives None; x below 40 is
    'Defensive Third', x below 80 is 'Middle Third', anything else is
    'Attacking Third'.
    """
    def get_third(x):
        if pd.isna(x):
            return None
        if x >= 80:
            return 'Attacking Third'
        if x >= 40:
            return 'Middle Third'
        return 'Defensive Third'

    # (output column, aligned x column it is derived from); start third first,
    # then end third. Only the pass end x is used for the end third.
    column_pairs = (
        ('start_third', 'location_x_aligned'),
        ('end_third', 'pass_end_x_aligned'),
    )
    for third_column, x_column in column_pairs:
        df[third_column] = df[x_column].apply(get_third)

In [17]:
#4 OUTPUT A SUMMARY DATAFRAME

def generate_team_summary(df, match_id):
    """Build a one-row-per-team summary frame for a match.

    Parameters
    ----------
    df : pandas.DataFrame
        Event rows with a 'team' column holding either {'name': ...} dicts or
        plain team-name strings. Rows with any other value are ignored.
    match_id : any
        Identifier copied into the 'match_id' column of every row.

    Returns
    -------
    pandas.DataFrame
        One row per distinct team, in order of first appearance, with the
        columns 'team' and 'match_id'. Empty when no team can be identified.
    """
    def _team_label(value):
        if isinstance(value, dict):
            return value.get('name')
        return value if isinstance(value, str) else None

    # Resolve team labels once instead of re-deriving them for every team
    team_names = df['team'].apply(_team_label)

    team_rows = []
    for team in team_names.dropna().unique():
        # Events belonging to this team; input for the metrics below
        team_df = df[team_names == team]

        # Calculate metrics (to be implemented)
        metrics = {
            'team': team,
            'match_id': match_id,
        }

        team_rows.append(metrics)

    return pd.DataFrame(team_rows)

In [None]:
#5 CALCULATE METRICS

# Total mins played 