In [1]:
# Import libraries
import datetime
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import json
from mplsoccer import Pitch, Sbopen
from typing import Literal

In [2]:
# Make the project's local modules under ../src importable from this notebook.
# The path is relative, so it resolves against the kernel's working directory.
import sys

SRC_DIR = "../src"
# Guard the append so that re-running this cell does not stack duplicate entries on sys.path
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)

In [3]:
# Import custom functions
import load_data as ld
import net_analysis as net_an

## Load data

In [4]:
# Read the notebook parameters from the JSON config file.
# The encoding is pinned explicitly: JSON files are UTF-8, while open() would otherwise
# fall back to a platform-dependent locale encoding.
with open("../config.json", "r", encoding="utf-8") as file:
    params = json.load(file)

# Id of the match analysed in this notebook, named once instead of an inline magic number
MATCH_ID = 3942819

# Load the data for the configured competition and season
parser, match_data = ld.get_match_data(params["competition"], params["season"])

# Get the match summary dictionary and the lineups of the selected match
match_info_dict, lineups = ld.get_match_info(parser, match_data, MATCH_ID)

In [5]:
# TEST 1: Print the match summary, followed by the first two rows of the lineups
print(match_info_dict, lineups.head(2), sep="\n")

{'Home Team': 'Netherlands', 'Away Team': 'England', 'Final Score': '1-2', 'Match Date': 'July 10, 2024', 'Stage': 'Semi-finals', 'Stadium': 'Signal-Iduna-Park'}
   player_id    player_name player_nickname  jersey_number  match_id  team_id  \
0       2988  Memphis Depay   Memphis Depay             10   3942819      941   
1       3306     Nathan Aké      Nathan Aké              5   3942819      941   

     team_name  country_id country_name  
0  Netherlands         160  Netherlands  
1  Netherlands         160  Netherlands  


In [6]:
# Fetch the event tables of the match; only event_df is used by the cells shown below
match_events = parser.event(3942819)
event_df, related, freeze, tactics = match_events

# Compute the phase-of-play bounds for the Home Team from the event data
home_team = match_info_dict["Home Team"]
phase_bounds = ld.phase_of_play(event_df, home_team)

In [7]:
# TEST 2: Show the phase-of-play bounds returned by ld.phase_of_play for the Home Team
print(phase_bounds)

[1101, 1701, 3396]


In [8]:
# Obtain the Home Team's pass data for a specific phase of play (the argument 1 presumably
# selects the phase within phase_bounds - confirm in load_data), then add the cosine of each
# pass vector as the "cosine" column
phase_passes = ld.get_passing_data(event_df, lineups, match_info_dict["Home Team"], 1, phase_bounds)
passes = ld.cosine_pass_vector(phase_passes)

In [9]:
# TEST 3: Show the first two passes for the Home Team after the first substitution
# (left as a bare last expression so the notebook renders the frame as a table)
passes.head(2)

Unnamed: 0,x,y,end_x,end_y,pass_length,player_id,player_name,pass_recipient_id,pass_recipient_name,player_name_jersey,pass_recipient_jersey,cosine
0,47.8,66.3,65.4,55.1,19.075708,15582.0,Malen,20750.0,Gakpo,18,11,0.843661
1,4.6,42.0,30.9,24.2,29.03893,37274.0,Verbruggen,21582.0,Reijnders,1,14,0.828154


In [10]:
# Divide the pitch into thirds and keep only the passes of the "mid" region
first_bound, second_bound, mid_region_passes = ld.region_pass_filter(passes, params, "statsbomb", "mid")

# TEST 4: Report both third boundaries with their types, then preview the midfield passes
for third, bound in (("defensive", first_bound), ("midfield", second_bound)):
    print(f"End of {third} third: {bound} | Var. Type: {type(bound)}")
print(mid_region_passes.head(2))

End of defensive third: 40.0 | Var. Type: <class 'numpy.float64'>
End of midfield third: 80.0 | Var. Type: <class 'numpy.float64'>
       x     y  end_x  end_y  pass_length  player_id player_name  \
0   47.8  66.3   65.4   55.1    19.075708    15582.0       Malen   
10  43.5  34.3   38.8   44.1     9.938397    15582.0       Malen   

    pass_recipient_id pass_recipient_name player_name_jersey  \
0             20750.0               Gakpo                 18   
10            39167.0              Simons                 18   

   pass_recipient_jersey    cosine  
0                     11  0.843661  
10                     7 -0.432432  


In [11]:
# Keep the forward passes among those that originated in midfield
fwd_mid_passes = ld.direction_pass_filter(mid_region_passes, "fwd")
# From those, keep the progressive passes going from midfield to the attacking third
prog_m2a_passes = ld.progressive_passes(fwd_mid_passes, "m2a", first_bound, second_bound)

# TEST 5: Report how many passes remain after each filtering step
n_fwd, n_prog = len(fwd_mid_passes), len(prog_m2a_passes)
print(f"Overall passes: {len(passes)}")
print(f"Passes originating from midfield: {len(mid_region_passes)}")
print(
    f"Forward passes originating from midfield: {n_fwd}"
    f" | Progressive passes to attacking third from midfield: {n_prog}"
)

Overall passes: 74
Passes originating from midfield: 38
Forward passes originating from midfield: 9 | Progressive passes to attacking third from midfield: 3


### Network Analysis

In [14]:
# Build the graph object from all passes of the selected phase of play
graph = net_an.create_graph(passes)

# TEST 1: Report the graph size, one player's average position and one edge's attributes.
# Nodes are looked up by string keys ("6", "4").
node_total = graph.number_of_nodes()
edge_total = graph.number_of_edges()
print(f"Number of nodes: {node_total} | Number of edges: {edge_total}")
print(f"Player 6 average position: {graph.nodes['6']['avg_pos']}")
sample_edge = graph.get_edge_data('6', '4')
print(f"Sample edge data: {sample_edge}")

Number of nodes: 11 | Number of edges: 44
Player 6 average position: [54.97, 60.41]
Sample edge data: {0: {'intensity': 4, 'distance': 2500.0}}


In [15]:
# Calculate node centralities; the results are read back from the node attributes afterwards.
# Strength measures use the "intensity" edge attribute: in, out, then overall (None).
for direction in ("in", "out", None):
    net_an.node_strength(graph, direction, "intensity")

# Distance-based measures use the "distance" edge attribute.
for measure in ("betweenness", "in-harmonic", "out-harmonic"):
    net_an.distance_centralities(graph, measure, "distance")

In [17]:
# TEST 2: Show every attribute stored on node "6" after the centralities have been calculated
print(graph.nodes["6"])

{'avg_pos': [54.97, 60.41], 'in-strength': 10, 'out-strength': 10, 'strength': 20, 'betweenness': 0.2722, 'in-harmonic': 0.0014, 'out-harmonic': 0.0016}


### Visualization & Statistics