notebook to find out the amount of distance players run above certrain speed tresholds during a  match

In [1]:
from floodligt_functions import clean_metadata
from floodligt_functions import read_position_data_jsonl
from pathlib import Path
import polars as pl
from floodlight.models.kinematics import DistanceModel
from floodlight.models.kinematics import VelocityModel

In [2]:
# path to the tracking data file
filepath_position = Path("SecondSpectrum/Second Spectrum/Anderlecht - Club Brugge/tracking-produced.jsonl")

In [3]:
# path to the origibal meta data file
filepath_metadata = Path("SecondSpectrum/Second Spectrum/Anderlecht - Club Brugge/rsc-bru_metadata.json")

In [4]:
# path to the cleaned metadata file = output of clean_metadata function
filepath_cleaned_metadata = Path("cleaned_metadata.json")

In [5]:
# modifying the metadata file so it can be used as input for the floodlight function
clean_metadata(filepath_metadata)

In [6]:
# reading in the tracking / position data
position_data = read_position_data_jsonl(
    filepath_position,
    filepath_cleaned_metadata
)

Getting the data

In [7]:
# storing different possibilities for halfs and teams to be used in loops
# only HT1 is used to avoid kernel crashing 
halfs =  ["HT1"] #["HT1", "HT2"]
teams =["Home", "Away"]

In [8]:
# get list of home players
# this will be used to match player with correct position data
home_players = [player for player in position_data[3]['Home']["player"]]

# get list of away players
away_players = [player for player in position_data[3]['Away']["player"]]

In [13]:
# store a dataframe per half showing the possession status (H = Home or A = Away) 
possession_status = {}

for half in halfs:
    # access the possession data in the position data dictionary
    half_possession = position_data[1][half].code

    # Convert np.nan to None so Polars can handle them as nulls
    half_possession = [x if isinstance(x, str) else None for x in half_possession]

    # Create DataFrame
    possession_status[half] = pl.DataFrame({
        f"{half} possession": half_possession
    }).drop_nulls()

In [11]:
# store a dataframe per half showing the possession status (D = Dead or A = Alive) 
ball_status = {}

for half in halfs:
    # access the ball data in the position data dictionary
    half_ball_status = position_data[2][half].code
    
    # Convert np.nan to None so Polars can handle them as nulls
    half_ball_status = [x if isinstance(x, str) else None for x in half_ball_status]
    
    # Create DataFrame
    ball_status[half] = pl.DataFrame({
        f"{half} ball status": half_ball_status
    }).drop_nulls()

In [14]:
# get the xy coordinates of the position data
# needed as input for the floodlight position - velocity - acceleration data
xy_objects = position_data[0]

In [None]:
# Dictionary to store results per playing half
distance_dfs = {}

for half in halfs:                  
    distance_dfs[half] = {}
    
    for team in teams:
        # apply the floodlight function
        dm = DistanceModel()
        dm.fit(xy_objects[half][team])
        distance = dm.distance_covered()
        # extract only the distance data
        np_array = distance.property
        # define the player names that have to be used as column names
        schema = home_players if team == "Home" else away_players
        # store the data in a dataframe and filter out rows that entirely consist of nan values 
        distance_dfs[half][team] = pl.DataFrame(np_array, schema=schema).filter(~pl.all_horizontal(pl.all().is_nan()))

In [15]:
#distance_dfs

In [None]:
# concatenate data from the Home team, Away team, ball status & possession status
distance_df = pl.concat([distance_dfs["HT1"]['Home'],distance_dfs["HT1"]['Away'],ball_status["HT1"], possession_status["HT1"]], how = "horizontal").drop_nulls()

In [17]:
#distance_df

In [None]:
# write the distance data to a parquet file 
path = Path("floodlight_parquets/distance_HT1")
distance_df.write_parquet(path)

In [None]:

# Dictionary to store results
velocity_dfs = {}

for half in halfs:
    velocity_dfs[half] = {}
    
    for team in teams:
        # apply the floodlight function
        vm = VelocityModel()
        vm.fit(xy_objects[half][team])
        velocity = vm.velocity()
        # extract only the velocity data
        np_array = velocity.property
        # define the player names that have to be used as column names
        schema = home_players if team == "Home" else away_players
        # store the data in a dataframe and filter out rows that entirely consist of nan values 
        velocity_dfs[half][team] = pl.DataFrame(np_array, schema=schema).filter(~pl.all_horizontal(pl.all().is_nan()))

: 

In [None]:
#velocity_dfs

{'HT1': {'Home': shape: (76_365, 21)
  ┌─────────────┬──────────┬──────────┬──────────┬───┬────────────┬────────┬────────────┬────────────┐
  │ C.          ┆ Amuzu    ┆ M. Rits  ┆ T. Leoni ┆ … ┆ T. Degreef ┆ Thomas ┆ Timon      ┆ Anas       │
  │ Coosemans   ┆ ---      ┆ ---      ┆ ---      ┆   ┆ ---        ┆ Foket  ┆ Vanhoutte  ┆ Tajaouart  │
  │ ---         ┆ f64      ┆ f64      ┆ f64      ┆   ┆ f64        ┆ ---    ┆ ---        ┆ ---        │
  │ f64         ┆          ┆          ┆          ┆   ┆            ┆ f64    ┆ f64        ┆ f64        │
  ╞═════════════╪══════════╪══════════╪══════════╪═══╪════════════╪════════╪════════════╪════════════╡
  │ 0.5         ┆ 1.520691 ┆ 0.559017 ┆ 1.030776 ┆ … ┆ NaN        ┆ NaN    ┆ NaN        ┆ NaN        │
  │ 0.450694    ┆ 1.520691 ┆ 0.450694 ┆ 1.007782 ┆ … ┆ NaN        ┆ NaN    ┆ NaN        ┆ NaN        │
  │ 0.559017    ┆ 1.667708 ┆ 0.375    ┆ 1.0      ┆ … ┆ NaN        ┆ NaN    ┆ NaN        ┆ NaN        │
  │ 0.450694    ┆ 1.976424 ┆ 0.39528

In [None]:
# concatenate data from the Home team, Away team, ball status & possession status
velocity_df = pl.concat([velocity_dfs["HT1"]['Home'],velocity_dfs["HT1"]['Away'],ball_status["HT1"], possession_status["HT1"]], how = "horizontal").drop_nulls()
#velocity_df

C. Coosemans,Amuzu,M. Rits,T. Leoni,K. Dolberg,K. Sardella,A. Dreyer,L. Dendoncker,J. Simić,Amando Lapage,Ali Maamar,L. Vázquez,Y. Verschaeren,N. Engwanda,Mads Kikkenborg,S. Edozie,Thorgan Hazard,T. Degreef,Thomas Foket,Timon Vanhoutte,Anas Tajaouart,S. Mignolet,Ordoñez,Ferran Jutglà,K. Sabbe,C. Talbi,H. Vanaken,B. Mechele,C. Tzolis,Maxim De Cuyper,Ardon Jashari,Raphael Onyedika,Bjorn Meijer,Michal Skoras,J. Spileers,C. Nielsen,Romero,H. Vetlesen,Siquet,Jackers,Romeo Vermant,Gustaf Nilsson,HT1 ball status,HT1 possession
f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,str
0.5,1.520691,0.559017,1.030776,2.150581,0.25,0.75,0.353553,0.5,0.25,0.25,,,,,,,,,,,0.25,0.25,1.767767,0.25,0.25,0.25,0.0,0.25,0.25,0.790569,0.353553,,,,,,,,,,,"""D""","""H"""
0.450694,1.520691,0.450694,1.007782,2.150581,0.25,0.625,0.353553,0.395285,0.395285,0.279508,,,,,,,,,,,0.176777,0.176777,1.891593,0.279508,0.125,0.176777,0.125,0.375,0.125,0.790569,0.279508,,,,,,,,,,,"""D""","""H"""
0.559017,1.667708,0.375,1.0,2.25347,0.279508,0.625,0.450694,0.395285,0.450694,0.25,,,,,,,,,,,0.279508,0.125,2.157835,0.353553,0.125,0.395285,0.125,0.5,0.125,0.673146,0.125,,,,,,,,,,,"""D""","""H"""
0.450694,1.976424,0.395285,1.007782,2.25347,0.279508,0.5,0.559017,0.375,0.450694,0.25,,,,,,,,,,,0.353553,0.125,2.427061,0.353553,0.25,0.75,0.0,0.625,0.176777,0.559017,0.125,,,,,,,,,,,"""A""","""A"""
0.559017,2.25347,0.375,1.007782,2.150581,0.176777,0.25,0.559017,0.375,0.728869,0.395285,,,,,,,,,,,0.450694,0.25,2.576941,0.279508,0.125,1.256234,0.125,1.125,0.125,0.353553,0.176777,,,,,,,,,,,"""A""","""A"""
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
0.800391,0.0,1.845603,0.395285,0.515388,0.279508,0.125,0.395285,0.0,1.030776,0.637377,,,,,,,,,,,1.152443,0.176777,0.176777,1.030776,0.673146,1.007782,0.515388,0.176777,0.883883,0.5,0.279508,,,,,,,,,,,"""A""","""A"""
0.901388,0.0,1.789728,0.515388,0.515388,0.279508,0.125,0.450694,0.0,1.030776,0.637377,,,,,,,,,,,1.131923,0.0,0.176777,1.007782,0.673146,1.007782,0.559017,0.0,0.883883,0.5,0.353553,,,,,,,,,,,"""D""","""A"""
1.007782,0.0,1.845603,0.395285,0.625,0.353553,0.25,0.395285,0.0,1.152443,0.673146,,,,,,,,,,,1.25,0.125,0.176777,1.030776,0.790569,1.030776,0.515388,0.0,0.910014,0.5,0.279508,,,,,,,,,,,"""D""","""A"""
0.951972,0.0,1.845603,0.395285,0.637377,0.176777,0.25,0.515388,0.0,1.152443,0.790569,,,,,,,,,,,1.25,0.279508,0.176777,1.068,0.673146,1.030776,0.625,0.0,1.030776,0.5,0.450694,,,,,,,,,,,"""D""","""A"""


In [None]:
# write the velocity data to a parquet file 
path = Path("floodlight_parquets/velocity_HT1")
velocity_df.write_parquet(path)

In [None]:
#distance_dfs['HT2']['Away']["C. Tzolis"]

C. Tzolis
f64
0.014142
0.014142
0.018028
0.018028
0.01118
…
""
""
""
""


In [22]:
pl.concat(
    [distance_dfs['HT2']['Away']["C. Tzolis"].to_frame(name="distance"), velocity_dfs['HT2']['Away']["C. Tzolis"].to_frame(name="velocity")], how="horizontal"
).filter(
    ((pl.col("velocity") > 5.5) & (pl.col("velocity").is_not_nan()))
).select(
    "distance"
).sum()


distance
f64
518.688651
