In [None]:
# =============================================================================
# SETUP
# =============================================================================
import pandas as pd 
import polars as pl
from utils.constants import get_open_data_dirs, get_j1_league_dirs

# Directory lookups supplied by utils.constants, resolved once for the notebook.
J1_LEAGUE_DIR = get_j1_league_dirs()
OPEN_DATA_DIR = get_open_data_dirs()

# Lazy scan over the bronze events parquet data: nothing is read into memory
# until a query built on `df` is collected.
# The files are presumably not schema-identical (TODO confirm), hence:
#   - missing_columns="insert": columns absent from a file are filled with nulls
#   - extra_columns="ignore":   columns outside the resolved schema are skipped
df = pl.scan_parquet(
    OPEN_DATA_DIR["bronze_events"],
    extra_columns="ignore",
    missing_columns="insert",
)


Schema({'id': String, 'index': Int64, 'period': Int64, 'timestamp': String, 'minute': Int64, 'second': Int64, 'possession': Int64, 'duration': Float64, 'type_id': Int64, 'type_name': String, 'possession_team_id': Int64, 'possession_team_name': String, 'play_pattern_id': Int64, 'play_pattern_name': String, 'team_id': Int64, 'team_name': String, 'tactics_formation': Float64, 'tactics_lineup': List(Struct({'jersey_number': Int64, 'player': Struct({'id': Int64, 'name': String}), 'position': Struct({'id': Int64, 'name': String})})), 'related_events': String, 'location': String, 'player_id': Float64, 'player_name': String, 'position_id': Float64, 'position_name': String, 'pass_recipient_id': Float64, 'pass_recipient_name': String, 'pass_length': Float64, 'pass_angle': Float64, 'pass_height_id': Float64, 'pass_height_name': String, 'pass_end_location': List(Float64), 'pass_body_part_id': Float64, 'pass_body_part_name': String, 'pass_type_id': Float64, 'pass_type_name': String, 'carry_end_loca

id,index,period,timestamp,minute,second,possession,duration,type_id,type_name,possession_team_id,possession_team_name,play_pattern_id,play_pattern_name,team_id,team_name,tactics_formation,tactics_lineup,related_events,location,player_id,player_name,position_id,position_name,pass_recipient_id,pass_recipient_name,pass_length,pass_angle,pass_height_id,pass_height_name,pass_end_location,pass_body_part_id,pass_body_part_name,pass_type_id,pass_type_name,carry_end_location,pass_switch,…,foul_committed_offensive,foul_committed_card_id,foul_committed_card_name,foul_won_defensive,pass_through_ball,pass_technique_id,pass_technique_name,out,clearance_right_foot,pass_inswinging,goalkeeper_technique_id,goalkeeper_technique_name,goalkeeper_body_part_id,goalkeeper_body_part_name,clearance_head,shot_aerial_won,miscontrol_aerial_won,dribble_overrun,pass_miscommunication,block_offensive,bad_behaviour_card_id,bad_behaviour_card_name,substitution_outcome_id,substitution_outcome_name,substitution_replacement_id,substitution_replacement_name,pass_cut_back,shot_one_on_one,foul_committed_advantage,foul_won_advantage,clearance_aerial_won,pass_deflected,pass_no_touch,foul_committed_type_id,foul_committed_type_name,pass_straight,pass_goal_assist
str,i64,i64,str,i64,i64,i64,f64,i64,str,i64,str,i64,str,i64,str,f64,list[struct[3]],str,str,f64,str,f64,str,f64,str,f64,f64,f64,str,list[f64],f64,str,f64,str,list[f64],bool,…,bool,f64,str,bool,bool,f64,str,bool,bool,bool,f64,str,f64,str,bool,bool,bool,bool,bool,bool,f64,str,f64,str,f64,str,bool,bool,bool,bool,bool,bool,bool,f64,str,bool,bool
"""9f6e2ecf-6685-45df-a62e-c2db30…",1,1,"""00:00:00.000""",0,0,1,0.0,35,"""Starting XI""",217,"""Barcelona""",1,"""Regular Play""",217,"""Barcelona""",442.0,"[{1,{20055,""Marc-André ter Stegen""},{1,""Goalkeeper""}}, {2,{6374,""Nélson Cabral Semedo""},{2,""Right Back""}}, … {10,{5503,""Lionel Andrés Messi Cuccittini""},{24,""Left Center Forward""}}]",,,,,,,,,,,,,,,,,,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [10]:
# Left-footed clearances by Arsenal players.
# Only the three columns needed are selected up front, which lets Polars'
# projection pushdown avoid reading the rest of the very wide events schema.
arsenal_events = (
    df.select("clearance_left_foot", "player_name", "team_name")
      .filter(
          # `clearance_left_foot` is already a Boolean column, so it is used
          # directly as the predicate (no `== True`). Rows where it is null are
          # dropped by `filter`, exactly as they were with the comparison.
          pl.col("clearance_left_foot") & (pl.col("team_name") == "Arsenal")
      )
      .collect()
)
# Show a small preview rather than the full result.
arsenal_events.head(10)

clearance_left_foot,player_name,team_name
bool,str,str
True,"""Ashley Cole""","""Arsenal"""
True,"""Kolo Habib Touré""","""Arsenal"""
True,"""Ashley Cole""","""Arsenal"""
True,"""Laureano Bisan-Etame Mayer""","""Arsenal"""
True,"""Sulzeer Jeremiah ''Sol' Campbe…","""Arsenal"""
True,"""Ashley Cole""","""Arsenal"""
True,"""Sulzeer Jeremiah ''Sol' Campbe…","""Arsenal"""
True,"""Sulzeer Jeremiah ''Sol' Campbe…","""Arsenal"""
True,"""Sulzeer Jeremiah ''Sol' Campbe…","""Arsenal"""
True,"""Ray Parlour""","""Arsenal"""
