### Setup

In [1]:
import polars as pl
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import glob

In [2]:
# Let polars print up to 100 rows when a table is displayed in the notebook.
pl.Config.set_tbl_rows(100)

polars.config.Config

### Load Dataframes

In [74]:
# Read the four CSV tables in one pass; the literal string "NA" in any of
# them is parsed as a null value.
player_plays, games, plays, players = (
    pl.read_csv(csv_path, null_values=["NA"])
    for csv_path in (
        "data/player_play.csv",
        "data/games.csv",
        "data/plays.csv",
        "data/players.csv",
    )
)

In [4]:
# Collect every weekly tracking CSV. glob returns paths in arbitrary,
# filesystem-dependent order, so sort them to keep the row order of the
# concatenated frame reproducible across machines and re-runs.
tracking_files = sorted(glob.glob("data/tracking_week_*.csv"))
if not tracking_files:
    # Fail early with an actionable message rather than letting pl.concat
    # complain about an empty list.
    raise FileNotFoundError("No files match data/tracking_week_*.csv")
# Parse each file with "NA" treated as null, then stack them vertically.
tracking_data = pl.concat(
    [pl.read_csv(path, null_values=["NA"]) for path in tracking_files]
)

### Exploration

In [None]:
# Preview the first three rows of the player-play table.
player_plays.head(3)

In [None]:
# List the column names available in the player-play table.
player_plays.columns

In [None]:
# Preview the first two rows of the games table.
games.head(2)

In [None]:
# List the column names available in the games table.
games.columns

In [None]:
# Preview the first three rows of the players table.
players.head(3)

In [None]:
# Preview the first three rows of the plays table.
plays.head(3)

In [None]:
# List the column names available in the plays table.
plays.columns

In [None]:
# Preview the first three rows of the concatenated tracking data.
tracking_data.head(3)

In [None]:
# List the column names available in the tracking data.
tracking_data.columns

### Feature Engineering

In [5]:
# Give the terse tracking columns descriptive names. Only columns that are
# still present get renamed, so re-running this cell after the rename has
# already been applied is a no-op instead of a "column not found" error.
tracking_renames = {"s": "yardsPerSecond", "a": "acceleration", "o": "orientation"}
tracking_data = tracking_data.rename(
    {
        old_name: new_name
        for old_name, new_name in tracking_renames.items()
        if old_name in tracking_data.columns
    }
)

In [6]:
# List the distinct frameType labels present in the tracking data.
tracking_data['frameType'].unique()

frameType
str
"""BEFORE_SNAP"""
"""AFTER_SNAP"""
"""SNAP"""


##### Presnap movements 

In [7]:
# Keep only the tracking rows whose frameType label is "BEFORE_SNAP".
presnap_movements = tracking_data.filter(pl.col("frameType") == "BEFORE_SNAP")

In [11]:
# From the pre-snap rows, keep game 2022101000 and only the rows tagged with
# the "man_in_motion" event. Predicates passed separately are ANDed together.
df = presnap_movements.filter(
    pl.col("gameId") == 2022101000,
    pl.col("event") == "man_in_motion",
)

In [62]:
# Rows of game 2022101000 whose frameType label is "SNAP".
# Both conditions must hold; filter ANDs its positional predicates.
snap = tracking_data.filter(
    pl.col("gameId") == 2022101000,
    pl.col("frameType") == "SNAP",
)

In [64]:
# Distinct playId values found in the snap rows selected for this game.
unique_plays = snap["playId"].unique()

In [65]:
# Display the collected play ids.
unique_plays

playId
i64
57
110
132
181
202
224
268
287
343
367


In [71]:
# Narrow the motion rows to the same play that `snap` is narrowed to
# (playId 4148) so the two frames can be joined player-by-player. A playId
# that matches no motion rows (100 did not) leaves `df` empty, which in turn
# makes the join with `snap` empty.
# NOTE(review): confirm play 4148 actually has a "man_in_motion" event;
# otherwise pick a playId present in both `df` and `snap`.
df = df.filter(pl.col("playId") == 4148)

In [72]:
# Inspect the motion rows kept for the single play.
df

gameId,playId,nflId,displayName,frameId,frameType,time,jerseyNumber,club,playDirection,x,y,yardsPerSecond,acceleration,dis,orientation,dir,event
i64,i64,i64,str,i64,str,str,i64,str,str,f64,f64,f64,f64,f64,f64,f64,str


In [66]:
# Restrict the snap rows to play 4148.
snap = snap.filter(pl.col("playId") == 4148)

In [69]:
# Inspect the snap rows kept for the single play.
snap

gameId,playId,nflId,displayName,frameId,frameType,time,jerseyNumber,club,playDirection,x,y,yardsPerSecond,acceleration,dis,orientation,dir,event
i64,i64,i64,str,i64,str,str,i64,str,str,f64,f64,f64,f64,f64,f64,f64,str
2022101000,4148,40039.0,"""Duron Harmon""",60,"""SNAP""","""2022-10-11 03:23:10""",30.0,"""LV""","""left""",49.33,26.67,0.05,0.03,0.0,93.24,314.23,"""ball_snap"""
2022101000,4148,41325.0,"""Jerick McKinnon""",60,"""SNAP""","""2022-10-11 03:23:10""",1.0,"""KC""","""left""",56.78,22.89,0.02,0.02,0.0,334.38,326.97,"""ball_snap"""
2022101000,4148,42391.0,"""Denzel Perryman""",60,"""SNAP""","""2022-10-11 03:23:10""",52.0,"""LV""","""left""",52.55,25.21,0.47,0.53,0.05,98.17,86.43,"""ball_snap"""
2022101000,4148,42511.0,"""Michael Burton""",60,"""SNAP""","""2022-10-11 03:23:10""",45.0,"""KC""","""left""",56.68,24.64,0.01,0.01,0.0,220.14,227.48,"""ball_snap"""
2022101000,4148,43367.0,"""Joe Thuney""",60,"""SNAP""","""2022-10-11 03:23:10""",62.0,"""KC""","""left""",55.8,22.53,0.02,0.02,0.0,273.72,134.49,"""ball_snap"""
2022101000,4148,43411.0,"""Andrew Billings""",60,"""SNAP""","""2022-10-11 03:23:10""",97.0,"""LV""","""left""",54.52,23.97,0.01,0.01,0.02,73.47,68.9,"""ball_snap"""
2022101000,4148,44822.0,"""Patrick Mahomes""",60,"""SNAP""","""2022-10-11 03:23:10""",15.0,"""KC""","""left""",56.44,23.81,0.02,0.02,0.01,257.52,349.69,"""ball_snap"""
2022101000,4148,45695.0,"""Andrew Wylie""",60,"""SNAP""","""2022-10-11 03:23:10""",77.0,"""KC""","""left""",55.62,25.97,0.02,0.02,0.0,272.5,128.67,"""ball_snap"""
2022101000,4148,46152.0,"""Orlando Brown""",60,"""SNAP""","""2022-10-11 03:23:10""",57.0,"""KC""","""left""",55.69,21.5,0.07,0.07,0.01,288.15,50.74,"""ball_snap"""
2022101000,4148,46213.0,"""Justin Watson""",60,"""SNAP""","""2022-10-11 03:23:10""",84.0,"""KC""","""left""",68.57,23.68,0.01,0.01,0.0,282.47,113.81,"""ball_snap"""


In [58]:
def calculate_presnap_movement(
    presnap: pl.DataFrame,
    snap: pl.DataFrame,
    on: "str | list[str]" = "displayName",
) -> pl.DataFrame:
    """Pair pre-snap rows with snap rows and compute the positional offset.

    Args:
        presnap: Frame of pre-snap rows; must contain ``x``, ``y`` and the
            join key column(s).
        snap: Frame of snap rows; must contain ``x``, ``y`` and the join key
            column(s).
        on: Column name, or list of column names, used to match rows between
            the two frames. Defaults to ``"displayName"``, which is only safe
            when both inputs are already limited to a single play: a name
            alone would also pair rows that come from different plays. For
            multi-play inputs pass a stricter key, e.g.
            ``["gameId", "playId", "displayName"]``.

    Returns:
        The join of ``presnap`` with ``snap`` (overlapping non-key columns
        from ``snap`` get a ``_snap`` suffix) plus two columns, ``x_dif`` and
        ``y_dif``, each the pre-snap value minus the snap value. Rows are
        sorted by ``y_dif`` in descending order.
    """
    merged = presnap.join(snap, on=on, suffix="_snap")
    return merged.with_columns(
        (pl.col("x") - pl.col("x_snap")).alias("x_dif"),
        (pl.col("y") - pl.col("y_snap")).alias("y_dif"),
    ).sort("y_dif", descending=True)

In [68]:
# Join the selected pre-snap motion rows with the snap rows and display the
# result, including the x_dif / y_dif offset columns.
new = calculate_presnap_movement(df, snap)
new

gameId,playId,nflId,displayName,frameId,frameType,time,jerseyNumber,club,playDirection,x,y,yardsPerSecond,acceleration,dis,orientation,dir,event,gameId_snap,playId_snap,nflId_snap,frameId_snap,frameType_snap,time_snap,jerseyNumber_snap,club_snap,playDirection_snap,x_snap,y_snap,yardsPerSecond_snap,acceleration_snap,dis_snap,orientation_snap,dir_snap,event_snap,x_dif,y_dif
i64,i64,i64,str,i64,str,str,i64,str,str,f64,f64,f64,f64,f64,f64,f64,str,i64,i64,i64,i64,str,str,i64,str,str,f64,f64,f64,f64,f64,f64,f64,str,f64,f64
