In [1]:
import polars as pl
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import glob


#### Load Dataframes

In [2]:
# Read the four lookup tables in a fixed order; the literal string "NA" in any
# column is parsed as a null value.
player_plays, games, plays, players = (
    pl.read_csv(csv_path, null_values=["NA"])
    for csv_path in (
        "data/player_play.csv",
        "data/games.csv",
        "data/plays.csv",
        "data/players.csv",
    )
)

In [3]:
# Load every weekly tracking file and stack them into a single DataFrame.
# glob.glob() returns paths in arbitrary (filesystem-dependent) order, so sort
# them to make the row order of the concatenated frame reproducible.
tracking_files = sorted(glob.glob("data/tracking_week_*.csv"))
if not tracking_files:
    # Fail early with a clear message instead of letting pl.concat() choke on
    # an empty list.
    raise FileNotFoundError("No files matched data/tracking_week_*.csv")

# NOTE(review): this holds the full raw tracking data for all weeks in memory,
# and a later cell rebinds `tracking_data` to a list of per-week aggregates.
tracking_data = pl.concat(
    [pl.read_csv(f, null_values=["NA"]) for f in tracking_files]
)

In [None]:
# Player-play rows whose `tackleForALoss` flag equals True.
tfl = player_plays.filter(pl.col("tackleForALoss").eq(True))
tfl

In [30]:
# Candidate rows for the quarterback-scramble analysis: keep only player-play
# rows whose `hadDropback` flag equals True. No pressure-related filter is
# applied at this stage.
scramble_plays = player_plays.filter(pl.col("hadDropback").eq(True))

In [38]:
# Player-play rows whose `inMotionAtBallSnap` value is the string "TRUE"
# (the comparison is against text, not a boolean).
inMotionAtBallSnapDf = player_plays.filter(
    pl.col("inMotionAtBallSnap").eq("TRUE")
)

In [None]:
# Display the filtered frame of rows with inMotionAtBallSnap == "TRUE".
inMotionAtBallSnapDf

In [None]:
# Display the filtered frame of rows with hadDropback == True.
scramble_plays

In [11]:
# Build one row of aggregated tracking features per (gameId, playId).
# Files are processed one week at a time so that only a single week's raw
# tracking rows are held in memory alongside the (much smaller) aggregates.

# glob.glob() returns paths in arbitrary order; sort so the row order of the
# final frame is reproducible across machines and runs.
tracking_files = sorted(glob.glob("data/tracking_week_*.csv"))
if not tracking_files:
    # Fail early with a clear message instead of letting pl.concat() choke on
    # an empty list further down.
    raise FileNotFoundError("No files matched data/tracking_week_*.csv")

# Columns read from each weekly file (loop-invariant, so defined once).
tracking_columns = [
    "gameId", "playId", "nflId", "displayName", "frameId", "frameType",
    "time", "jerseyNumber", "club", "playDirection", "x", "y", "s", "a",
    "dis", "o", "dir", "event"
]

# Position lookup used to identify quarterbacks. The original code compared
# `displayName` (a player's name) with "QB", which cannot identify a position.
# NOTE(review): assumes `players` (loaded from data/players.csv in an earlier
# cell) has `nflId` and `position` columns - confirm against the dataset.
player_positions = (
    players
    .select([pl.col("nflId").cast(pl.Int64), pl.col("position")])
    .unique(subset="nflId", keep="first")  # guard against row fan-out in the join
)

tracking_data = []
for file in tracking_files:
    week_data = pl.read_csv(file, columns=tracking_columns, null_values=["NA"])

    # Feature engineering: aggregate tracking metrics per play. Each mean is
    # taken over every tracking row of the play (all tracked entities, all
    # frames).
    # NOTE(review): if `o` and `dir` are angles in degrees, a plain arithmetic
    # mean is misleading near the 0/360 wrap-around - consider a circular mean.
    week_data_grouped = (
        week_data
        # Align the key dtype with the lookup table before joining.
        .with_columns(pl.col("nflId").cast(pl.Int64))
        # Left join keeps rows with no matching player; their position is null
        # and is ignored by the sum below.
        .join(player_positions, on="nflId", how="left")
        .group_by(["gameId", "playId"])
        .agg([
            pl.col("s").mean().alias("avg_speed"),
            pl.col("a").mean().alias("avg_acceleration"),
            pl.col("o").mean().alias("avg_orientation"),
            pl.col("dir").mean().alias("avg_direction"),
            # Number of tracking rows in the play that belong to a quarterback.
            (pl.col("position") == "QB").sum().alias("qb_in_play"),
        ])
    )
    tracking_data.append(week_data_grouped)

# Concatenate the per-week aggregates into a single frame.
all_tracking_data = pl.concat(tracking_data)