In [1]:
import os
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import Normalize
from tqdm import tqdm

import argparse
import torch
from torchvision import transforms
from torchvision.datasets.folder import default_loader
from PIL import Image

In [2]:
# Raw input files live in the "raw" sub-directory of $DATA_DIR.
# A missing DATA_DIR environment variable raises KeyError here.
data_root = os.environ["DATA_DIR"]
raw_data_dir = os.path.join(data_root, "raw")

In [3]:
# Load the per-game, per-player-per-play, per-player and per-play tables.
games = pd.read_csv(os.path.join(raw_data_dir,"games.csv"))
player_play = pd.read_csv(os.path.join(raw_data_dir,"player_play.csv"))
players = pd.read_csv(os.path.join(raw_data_dir,"players.csv"))
plays = pd.read_csv(os.path.join(raw_data_dir,"plays.csv"))

# We're concatenating all the weekly tracking files into one frame.
# glob.glob returns matches in a filesystem-dependent order, so the list is
# sorted to make the row order of `tracking` reproducible between runs.
tracking_files = sorted(glob.glob(os.path.join(raw_data_dir,"tracking_week_*.csv")))
if not tracking_files:
    # pd.concat([]) would fail with a much less helpful message.
    raise FileNotFoundError(
        f"No tracking_week_*.csv files found in {raw_data_dir}"
    )
tracking = pd.concat([pd.read_csv(file) for file in tqdm(tracking_files)], ignore_index=True)

100%|██████████| 9/9 [01:01<00:00,  6.79s/it]


In [4]:
# Sanity check: compare table sizes and the number of distinct player ids
# seen in the tracking data versus the player_play table.
tracking_player_count = tracking["nflId"].nunique()
player_play_player_count = player_play["nflId"].nunique()

print("tracking shape:", tracking.shape)
print("player_play shape:", player_play.shape)
print("tracking nflId unique:", tracking_player_count)
print("player_play nflId unique:", player_play_player_count)


tracking shape: (59327373, 18)
player_play shape: (354727, 50)
tracking nflId unique: 1697
player_play nflId unique: 1697


In [20]:
# Build one row per route runner holding the route ran, the tracking row at
# the snap (which supplies x) and the player's position.
route_runners = player_play[player_play["wasRunningRoute"] == 1]
snap_tracking = tracking[tracking["event"] == "ball_snap"]

routesRan_df = (
    route_runners
    # Left joins keep every route runner even without a matching snap row.
    .merge(snap_tracking, how="left", on=["playId", "nflId", "gameId"])
    .merge(players[["nflId", "position"]], how="left", on="nflId")
)


# Fixed output schema: one slot for a QB, five for WRs, three for RBs and
# three for TEs. Slots are numbered in the order players are encountered.
all_keys = [
    "routeRanQB0",
    "routeRanWR0", "routeRanWR1", "routeRanWR2", "routeRanWR3", "routeRanWR4",
    "routeRanRB0", "routeRanRB1", "routeRanRB2",
    "routeRanTE0", "routeRanTE1", "routeRanTE2",
]


def extract_routes_per_play(group):
    """Flatten the route runners of one play into a fixed-width Series.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows of a single play with at least the columns ``x``, ``position``
        and ``routeRan``.

    Returns
    -------
    pandas.Series
        Indexed by ``all_keys``. Players are visited in ascending ``x``; the
        n-th player seen at a position fills ``routeRan<POS><n>``. Slots that
        are never filled hold the string ``"NA"``. Players whose position is
        not QB/WR/RB/TE, or who exceed the number of slots available for
        their position, are ignored.
    """
    ordered = group.sort_values(by='x')
    routes = dict.fromkeys(all_keys, "NA")
    next_slot = {'QB': 0, 'WR': 0, 'RB': 0, 'TE': 0}

    for pos, route in zip(ordered['position'], ordered['routeRan']):
        # Positions outside the tracked set (including missing values) are skipped.
        if pos not in next_slot:
            continue

        slot = f"routeRan{pos}{next_slot[pos]}"
        # No slot left for this position: drop the player, keep the counter as is.
        if slot not in routes:
            continue

        routes[slot] = route
        next_slot[pos] += 1

    return pd.Series(routes)


# Apply per play.
# Only the columns that extract_routes_per_play reads (x, position, routeRan)
# are handed to apply. Leaving the grouping keys out of the applied frame
# avoids pandas' deprecation warning about apply operating on grouping
# columns, without changing the result.
routesRan_play_df = (
    routesRan_df
    .groupby(["playId","gameId"])[["x", "position", "routeRan"]]
    .apply(extract_routes_per_play)
    .reset_index()
)
routesRan_play_df

  routesRan_df.groupby(["playId","gameId"]).apply(extract_routes_per_play).reset_index()


Unnamed: 0,playId,gameId,routeRanQB0,routeRanWR0,routeRanWR1,routeRanWR2,routeRanWR3,routeRanWR4,routeRanRB0,routeRanRB1,routeRanRB2,routeRanTE0,routeRanTE1,routeRanTE2
0,54,2022092512,,CROSS,IN,OUT,,,,,,SLANT,,
1,54,2022100200,,GO,HITCH,SCREEN,,,ANGLE,,,,,
2,55,2022092510,,POST,CORNER,,,,FLAT,,,FLAT,IN,
3,55,2022100910,,CROSS,,,,,ANGLE,,,CROSS,CORNER,
4,55,2022100912,,GO,FLAT,SCREEN,,,ANGLE,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9308,4993,2022091103,,GO,HITCH,HITCH,,,ANGLE,,,GO,,
9309,5015,2022091103,,POST,HITCH,HITCH,,,,,,CROSS,,
9310,5039,2022091103,,HITCH,GO,OUT,,,OUT,,,GO,,
9311,5074,2022091103,,GO,SLANT,GO,,,,,,SLANT,,


In [6]:
# Attach the columns of `players` (position is the one used below) to every
# player_play row.
local_features = player_play.merge(players, on='nflId', how='left')


# One group per play; playId is paired with gameId.
list_local_feature = local_features.groupby(["playId","gameId"])

# For every play, count how many players at each position were running a route.
qb, wr, rb, te = [], [], [], []
counts_by_position = {"QB": qb, "WR": wr, "RB": rb, "TE": te}
for _, features in tqdm(list_local_feature):
    ran_route = features['wasRunningRoute'] == 1
    for position_code, per_play_counts in counts_by_position.items():
        per_play_counts.append((ran_route & (features['position'] == position_code)).sum())

# Largest number of simultaneous route runners seen at each position
# (printed in the order QB, WR, RB, TE).
for per_play_counts in (qb, wr, rb, te):
    print(max(per_play_counts))

100%|██████████| 16124/16124 [00:09<00:00, 1651.66it/s]

1
5
3
3





In [7]:
# One tracking DataFrame per play.
# playId values are reused across games, so a play is identified by
# (playId, gameId) -- the same key used by the other groupbys in this notebook.
# The previous nested groupby("playId") inside a playId group was a no-op and
# mixed rows from different games into one element.
# NOTE(review): if per-frame snapshots were intended, the frame identifier
# column would need to be added to the grouping keys -- confirm intent.
frame_list = [
    play_tracking
    for _, play_tracking in tracking.groupby(["playId", "gameId"])
]

In [8]:
# Smallest x value in the tracking data. Series.min is vectorized and skips
# NaN, unlike the builtin min which iterates element by element in Python.
float(tracking["x"].min())


-11.789999961853

In [9]:
# Largest y value in the tracking data. Series.max is vectorized and skips
# NaN, unlike the builtin max which iterates element by element in Python.
float(tracking["y"].max())

71.7399978637695

In [10]:
# Smallest y value in the tracking data. Series.min is vectorized and skips
# NaN, unlike the builtin min which iterates element by element in Python.
float(tracking["y"].min())

-18.3600006103516