In [19]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.spatial.distance import cdist
from tqdm.notebook import tqdm
import IPython

Reading in Data

In [None]:
"""Katies reading in files"""
players = pd.read_csv("players.csv")
week = pd.read_csv("tracking_week_1.csv")
plays = pd.read_csv("plays.csv")
tackles = pd.read_csv("tackles.csv")


# join player positioning information onto a week's worth of tracking data 
week = week.merge(players.loc[:, ['nflId', 'position']], how='left')
week.shape

In [20]:
""" Matts reading in files """
def load_dataset(dataset_name):
    """
    Download a specific dataset from data directory.

    Parameters:
    - dataset_name: Name of the dataset to load
    """
    return pd.read_csv(f"C:\\Users\\mattd\\Documents\\GitHub\\big-data-bowl-2024\\data\\{dataset_name}.csv")

# Read In csvs
plays = load_dataset("plays")
players = load_dataset("players")
week = load_dataset("tracking_week_1")
tackles = load_dataset("tackles")
week = week.merge(players.loc[:, ['nflId', 'position']], how='left')
week.shape

(1407439, 18)

Defining Functions for feature engineering at each level

(Just put in ones I thought of, if it has #TO DO: over it then it isnt complete/not started)

In [21]:
"""Game level functions"""

def get_frames_of_catch(games):
    """
    Returns only the frame of the play where the ball is caught.

    Parameters:
    - games: Dataset of games

    Returns:
    - frame: The tracking data of only the data where the ball is caught
    """
    return games.loc[games['event'] == 'pass_outcome_caught'].copy()

In [68]:
"""Frame level functions"""
def distance_to_ball(play_data, frame):
    """
    Calculates the distance to the ball for each player per frame in a play.

    Parameters:
    - dataset_name: Name of the dataset to load
    """
    ball_carrier_id = play_data["ballCarrierId"].iloc[0]
    ball_carrier = frame[frame["nflId"] == ball_carrier_id]
    carr_x = ball_carrier["x"].values[0]
    carr_y = ball_carrier["y"].values[0]
    frame['distance_to_ball_carrier'] = cdist(frame[['x', 'y']], [[carr_x, carr_y]], metric='euclidean')
    return frame

def who_tackles(play_data, frame):
    """
    Adds who tackles the ball carrier to the tracking data set.

    Parameters:
    - dataset_name: Name of the dataset to load
    """
    ball_carrier_id = play_data["ballCarrierId"].iloc[0]
    ball_carrier = frame[frame["nflId"] == ball_carrier_id]
    carr_x = ball_carrier["x"].values[0]
    carr_y = ball_carrier["y"].values[0]
    frame['distance_to_ball_carrier'] = cdist(frame[['x', 'y']], [[carr_x, carr_y]], metric='euclidean')
    return frame

def create_dnn_input_df(frames, side_of_ball):
    """
    Creates the input dataframe for the DNN model.

    Parameters:
    - frames: Name of the dataset to load
    """
    sorted = frames.sort_values(by='distance_to_ball_carrier').reset_index(drop=True)
    
    input_df = pd.DataFrame()
    # Iterate over each row in the sorted DataFrame
    for idx, row in sorted.iterrows():
        # Extract player's x and y values
        x = row['x']
        y = row['y']
        s = row['s']
        a = row['a']
        dis = row['dis']
        o = row['o']
        dir = row['dir']
        distance_to_ball_carrier = row['distance_to_ball_carrier']


        # Add x_i columns to the new DataFrame
        input_df.loc[0,f'{side_of_ball}_x_{idx}'] = x
        input_df.loc[0,f'{side_of_ball}_y_{idx}'] = y
        input_df.loc[0,f'{side_of_ball}_s_{idx}'] = s
        input_df.loc[0,f'{side_of_ball}_a_{idx}'] = a
        input_df.loc[0,f'{side_of_ball}_dis_{idx}'] = dis
        input_df.loc[0,f'{side_of_ball}_o_{idx}'] = o
        input_df.loc[0,f'{side_of_ball}_dir_{idx}'] = dir
        input_df.loc[0,f'{side_of_ball}_distance_to_ball_carrier_{idx}'] = distance_to_ball_carrier

    # Concatenate the original DataFrame with the new x_i DataFrame
    return input_df
    





Iterating through each Play

In [69]:
# Process each game, play, and frame
pass_only = get_frames_of_catch(week)
df = pd.DataFrame()
for gid in tqdm(pass_only['gameId'].unique(), leave=True):
    game = pass_only.loc[pass_only['gameId'] == gid].copy()
    game_plays = plays[plays['gameId'] == gid]
    game_tackles = tackles[tackles['gameId'] == gid]

    for pid in tqdm(game['playId'].unique(), leave=False):
        play = game.loc[game['playId'] == pid].copy()
        play_data = game_plays[game_plays['playId'] == pid]
        play_tackles = game_tackles[game_tackles['playId'] == pid]  

        #Adding distance to ball carrier as a feature in tracking data
        play = distance_to_ball(play_data, play)

        offense = play[play["club"] == play_data["possessionTeam"].iloc[0]]
        defense = play[play["club"] == play_data["defensiveTeam"].iloc[0]]
        #print(defense.head())
        #print("Length of ",len(defense))

        offense_input = create_dnn_input_df(offense, "o")
        defense_input = create_dnn_input_df(defense, "d")
        print(defense_input.head())

        
        

        #Adding who tackled the ball carrier
        



        



  0%|          | 0/16 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

1
1
   d_x_0  d_y_0  d_s_0  d_a_0  d_dis_0   d_o_0  d_dir_0  \
0  78.15  37.85   5.88   1.23     0.59  140.96    178.5   

   d_distance_to_ball_carrier_0  d_x_1  d_y_1  ...  d_dir_9  \
0                      2.828003  76.71  33.36  ...   305.29   

   d_distance_to_ball_carrier_9  d_x_10  d_y_10  d_s_10  d_a_10  d_dis_10  \
0                       21.8936   70.62    7.66     2.6    4.14      0.27   

   d_o_10  d_dir_10  d_distance_to_ball_carrier_10  
0  331.57    278.33                      29.415605  

[1 rows x 88 columns]
1
1
   d_x_0  d_y_0  d_s_0  d_a_0  d_dis_0  d_o_0  d_dir_0  \
0  68.93  34.26   4.44   3.47     0.45  18.51   358.93   

   d_distance_to_ball_carrier_0  d_x_1  d_y_1  ...  d_dir_9  \
0                      4.278329  70.95   34.7  ...   310.45   

   d_distance_to_ball_carrier_9  d_x_10  d_y_10  d_s_10  d_a_10  d_dis_10  \
0                     30.639067   45.18   11.65    3.89    2.12      0.39   

   d_o_10  d_dir_10  d_distance_to_ball_carrier_10  
0   82.35 

KeyboardInterrupt: 

In [16]:
play

Unnamed: 0,gameId,playId,nflId,displayName,frameId,time,jerseyNumber,club,playDirection,x,y,s,a,dis,o,dir,event,position,distance_to_ball_carrier
1406225,2022091200,3826,38605.0,Russell Wilson,6,2022-09-12 23:05:53.500000,3.0,DEN,left,73.95,29.06,0.52,1.05,0.05,157.72,16.57,pass_outcome_caught,QB,17.430846
1406278,2022091200,3826,41369.0,Cameron Fleming,6,2022-09-12 23:05:53.500000,73.0,DEN,left,70.65,32.66,0.8,0.85,0.08,249.02,217.68,pass_outcome_caught,T,20.700155
1406331,2022091200,3826,41464.0,Shelby Harris,6,2022-09-12 23:05:53.500000,93.0,SEA,left,73.42,27.15,1.88,1.76,0.19,331.49,118.69,pass_outcome_caught,DE,15.455051
1406384,2022091200,3826,42543.0,Quandre Diggs,6,2022-09-12 23:05:53.500000,6.0,SEA,left,39.86,17.78,4.1,5.07,0.4,141.94,184.77,pass_outcome_caught,FS,31.256623
1406437,2022091200,3826,42827.0,Justin Coleman,6,2022-09-12 23:05:53.500000,28.0,SEA,left,60.59,12.24,6.3,2.68,0.63,103.59,136.76,pass_outcome_caught,CB,9.983927
1406490,2022091200,3826,43384.0,Graham Glasgow,6,2022-09-12 23:05:53.500000,61.0,DEN,left,68.99,31.92,1.54,0.51,0.16,291.42,265.14,pass_outcome_caught,G,20.022437
1406543,2022091200,3826,43436.0,Quinton Jefferson,6,2022-09-12 23:05:53.500000,77.0,SEA,left,69.03,28.24,3.5,0.92,0.36,112.81,184.46,pass_outcome_caught,DE,16.352676
1406596,2022091200,3826,44832.0,Garett Bolles,6,2022-09-12 23:05:53.500000,72.0,DEN,left,73.36,27.72,1.63,1.44,0.17,249.93,120.74,pass_outcome_caught,T,16.005052
1406649,2022091200,3826,44873.0,Josh Jones,6,2022-09-12 23:05:53.500000,13.0,SEA,left,49.62,32.42,4.63,4.05,0.44,143.62,159.31,pass_outcome_caught,FS,29.283342
1406702,2022091200,3826,46109.0,Courtland Sutton,6,2022-09-12 23:05:53.500000,14.0,DEN,left,50.25,16.29,3.66,4.63,0.39,335.66,281.33,pass_outcome_caught,WR,20.77622
