In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.spatial.distance import cdist
from tqdm.notebook import tqdm
import IPython

What it does:
    
For each play in each game in Week 1: 
    
    
1. Calculates the distance from each defensive player to the ball carrier at the moment the ball is caught, using their x/y coordinates on the field.

2. Ranks players by proximity: sorts them by their distance to the ball carrier, so the players closest at the time of the catch come first.

3. Identifies which player made the tackle on each play, using the tackles data.

In [None]:
# Read the four raw inputs from CSV files in the current working directory.
tackles = pd.read_csv("tackles.csv")
plays = pd.read_csv("plays.csv")
players = pd.read_csv("players.csv")
week = pd.read_csv("tracking_week_1.csv")

# Attach each player's position to the tracking rows. No `on=` is given, so
# pandas joins on the columns the two frames share; the left join keeps every
# tracking row even when no matching player exists.
player_positions = players[['nflId', 'position']]
week = pd.merge(week, player_positions, how='left')

# Show (rows, columns) of the merged tracking table as the cell output.
week.shape

In [None]:
# Matt's file-reading helper.
def load_dataset(dataset_name, data_dir=None):
    """
    Read one of the project's CSV datasets from disk into a DataFrame.

    Parameters:
    - dataset_name: Name of the dataset to load, without the ".csv" extension
      (e.g. "plays" reads "<data_dir>/plays.csv").
    - data_dir: Directory that contains the CSV files. When omitted, the
      original hard-coded location is used so existing calls keep working;
      pass your own directory to run the notebook on another machine.

    Returns:
    - pandas.DataFrame with the contents of the CSV file.

    Raises:
    - FileNotFoundError (from pandas) if the file does not exist.
    """
    if data_dir is None:
        # Legacy default: the data folder of the original author's checkout.
        data_dir = "C:\\Users\\mattd\\Documents\\GitHub\\big-data-bowl-2024\\data"
    return pd.read_csv(os.path.join(data_dir, f"{dataset_name}.csv"))

# Load the four input tables through load_dataset.
tackles = load_dataset("tackles")
players = load_dataset("players")
plays = load_dataset("plays")
tracking_raw = load_dataset("tracking_week_1")

# Left-join each player's position onto the tracking rows. No `on=` is given,
# so pandas joins on the columns the two frames share.
week = pd.merge(tracking_raw, players[['nflId', 'position']], how='left')

# Show (rows, columns) of the merged tracking table as the cell output.
week.shape

In [None]:
# Identifies the ball carrier for a frame using the ballCarrierId from the play data
def identify_ball_carrier(play_data, frame):
    """
    Return the play's ball carrier id if that player appears in the frame.

    Parameters:
    - play_data: DataFrame of play rows with a 'ballCarrierId' column; only
      the first row is consulted.
    - frame: DataFrame of tracking rows with an 'nflId' column.

    Returns:
    - The 'ballCarrierId' value of the first row of play_data when it matches
      at least one 'nflId' in frame; otherwise None. None is also returned
      when play_data has no rows or the id is missing (NaN), instead of
      raising IndexError on the empty lookup.
    """
    # No play row at all: nothing to look up.
    if play_data.empty:
        return None

    ball_carrier_id = play_data['ballCarrierId'].iloc[0]

    # A missing id can never match a tracked player.
    if pd.isna(ball_carrier_id):
        return None

    if (frame['nflId'] == ball_carrier_id).any():
        return ball_carrier_id
    return None
   
# Looks up the tackler for a play in the tackles data
def track_tackle_event(tackles_data, play_id):
    """
    Return the nflId of the first player credited with a tackle on a play.

    Parameters:
    - tackles_data: DataFrame with 'playId', 'tackle' and 'nflId' columns.
    - play_id: playId value to look up.

    Returns:
    - The 'nflId' of the first row for this play whose 'tackle' equals 1,
      or None when no such row exists.
    """
    on_this_play = tackles_data['playId'] == play_id
    credited_tackle = tackles_data['tackle'] == 1
    tackler_ids = tackles_data.loc[on_this_play & credited_tackle, 'nflId']

    if tackler_ids.empty:
        return None
    return tackler_ids.iloc[0]
    
# Positions whose players are ranked against the ball carrier.
# NOTE(review): these codes are assumed to match the `position` values in the
# players table -- confirm with players['position'].unique() and trim/extend.
defensive_positions = ['DT', 'DE', 'NT', 'ILB', 'MLB', 'OLB', 'LB', 'CB', 'FS', 'SS', 'DB']

# Tracking event that marks the catch; only frames carrying it are ranked.
CATCH_EVENT = 'pass_outcome_caught'

# Column order of the output table.
RANKING_COLUMNS = [
    'gameId', 'playId', 'frameId', 'nflId', 'displayName',
    'rank', 'distance_to_ball_carrier', 'tackle_indicator',
]


def _rank_defenders(frame, ball_carrier_id, tackler_id, positions):
    """
    Rank the selected players of one frame by distance to the ball carrier.

    Parameters:
    - frame: tracking rows of a single frame; must contain a row whose
      'nflId' equals ball_carrier_id.
    - ball_carrier_id: nflId of the ball carrier.
    - tackler_id: nflId of the player credited with the tackle, or None.
    - positions: 'position' values of the players to rank.

    Returns:
    - DataFrame with RANKING_COLUMNS, one row per selected player, sorted by
      distance (closest first). 'rank' is 0-based; 'tackle_indicator' is 1
      for the tackler and 0 otherwise.
    """
    # Look the carrier's coordinates up once instead of once per axis.
    carrier = frame.loc[frame['nflId'] == ball_carrier_id].iloc[0]

    # Apply the position filter before ranking, so that the ball carrier,
    # other non-listed positions and rows without a position are not ranked.
    ranked = frame.loc[frame['position'].isin(positions)].copy()
    ranked['distance_to_ball_carrier'] = np.sqrt(
        (ranked['x'] - carrier['x']) ** 2 + (ranked['y'] - carrier['y']) ** 2
    )
    ranked = ranked.sort_values(by='distance_to_ball_carrier')
    ranked['rank'] = np.arange(len(ranked))

    if tackler_id is None:
        ranked['tackle_indicator'] = 0
    else:
        ranked['tackle_indicator'] = (ranked['nflId'] == tackler_id).astype(int)

    return ranked[RANKING_COLUMNS]


# Process each game and play; within a play only the catch frame(s) matter.
# Per-frame results are collected in a list and concatenated once at the end,
# because growing a DataFrame row by row with pd.concat is quadratic.
ranked_frames = []
for gid in tqdm(week['gameId'].unique(), leave=True):
    game = week.loc[week['gameId'] == gid]
    game_plays = plays[plays['gameId'] == gid]
    game_tackles = tackles[tackles['gameId'] == gid]

    for pid in tqdm(game['playId'].unique(), leave=False):
        play = game.loc[game['playId'] == pid]

        # Frames of this play that carry the catch event (we decided to only
        # look at the moment of the catch); skip plays without one.
        catch_frame_ids = play.loc[play['event'] == CATCH_EVENT, 'frameId'].unique()
        if len(catch_frame_ids) == 0:
            continue

        play_data = game_plays[game_plays['playId'] == pid]
        # Without a play row the ball carrier cannot be determined.
        if play_data.empty:
            continue

        play_tackles = game_tackles[game_tackles['playId'] == pid]
        tackle_made_by_id = track_tackle_event(play_tackles, pid)

        for fid in catch_frame_ids:
            frame = play.loc[play['frameId'] == fid]
            ball_carrier_id = identify_ball_carrier(play_data, frame)

            # Skip this frame if the ball carrier is not identified
            if ball_carrier_id is None:
                continue

            ranked_frames.append(
                _rank_defenders(frame, ball_carrier_id, tackle_made_by_id, defensive_positions)
            )

# One row per ranked player per catch frame.
if ranked_frames:
    df = pd.concat(ranked_frames, ignore_index=True)
else:
    df = pd.DataFrame(columns=RANKING_COLUMNS)


In [None]:
# Preview the first 20 rows of the ranking table.
df.iloc[:20]