In [79]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
from IPython.display import display, HTML

import matplotlib.patches as patches
import plotly.graph_objects as go
import os
from kaggle.api.kaggle_api_extended import KaggleApi
from tqdm.notebook import tqdm
from scipy.spatial.distance import cdist

In [72]:
def load_dataset(dataset_name):
    """
    Download a specific dataset from data directory.

    Parameters:
    - dataset_name: Name of the dataset to load
    """
    return pd.read_csv(f"C:\\Users\\mattd\\Documents\\GitHub\\big-data-bowl-2024\\data\\{dataset_name}.csv")

In [135]:
# Read In csvs
games = load_dataset("games")
plays = load_dataset("plays")
players = load_dataset("players")
week1 = load_dataset("tracking_week_1")
tackles = load_dataset("tackles")
joined_all = pd.merge(games,plays,how="inner",on = "gameId")
joined_all = pd.merge(joined_all,week1,how="inner",on=["gameId","playId"])
joined_all = pd.merge(joined_all,tackles,how="left",on=["gameId","playId","nflId"])
# # left join on players to keep football records
joined_all = pd.merge(joined_all,players,how="left",on = "nflId")

In [136]:
pass_arrived = joined_all.loc[joined_all['event']=='pass_arrived'].copy()
pass_arrived

Unnamed: 0,gameId,season,week,gameDate,gameTimeEastern,homeTeamAbbr,visitorTeamAbbr,homeFinalScore,visitorFinalScore,playId,...,tackle,assist,forcedFumble,pff_missedTackle,height,weight,birthDate,collegeName,position,displayName_y
0,2022090800,2022,1,09/08/2022,20:20:00,LA,BUF,10,31,2184,...,,,,,6-5,325.0,1988-06-06,Indiana,G,Rodger Saffold
44,2022090800,2022,1,09/08/2022,20:20:00,LA,BUF,10,31,2184,...,,,,,6-0,242.0,1990-06-27,Utah State,ILB,Bobby Wagner
88,2022090800,2022,1,09/08/2022,20:20:00,LA,BUF,10,31,2184,...,,,,,6-1,280.0,1991-05-23,Pittsburgh,DT,Aaron Donald
132,2022090800,2022,1,09/08/2022,20:20:00,LA,BUF,10,31,2184,...,,,,,6-6,305.0,1992-04-21,Missouri,C,Mitch Morse
176,2022090800,2022,1,09/08/2022,20:20:00,LA,BUF,10,31,2184,...,,,,,6-0,191.0,1993-11-29,Maryland,WR,Stefon Diggs
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1407316,2022091200,2022,1,09/12/2022,20:15:00,SEA,DEN,17,16,3467,...,,,,,6-2,203.0,,Alabama,CB,Patrick Surtain
1407341,2022091200,2022,1,09/12/2022,20:15:00,SEA,DEN,17,16,3467,...,,,,,6-3,240.0,,Ohio State,ILB,Baron Browning
1407366,2022091200,2022,1,09/12/2022,20:15:00,SEA,DEN,17,16,3467,...,,,,,6-5,310.0,,Mississippi State,T,Charles Cross
1407391,2022091200,2022,1,09/12/2022,20:15:00,SEA,DEN,17,16,3467,...,,,,,6-7,319.0,,Washington State,T,Abraham Lucas


In [146]:
#Tracking df/play cleaning functions 
"""
Functions are meant to be run on a frame by frame basis. The dataframe passed into it should contain just the tracking from one play
"""

def distance_to_ball(frame):
    """
    Calculates the distance to the ball for each player per frame in a play.

    Parameters:
    - dataset_name: Name of the dataset to load
    """
    carr_x, carr_y = ball_carrier_info(frame)
    frame['distance_to_tackle'] = cdist(frame[['x', 'y']], [[carr_x, carr_y]], metric='euclidean')
    return frame



def ball_carrier_info(frame):
    """
    Returns the x and y coordinates for the ball carrier.

    Parameters:
    - frame: Name of df contained one frame from one play
    """
    ball_carrier_id = frame["ballCarrierId"].iloc[0]
    ball_carrier = frame[frame["nflId"]==ball_carrier_id]
    return ball_carrier["x"].values[0], ball_carrier["y"].values[0]



In [149]:
# Testing distance_to_ball and ball_carrier_info
gid = 2022091103 #randomly sampled game 
joined_play_tracking = pd.merge(plays,week1,how="inner",on=["gameId","playId"])
game = joined_play_tracking.loc[joined_play_tracking['gameId']==gid].copy()
pass_play = game.loc[game['event']=='pass_arrived'].copy()
frame_test = pass_play[(pass_play['frameId']==4) & (pass_play['playId']==58)]

test_df = distance_to_ball(frame_test)
test_df[["ballCarrierId","nflId","Distance_to_reference","x","y"]]



34.55 29.94


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  frame['Distance_to_reference'] = cdist(frame[['x', 'y']], [[carr_x, carr_y]], metric='euclidean')


Unnamed: 0,ballCarrierId,nflId,Distance_to_reference,x,y
478012,53484,43299.0,17.501503,38.04,12.79
478033,53484,43350.0,13.79174,43.19,19.19
478054,53484,43455.0,3.236943,37.22,28.11
478075,53484,43641.0,12.705987,38.3,42.08
478096,53484,44814.0,5.935697,30.49,34.27
478117,53484,44872.0,17.071687,44.6,43.74
478138,53484,44915.0,3.973978,32.46,33.32
478159,53484,46108.0,1.822581,36.22,30.67
478180,53484,46123.0,14.03901,47.18,36.07
478201,53484,46138.0,5.262414,35.42,24.75
