# Working Code

## Necessary Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from sklearn.linear_model import LinearRegression
import ffmpeg

## Getting Data

In [2]:
#for kaggle
#tracking_week_1 = pd.read_csv("/kaggle/input/nfl-big-data-bowl-2025/tracking_week_1.csv")

#for local notebook
parent = os.path.dirname(os.getcwd())
tracking_week_1 = pd.read_csv(parent + "/data/tracking_week_1.csv") 

In [3]:
play1 = tracking_week_1[(tracking_week_1.gameId == 2022091200) & (tracking_week_1.playId == 64)]

before_snap = play1[play1.frameType == "BEFORE_SNAP"]
snap = play1[play1.frameType == "SNAP"]
after_snap = play1[play1.frameType == "AFTER_SNAP"]

## Functions 

In [4]:
def ball_distance_dictionary(player_df : pd.DataFrame) -> (dict, int): 
    player_sep = {}
    players = player_df.displayName.unique()
    football = player_df[player_df.displayName == 'football'].reset_index(drop=True)
    for i in range(len(player_df.displayName.unique())):
        player = player_df[player_df.displayName == players[i]].reset_index(drop=True)
        if player.displayName[0] != "football":
            distances = list(zip(player.x, player.y))
            seps = []
            for idx, (x_dist, y_dist) in enumerate(distances):
                sep = ((football.x[idx] - x_dist)**2 + (football.y[idx] - y_dist)**2)
                seps.append(sep)
                frames = len(seps)
        player_sep[players[i]] = seps
    return player_sep, frames

def player_with_ball(player_df : pd.DataFrame) -> list:
    player_sep, frames = ball_distance_dictionary(player_df)
    player_with_ball = [None] * frames
    players = player_df.displayName.unique()
    
    for frame in range(frames):
        curr = None
        curr_dist = 17209
        for player in players:
            if player_sep[player][frame] < 1:
                curr = player
                curr_dist = player_sep[player][frame]
            if curr is not None and player_sep[player][frame] < curr_dist:
                curr = player
                curr_dist = player_sep[player][frame]
        if curr is not None:
            player_with_ball[frame] = (curr, curr_dist)
        else:
            player_with_ball[frame] = ('In air', None)

    return player_with_ball

In [5]:
def predict_next_k_frames(play: pd.DataFrame, k: int) -> pd.DataFrame: 
    players = play['displayName'].unique()
    new_after_snap = pd.DataFrame()

    for value in players:
        player_data = play.loc[play['displayName'] == value]
        x_data = [(row['frameId'], row['x']) for _, row in player_data.iterrows()]
        y_data = [(row['frameId'], row['y']) for _, row in player_data.iterrows()]

        X = np.array([point[0] for point in x_data]).reshape(-1, 1)  # Reshape to 2D array
        x_pred = np.array([point[1] for point in x_data])
        model_x = LinearRegression()
        model_x.fit(X, x_pred)
        max_frameid = y_data[-1][0]
        next_frame_ids = np.array([max_frameid + i for i in range(1, k + 1)]).reshape(-1, 1)
        predicted_x = model_x.predict(next_frame_ids)  
        
        Y = np.array([point[0] for point in y_data]).reshape(-1, 1)  # Reshape to 2D array
        y_pred = np.array([point[1] for point in y_data])
        model_y = LinearRegression()
        model_y.fit(Y, y_pred)
        max_frameid = y_data[-1][0]
        next_frame_ids = np.array([max_frameid + i for i in range(1, k + 1)]).reshape(-1, 1)
        predicted_y = model_y.predict(next_frame_ids)    
        

        new_rows = pd.DataFrame({
            'frameId': next_frame_ids.flatten(),
            'x': predicted_x.flatten(),
            'y': predicted_y.flatten()
        })

        for col in play.columns:
            if col not in new_rows.columns:  
                new_rows[col] = play[col].iloc[-1]
        
        updated_player_data = pd.concat([player_data, new_rows], ignore_index=True)
        new_after_snap = pd.concat([new_after_snap, updated_player_data], ignore_index=True)

    return new_after_snap

## Making graphs of the play that label the player with the ball

In [6]:
print(play1.frameId.min(), play1.frameId.max())

1 163


In [12]:
pred_start = 1
pred_stop = 133
prediction_play = predict_next_k_frames(after_snap[(after_snap.frameId >= pred_start) & (after_snap.frameId <= pred_stop)], 30) 

In [20]:
frames = []
i = 0
ball_posessions = player_with_ball(play1)
for frame in play1.frameId.unique():
    play_at_frame = play1[play1.frameId == frame].sort_values(by="jerseyNumber").reset_index(drop=True)
    players = play_at_frame[:-1]
    ball = play_at_frame[-1:] 

    plt.xlim(0, 120)
    plt.ylim(0, 53.3)
    plt.scatter(players.x, players.y, c=range(len(players)))
    plt.scatter(ball.x, ball.y, c="red", marker="*") 
    plt.title(play_at_frame.frameType[0])

    player = ball_posessions[i][0]
    if (player != "In air"): 
        player_x = list(play_at_frame[play_at_frame.displayName == player].x)[0]
        player_y = list(play_at_frame[play_at_frame.displayName == player].y)[0]
        plt.text(player_x, player_y, player)

    if play_at_frame.frameType[0] == "AFTER_SNAP":
        pred_at_frame = prediction_play[prediction_play.frameId == frame].sort_values(by="jerseyNumber").reset_index(drop=True)
        pred_players = pred_at_frame[:-1]
        pred_ball = pred_at_frame[-1:] 
        plt.scatter(pred_players.x, pred_players.y, c=range(len(players)), alpha=0.3)
        plt.scatter(pred_ball.x, pred_ball.y, c="red", marker="*", alpha = 0.3) 

    plt.savefig(f"img{i}.png")
    plt.close()
    i += 1

### Calculating frame rate

In [16]:
play_length = (pd.to_datetime(play1.time.max()) - pd.to_datetime(play1.time.min())).total_seconds()
num_frames = len(play1.frameId.unique())
print(f"Frame rate: {num_frames / play_length}")

Frame rate: 10.06172839506173


## Create animation from PNGs

In [15]:
#locally, install ffmpeg and from the parent directory run: ffmpeg -r 10.06 -i notebooks/img%01d.png -vcodec mpeg4 -y movie.mp4
os.system("ffmpeg -r 10.06 -i img%01d.png -vcodec mpeg4 -y movie.mp4")

sh: ffmpeg: command not found


32512

### Delete PNGs

In [21]:
#courtesy of Gemini
def delete_png_files(folder_path):
    """Deletes all PNG files in the specified folder."""

    for filename in os.listdir(folder_path):
        if filename.endswith(".png"):
            file_path = os.path.join(folder_path, filename)
            try:
                os.remove(file_path)
            except OSError as e:
                print(f"Error deleting {file_path}: {e}")

# Specify the folder path where you want to delete PNG files
folder_path = os.getcwd()#"/kaggle/working" 

# Call the function to delete PNG files
delete_png_files(folder_path)

# Testing / Experiments

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import os

In [None]:
games = pd.read_csv("/kaggle/input/nfl-big-data-bowl-2025/games.csv")
player_play = pd.read_csv("/kaggle/input/nfl-big-data-bowl-2025/player_play.csv")
players = pd.read_csv("/kaggle/input/nfl-big-data-bowl-2025/players.csv")
plays = pd.read_csv("/kaggle/input/nfl-big-data-bowl-2025/plays.csv")
tracking_week_1 = pd.read_csv("/kaggle/input/nfl-big-data-bowl-2025/tracking_week_1.csv")
#tracking_week_2 = pd.read_csv("/kaggle/input/nfl-big-data-bowl-2025/tracking_week_2.csv")
#tracking_week_3 = pd.read_csv("/kaggle/input/nfl-big-data-bowl-2025/tracking_week_3.csv")
#tracking_week_4 = pd.read_csv("/kaggle/input/nfl-big-data-bowl-2025/tracking_week_4.csv")
#tracking_week_5 = pd.read_csv("/kaggle/input/nfl-big-data-bowl-2025/tracking_week_5.csv")
#tracking_week_6 = pd.read_csv("/kaggle/input/nfl-big-data-bowl-2025/tracking_week_6.csv")
#tracking_week_7 = pd.read_csv("/kaggle/input/nfl-big-data-bowl-2025/tracking_week_7.csv")
#tracking_week_8 = pd.read_csv("/kaggle/input/nfl-big-data-bowl-2025/tracking_week_8.csv")
#tracking_week_9 = pd.read_csv("/kaggle/input/nfl-big-data-bowl-2025/tracking_week_9.csv")

In [None]:
games.head()

In [None]:
print(player_play.shape)
player_play.head()

In [None]:
print(players.shape)
players.head()

In [None]:
print(plays.shape)
plays.head()

In [None]:
plays.sort_values(by=["gameId", "playId"])

In [None]:
print(tracking_week_1.shape)
tracking_week_1.head()

In [None]:
tracking_week_1.gameId.unique()

In [None]:
tracking_week_1.playDirection.unique()

In [None]:
tracking_week_1[tracking_week_1.gameId == 2022091200].playId.unique()

In [None]:
tracking_week_1.frameType.unique()

In [None]:
play1 = tracking_week_1[(tracking_week_1.gameId == 2022091200) & (tracking_week_1.playId == 64)]

In [None]:
print(tracking_week_1.x.aggregate(["min", "max"]))
print(tracking_week_1.y.aggregate(["min", "max"]))

In [None]:
before_snap = play1[play1.frameType == "BEFORE_SNAP"]
snap = play1[play1.frameType == "SNAP"]
after_snap = play1[play1.frameType == "AFTER_SNAP"]

In [18]:
players = before_snap.displayName.unique()
players

array(['Kareem Jackson', 'Geno Smith', 'Gabe Jackson', 'Ronald Darby',
       'Randy Gregory', 'Tyler Lockett', 'DeShawn Williams',
       'Alex Singleton', 'Justin Simmons', 'Austin Blythe', 'D.J. Jones',
       'Bradley Chubb', 'Rashaad Penny', 'Will Dissly', 'Noah Fant',
       'DK Metcalf', "Dre'Mont Jones", 'Phil Haynes', 'Jonas Griffith',
       'Patrick Surtain', 'Charles Cross', 'Abraham Lucas', 'football'],
      dtype=object)

In [19]:
len(players)

23

In [None]:
player0 = before_snap[before_snap.displayName == players[0]]
player0

In [None]:
player0.loc[:, ["x", "y"]].isna().sum()

In [None]:
player0.time.aggregate(["min", "max"])

In [None]:
player0.displayName.reset_index(drop=True)

In [None]:
n = 0
frames = []
for frameType in [before_snap, snap, after_snap]:
    plt.xlim(0, 120)
    plt.ylim(0, 53.3)
    for i in range(len(frameType.displayName.unique())):
        player = frameType[frameType.displayName == players[i]].reset_index(drop=True)
        if player.displayName[0] == "football":
            plt.scatter(player.x, player.y, c="red")
            plt.text(list(player.x)[-1], list(player.y)[-1], "FB")
        else:
            plt.scatter(player.x, player.y, c=player.frameId)
            #plt.text(list(player.x)[-1], list(player.y)[-1], list(player.jerseyNumber)[-1])
    
    title = ""
    match n:
        case 0: 
            title = "before snap"
        case 1: 
            title = "snap"
        case 2: 
            title = "after snap"
    plt.title(title)
    
    plt.show()
    n+=1

In [None]:
after_snap = play1[play1.frameType == "AFTER_SNAP"]
print(len(player_with_ball(play1)))
player_with_ball(play1)

In [60]:
def predict_next_k_frames(play: pd.DataFrame, k: int) -> pd.DataFrame: 
    players = play['displayName'].unique()
    new_after_snap = pd.DataFrame()

    for value in players:
        player_data = after_snap.loc[play['displayName'] == value]
        x_data = [(row['frameId'], row['x']) for _, row in player_data.iterrows()]
        y_data = [(row['frameId'], row['y']) for _, row in player_data.iterrows()]

        X = np.array([point[0] for point in x_data]).reshape(-1, 1)  # Reshape to 2D array
        x_pred = np.array([point[1] for point in x_data])
        model_x = LinearRegression()
        model_x.fit(X, x_pred)
        max_frameid = y_data[-1][0]
        next_frame_ids = np.array([max_frameid + i for i in range(1, k + 1)]).reshape(-1, 1)
        predicted_x = model_x.predict(next_frame_ids)  
        
        Y = np.array([point[0] for point in y_data]).reshape(-1, 1)  # Reshape to 2D array
        y_pred = np.array([point[1] for point in y_data])
        model_y = LinearRegression()
        model_y.fit(Y, y_pred)
        max_frameid = y_data[-1][0]
        next_frame_ids = np.array([max_frameid + i for i in range(1, k + 1)]).reshape(-1, 1)
        predicted_y = model_y.predict(next_frame_ids)    
        

        new_rows = pd.DataFrame({
            'frameId': next_frame_ids.flatten(),
            'x': predicted_x.flatten(),
            'y': predicted_y.flatten()
        })

        for col in play.columns:
            if col not in new_rows.columns:  
                new_rows[col] = play[col].iloc[-1]
        
        updated_player_data = pd.concat([player_data, new_rows], ignore_index=True)
        new_after_snap = pd.concat([new_after_snap, updated_player_data], ignore_index=True)

    return new_after_snap

In [61]:
predict_next_k_frames(after_snap, 5)

Unnamed: 0,gameId,playId,nflId,displayName,frameId,frameType,time,jerseyNumber,club,playDirection,x,y,s,a,dis,o,dir,event
0,2022091200,64,35459.0,Kareem Jackson,115,AFTER_SNAP,2022-09-13 00:16:14.9,22.0,DEN,right,50.390000,29.340000,1.37,1.61,0.13,240.06,131.19,
1,2022091200,64,35459.0,Kareem Jackson,116,AFTER_SNAP,2022-09-13 00:16:15,22.0,DEN,right,50.520000,29.240000,1.65,1.76,0.16,238.47,129.12,
2,2022091200,64,35459.0,Kareem Jackson,117,AFTER_SNAP,2022-09-13 00:16:15.1,22.0,DEN,right,50.670000,29.120000,1.99,1.94,0.20,235.66,128.32,
3,2022091200,64,35459.0,Kareem Jackson,118,AFTER_SNAP,2022-09-13 00:16:15.2,22.0,DEN,right,50.850000,28.980000,2.32,2.03,0.23,231.72,127.25,
4,2022091200,64,35459.0,Kareem Jackson,119,AFTER_SNAP,2022-09-13 00:16:15.3,22.0,DEN,right,51.060000,28.820000,2.66,2.10,0.26,226.65,127.76,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1237,2022091200,64,,football,164,AFTER_SNAP,2022-09-13 00:16:19.7,,football,right,40.417194,-2.195944,0.33,2.76,0.04,,,
1238,2022091200,64,,football,165,AFTER_SNAP,2022-09-13 00:16:19.7,,football,right,40.551653,-2.743692,0.33,2.76,0.04,,,
1239,2022091200,64,,football,166,AFTER_SNAP,2022-09-13 00:16:19.7,,football,right,40.686112,-3.291440,0.33,2.76,0.04,,,
1240,2022091200,64,,football,167,AFTER_SNAP,2022-09-13 00:16:19.7,,football,right,40.820571,-3.839188,0.33,2.76,0.04,,,


In [17]:
after_snap

Unnamed: 0,gameId,playId,nflId,displayName,frameId,frameType,time,jerseyNumber,club,playDirection,x,y,s,a,dis,o,dir,event
114,2022091200,64,35459.0,Kareem Jackson,115,AFTER_SNAP,2022-09-13 00:16:14.9,22.0,DEN,right,50.390000,29.34,1.37,1.61,0.13,240.06,131.19,
115,2022091200,64,35459.0,Kareem Jackson,116,AFTER_SNAP,2022-09-13 00:16:15,22.0,DEN,right,50.520000,29.24,1.65,1.76,0.16,238.47,129.12,
116,2022091200,64,35459.0,Kareem Jackson,117,AFTER_SNAP,2022-09-13 00:16:15.1,22.0,DEN,right,50.670000,29.12,1.99,1.94,0.20,235.66,128.32,
117,2022091200,64,35459.0,Kareem Jackson,118,AFTER_SNAP,2022-09-13 00:16:15.2,22.0,DEN,right,50.850000,28.98,2.32,2.03,0.23,231.72,127.25,
118,2022091200,64,35459.0,Kareem Jackson,119,AFTER_SNAP,2022-09-13 00:16:15.3,22.0,DEN,right,51.060000,28.82,2.66,2.10,0.26,226.65,127.76,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3744,2022091200,64,,football,159,AFTER_SNAP,2022-09-13 00:16:19.3,,football,right,40.380001,1.39,2.04,3.87,0.23,,,tackle
3745,2022091200,64,,football,160,AFTER_SNAP,2022-09-13 00:16:19.4,,football,right,40.400002,1.21,1.64,3.49,0.18,,,
3746,2022091200,64,,football,161,AFTER_SNAP,2022-09-13 00:16:19.5,,football,right,40.419998,1.09,1.14,3.37,0.13,,,
3747,2022091200,64,,football,162,AFTER_SNAP,2022-09-13 00:16:19.6,,football,right,40.430000,0.99,0.87,2.49,0.10,,,
