In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import pickle

In [42]:
import matplotlib.pyplot as plt
import plotly.express as px
import matplotlib.animation as animation
from IPython.display import HTML

In [108]:
# Folder holding the training CSVs. Change this one constant to relocate the data
# instead of editing every path below.
DATA_DIR = "/Users/akshayprabhu/Desktop/Kaggle NFL/data/train"

# Week-2 input tracking frames.
input_week_2_dir = os.path.join(DATA_DIR, "input_2023_w02.csv")
input_df = pd.read_csv(input_week_2_dir)

# Week-2 output frames (merge_df later treats these as the post-throw frames).
output_week_2_dir = os.path.join(DATA_DIR, "output_2023_w02.csv")
output_df = pd.read_csv(output_week_2_dir)

# NOTE(security): pickle.load can execute arbitrary code; only load model files you trust.
filename = 'model1.sav'
# The context manager closes the file handle, which the bare open() call never did.
with open(filename, 'rb') as model_file:
    model = pickle.load(model_file)

In [153]:
# Every input frame in which Justin Jefferson is the targeted receiver.
K = input_df.loc[
    input_df['player_name'].eq("Justin Jefferson")
    & input_df['player_role'].eq('Targeted Receiver')
]

In [154]:
# Spot-check ten random rows of the filtered frame. No random_state is passed,
# so the rows shown differ on every run.
K.sample(10)

Unnamed: 0,game_id,play_id,player_to_predict,nfl_id,frame_id,play_direction,absolute_yardline_number,player_name,player_height,player_weight,...,player_role,x,y,s,a,dir,o,num_frames_output,ball_land_x,ball_land_y
8644,2023091400,2450,True,52430,18,left,85,Justin Jefferson,6-3,192,...,Targeted Receiver,79.72,15.36,6.79,0.91,266.47,268.19,13,72.440002,6.31
14767,2023091400,3486,True,52430,6,right,86,Justin Jefferson,6-3,192,...,Targeted Receiver,83.75,35.56,2.04,2.4,35.66,90.61,17,97.900002,47.779999
4279,2023091400,1072,True,52430,29,left,22,Justin Jefferson,6-3,192,...,Targeted Receiver,24.84,29.9,8.66,1.07,346.44,16.77,10,22.639999,42.009998
11762,2023091400,2793,True,52430,27,left,74,Justin Jefferson,6-3,192,...,Targeted Receiver,70.96,25.34,7.1,1.99,356.93,18.08,10,70.830002,35.889999
16136,2023091400,3836,True,52430,20,right,35,Justin Jefferson,6-3,192,...,Targeted Receiver,40.22,10.41,6.41,1.85,82.05,82.97,10,45.509998,16.92
1114,2023091400,318,True,52430,1,right,50,Justin Jefferson,6-3,192,...,Targeted Receiver,48.85,10.02,0.0,0.0,59.67,83.85,7,48.279999,14.38
982,2023091400,293,True,52430,4,right,35,Justin Jefferson,6-3,192,...,Targeted Receiver,33.8,39.84,0.0,0.0,116.84,79.08,18,48.889999,14.68
9233,2023091400,2500,True,52430,16,left,72,Justin Jefferson,6-3,192,...,Targeted Receiver,69.74,9.62,4.4,2.14,290.93,277.32,9,66.139999,14.27
6192,2023091400,1677,True,52430,16,left,63,Justin Jefferson,6-3,192,...,Targeted Receiver,61.54,9.57,4.99,0.82,260.29,276.46,10,60.169998,9.35
18430,2023091400,4107,True,52430,3,right,85,Justin Jefferson,6-3,192,...,Targeted Receiver,84.1,14.44,0.12,0.09,88.48,67.1,15,101.230003,7.75


In [109]:
def get_play(df=None, game_id=2023091400, play_id=2523):
    """
    Return every row of one play, ordered by player and then by frame.

    Parameters
    ----------
    df : DataFrame, optional
        Frame with ``game_id``, ``play_id``, ``nfl_id`` and ``frame_id`` columns.
        When omitted, the notebook-level ``input_df`` is looked up at call time.
        (A ``df=input_df`` default would be frozen when the function is defined
        and would keep serving stale data after ``input_df`` is re-loaded.)
    game_id, play_id : int
        Identifiers of the play to extract.

    Returns
    -------
    DataFrame
        The matching rows sorted by ``['nfl_id', 'frame_id']``; empty when
        nothing matches.
    """
    if df is None:
        df = input_df

    in_play = (df["game_id"] == game_id) & (df["play_id"] == play_id)
    return df.loc[in_play].sort_values(['nfl_id', 'frame_id'])

In [172]:
# Pull one play's input frames, then the output frames sharing its game_id / play_id.
input_play = get_play(game_id=2023091400, play_id=4107)
output_play = output_df.loc[
    output_df['game_id'].eq(input_play['game_id'].iloc[0])
    & output_df['play_id'].eq(input_play['play_id'].iloc[0])
]

In [173]:
def merge_df(input_df, output_df):
    '''
    Stack a play's input frames and output frames into one continuous timeline.

    Every output row is tagged with ``is_post_throw = 1`` and its ``frame_id`` is
    shifted by the largest input ``frame_id`` of the same (game_id, play_id), so
    output frames continue the input numbering (assumes output frame ids restart
    at 1 for each play -- confirm against the data).

    Parameters
    ----------
    input_df : DataFrame
        Input frames; must contain ``game_id``, ``play_id`` and ``frame_id``.
    output_df : DataFrame
        Output frames with the same key columns. It is not modified.

    Returns
    -------
    DataFrame
        Input rows followed by the shifted output rows, with a fresh RangeIndex.
        Input rows have NaN in ``is_post_throw``; output rows have NaN in every
        input-only column. Output rows whose play has no input rows get a NaN
        ``frame_id``.
    '''
    play_keys = ["game_id", "play_id"]

    # Largest input frame_id per play = the offset to add to that play's output frames.
    frame_offsets = (
        input_df.groupby(play_keys)["frame_id"]
        .max()
        .reset_index()
        .rename(columns={"frame_id": "maxInputFrame"})
    )

    # assign() returns a new frame, so the caller's output_df stays untouched.
    output_shifted = (
        output_df.assign(is_post_throw=1)
        .merge(frame_offsets, on=play_keys, how="left")
    )
    output_shifted["frame_id"] = output_shifted["frame_id"] + output_shifted["maxInputFrame"]

    # The former "Step 4" left-merged only the key columns of input_df, which
    # added no columns and no rows, so it has been removed. Static player
    # attributes are filled in later by the callers (groupby + fillna).
    return pd.concat(
        [input_df, output_shifted.drop(columns=["maxInputFrame"])],
        ignore_index=True,
    )

def add_throw_context(df):
    """
    Add throw-relative timing columns and forward-fill per-player attributes.

    Adds three columns:

    * ``is_post_throw`` -- 1 where ``play_direction`` is missing, else 0 (int8).
    * ``throw_frame_id`` -- smallest ``frame_id`` among the play's post-throw
      rows; when a play has none, the play's largest ``frame_id`` is used.
    * ``t_rel_frames`` -- ``frame_id - throw_frame_id``.

    Remaining NaNs are then filled with the first non-null value each
    (game_id, play_id, nfl_id) group has for that column.

    Parameters
    ----------
    df : DataFrame
        Must contain ``game_id``, ``play_id``, ``nfl_id``, ``frame_id`` and
        ``play_direction``. The frame is not modified (the earlier version wrote
        ``is_post_throw`` into the caller's frame as a side effect).

    Returns
    -------
    DataFrame
        A new frame sorted by game, play, player and frame.
    """
    play_keys = ["game_id", "play_id"]
    group_cols = play_keys + ["nfl_id"]

    # assign() builds a new frame instead of mutating the argument.
    df = df.assign(is_post_throw=df["play_direction"].isna().astype(np.int8))

    # First post-throw frame of each play.
    throw_frame = (
        df.loc[df["is_post_throw"] == 1]
          .groupby(play_keys)["frame_id"]
          .min()
          .rename("throw_frame_id")
    )
    df = df.merge(throw_frame, on=play_keys, how="left")

    # Fallback: a play with no post-throw rows uses its last frame.
    fallback = df.groupby(play_keys)["frame_id"].transform("max")
    df["throw_frame_id"] = df["throw_frame_id"].fillna(fallback)

    # Negative before the throw frame, zero on it, positive after.
    df["t_rel_frames"] = df["frame_id"] - df["throw_frame_id"]

    # Sort first so "first" means the earliest frame of each player.
    df = df.sort_values(group_cols + ["frame_id"])
    first = df.groupby(group_cols).transform("first")

    return df.fillna(first).infer_objects(copy=False)

In [174]:
def regularize_play(df):
    """
    Re-express positions relative to ``LOS`` with both play directions aligned.

    A ``LOS`` column is added (``10 + absolute_yardline_number`` for 'right'
    plays, ``110 - absolute_yardline_number`` for 'left' plays). ``x`` and
    ``ball_land_x`` then become ``10 + v - LOS`` for 'right' plays and
    ``110 - v - LOS`` for 'left' plays, and ``y`` / ``ball_land_y`` are mirrored
    as ``53.3 - v`` for 'left' plays. Rows with any other ``play_direction`` are
    left unchanged.

    Returns a new frame sorted by game_id, play_id, nfl_id with a fresh index.
    The transform is not idempotent: applying it twice shifts coordinates again.
    """
    sort_keys = ['game_id', 'play_id', 'nfl_id']
    df = df.sort_values(by=sort_keys, ascending=[True, True, True]).reset_index(drop=True)

    # play_direction is never modified below, so the masks are computed once.
    going_right = df['play_direction'] == 'right'
    going_left = df['play_direction'] == 'left'

    # LOS must exist before the x-type columns are shifted by it.
    df.loc[going_right, 'LOS'] = 10 + df['absolute_yardline_number']
    df.loc[going_left, 'LOS'] = 110 - df['absolute_yardline_number']

    for x_col in ('ball_land_x', 'x'):
        df.loc[going_right, x_col] = 10 + df[x_col] - df['LOS']
        df.loc[going_left, x_col] = 110 - df[x_col] - df['LOS']

    for y_col in ('ball_land_y', 'y'):
        df.loc[going_left, y_col] = 53.3 - df[y_col]

    return df

In [175]:
def three_window_horizon(df, inference=False):
    """
    Build per-player kinematic features from the current and two previous frames.

    Only rows with ``player_to_predict == True`` are kept. Within each
    (game_id, play_id, nfl_id) track, ordered by ``frame_id``, this adds:

    * ``delta_x`` / ``delta_y`` -- next frame's position minus the current one
      (NaN on a track's last frame).
    * ``x_1, y_1, x_2, y_2`` -- positions one and two frames back.
    * ``1_frame_velocity`` / ``2_frame_velocity`` -- distance covered over the
      last one / two frames divided by 0.1 / 0.2 (presumably seconds at
      10 frames per second -- confirm).
    * ``acceleration`` -- change in ``1_frame_velocity`` divided by 0.1.
    * ``direction_rad`` / ``dir_sin`` / ``dir_cos`` -- heading of the last
      one-frame displacement.

    With ``inference=True`` only rows lacking ``2_frame_velocity`` are dropped,
    so rows with NaN deltas survive; otherwise every row containing any NaN is
    dropped.
    """
    track_keys = ['game_id', 'play_id', 'nfl_id']
    df = df[df['player_to_predict'] == True].sort_values(track_keys + ['frame_id'])

    # All position-derived group operations are taken before columns are added.
    positions = df.groupby(track_keys)[['x', 'y']]
    forward_step = positions.diff(-1) * -1      # (x_{t+1} - x_t, y_{t+1} - y_t)
    lagged = {k: positions.shift(k) for k in [1, 2]}

    df['delta_x'] = forward_step['x']
    df['delta_y'] = forward_step['y']

    for k in [1, 2]:
        df[f"x_{k}"] = lagged[k]['x']
        df[f"y_{k}"] = lagged[k]['y']

    back_2 = np.sqrt((df["x"] - df["x_2"])**2 + (df["y"] - df["y_2"])**2)
    df["2_frame_velocity"] = back_2 / 0.2

    back_1 = np.sqrt((df["x"] - df["x_1"])**2 + (df["y"] - df["y_1"])**2)
    df["1_frame_velocity"] = back_1 / 0.1

    prev_velocity = df.groupby(track_keys)["1_frame_velocity"].shift(1)
    df["acceleration"] = (df["1_frame_velocity"] - prev_velocity) / 0.1

    # x_1 / y_1 are exactly the one-frame group shift, so reuse them for the heading.
    df["direction_rad"] = np.arctan2(df["y"] - df["y_1"], df["x"] - df["x_1"])
    df["dir_sin"] = np.sin(df["direction_rad"])
    df["dir_cos"] = np.cos(df["direction_rad"])

    required = ['2_frame_velocity'] if inference else None
    return df.dropna(subset=required)

In [176]:
def make_play(input_play, output_play):
    """
    Build the ground-truth timeline of one play for plotting.

    The input and output frames are stacked with ``merge_df``; input rows get
    ``is_post_throw = 0``. Only the identification, position and ball-landing
    columns are kept. Any value still missing is filled with the first non-null
    value the same ``nfl_id`` has for that column.

    Returns the frame sorted by ``nfl_id`` and ``frame_id``.
    """
    keep = ['game_id', 'play_id', 'nfl_id', 'frame_id',
            'play_direction', 'player_name', 'player_side', 'player_role',
            'x', 'y', 'ball_land_x', 'ball_land_y', 'is_post_throw']

    stacked = merge_df(input_play, output_play)
    # Only rows that came from output_play carry the flag; the rest are pre-throw.
    stacked = stacked.assign(is_post_throw=stacked['is_post_throw'].fillna(0))

    timeline = stacked[keep].sort_values(["nfl_id", "frame_id"])
    first_per_player = timeline.groupby(["nfl_id"]).transform("first")

    return timeline.fillna(first_per_player).infer_objects(copy=False)

In [177]:
# Ground-truth timeline for the selected play. This must run before the next cell,
# which overwrites input_play with its transformed version.
true = make_play(input_play, output_play)

In [178]:
# Feature pipeline for inference: throw-relative timing -> LOS-relative coordinates
# -> kinematic features. Every step reassigns input_play, so this cell is not
# idempotent: re-running it without rebuilding input_play regularizes the
# coordinates a second time.
input_play = add_throw_context(input_play)
input_play = regularize_play(input_play)
input_play = three_window_horizon(input_play, inference=True)

In [179]:
# Columns handed to model.predict inside infer(), in this order
# (num_features followed by cat_features).
num_features = [
    "x", "y",          # current position
    'x_1', 'y_1', 'x_2', 'y_2', # prev positions
    '2_frame_velocity', '1_frame_velocity', 'acceleration',# kinematics
    'direction_rad','dir_sin', 'dir_cos', # direction
    "LOS",             # context
    "ball_land_x", "ball_land_y", # ball landing spot
    'is_post_throw', 'throw_frame_id', 't_rel_frames' # frame info
]

# Non-numeric columns; infer() copies them unchanged onto every predicted frame.
cat_features = [
    "player_position",
    "player_role"
]

In [180]:
def infer(play_df):
    """
    Roll the model forward one frame at a time for every player in ``play_df``.

    For each ``nfl_id`` the player's last row is the starting state. The model
    is asked ``num_frames_output`` times (value taken from the player's first
    row) for a ``(delta_x, delta_y)`` step. Each step is added to the current
    position, the kinematic features are recomputed from the new position, and
    the result becomes the state for the next step.

    Relies on the notebook-level ``model``, ``num_features`` and ``cat_features``.

    Parameters
    ----------
    play_df : DataFrame
        Feature rows containing every column in ``num_features + cat_features``
        plus ``nfl_id``, ``frame_id``, ``num_frames_output``, ``player_name``,
        ``game_id``, ``play_id``, ``play_direction`` and
        ``absolute_yardline_number``.

    Returns
    -------
    DataFrame
        Each player's original rows followed by that player's predicted rows,
        concatenated over players with a fresh index. Columns not set on
        predicted rows are NaN there. An empty ``play_df`` yields an empty copy.
    """
    feature_cols = num_features + cat_features
    out = []

    for nfl_id, player_df in play_df.groupby("nfl_id"):
        player_df = player_df.sort_values("frame_id").copy()
        n_frames = int(player_df["num_frames_output"].iloc[0])
        name = player_df["player_name"].iloc[0]

        # Rolling state: the latest frame (observed or predicted) as a plain dict.
        last = player_df.iloc[-1].to_dict()
        # Predicted rows are collected here and appended once at the end;
        # concatenating inside the loop re-copied the whole frame on every step.
        predicted = []

        for _ in range(n_frames):
            X = pd.DataFrame([last])[feature_cols]
            delta_x, delta_y = model.predict(X)[0]

            nx = delta_x + last["x"]
            ny = delta_y + last["y"]

            x_1, y_1 = last["x"], last["y"]
            # The position two frames back from the new frame is the current
            # frame's own one-frame lag. Reading it from the state (instead of
            # player_df.iloc[-2]) also works when the player has a single row.
            x_2, y_2 = last["x_1"], last["y_1"]

            two_frame_velocity = np.sqrt((nx - x_2)**2 + (ny - y_2)**2) / 0.2
            one_frame_velocity = np.sqrt((nx - x_1)**2 + (ny - y_1)**2) / 0.1
            acceleration = (one_frame_velocity - last["1_frame_velocity"]) / 0.1

            direction_rad = np.arctan2(ny - y_1, nx - x_1)

            new_row = {
                "x": nx,
                "y": ny,
                "x_1": x_1,
                "y_1": y_1,
                "x_2": x_2,
                "y_2": y_2,
                "2_frame_velocity": two_frame_velocity,
                "1_frame_velocity": one_frame_velocity,
                "acceleration": acceleration,
                "direction_rad": direction_rad,
                "dir_sin": np.sin(direction_rad),
                "dir_cos": np.cos(direction_rad),
                "LOS": last["LOS"],
                "ball_land_x": last["ball_land_x"],
                "ball_land_y": last["ball_land_y"],
                "is_post_throw": 1,
                "throw_frame_id": last["throw_frame_id"],
                "t_rel_frames": last["t_rel_frames"] + 1,
                "player_position": last["player_position"],
                "player_role": last["player_role"],
                "nfl_id": nfl_id,
                "game_id": last["game_id"],
                "play_id": last["play_id"],
                "player_name": name,
                "frame_id": int(last["frame_id"]) + 1,
                "play_direction": last["play_direction"],
                "absolute_yardline_number": last["absolute_yardline_number"]
            }

            predicted.append(new_row)
            last = new_row

        if predicted:
            player_df = pd.concat(
                [player_df, pd.DataFrame(predicted)],
                ignore_index=True
            )

        out.append(player_df)

    # pd.concat raises on an empty list; return an empty frame with the same columns.
    if not out:
        return play_df.copy()

    return pd.concat(out, ignore_index=True)

In [181]:
# Observed feature rows plus the model's rolled-out frames for every player in the play.
df = infer(input_play)

In [182]:
def regularize_play_2(df):
    """
    Map LOS-relative positions back to field coordinates as ``pred_x`` / ``pred_y``.

    Inverse of the formulas in ``regularize_play``: 'right' plays get
    ``pred_x = x + LOS - 10`` and ``pred_y = y``; 'left' plays get
    ``pred_x = -(x + LOS - 110)`` and ``pred_y = -(y - 53.3)``. Rows with any
    other ``play_direction`` keep NaN predictions.

    Only the identification, context and position columns are kept. The result
    is a new frame sorted by game_id, play_id, nfl_id with a fresh index.
    """
    keep = ['game_id', 'play_id', 'nfl_id', 'frame_id', "throw_frame_id",
            'absolute_yardline_number', 'play_direction', 'LOS',
            'player_name', 'player_side', 'player_role',
            'x', 'y', 'ball_land_x', 'ball_land_y']

    df = (
        df[keep]
        .sort_values(by=['game_id', 'play_id', 'nfl_id'], ascending=[True, True, True])
        .reset_index(drop=True)
    )

    going_right = df['play_direction'] == 'right'
    going_left = df['play_direction'] == 'left'

    df.loc[going_right, 'pred_x'] = df['x'] + df['LOS'] - 10
    df.loc[going_left, 'pred_x'] = (df['x'] + df['LOS'] - 110) * -1

    df.loc[going_left, 'pred_y'] = (df['y'] - 53.3) * -1
    df.loc[going_right, 'pred_y'] = df['y']

    return df

In [183]:
# Adds pred_x / pred_y to the inference frame (and drops the feature columns).
df = regularize_play_2(df)

In [184]:
def get_preds(true_df, df):
    """
    Attach predicted positions to the ground-truth rows.

    Left-joins ``pred_x`` / ``pred_y`` from ``df`` onto ``true_df`` by
    (``nfl_id``, ``frame_id``). Every ground-truth row is kept; rows without a
    matching prediction get NaN in both prediction columns.
    """
    join_keys = ["nfl_id", "frame_id"]
    predictions = df[join_keys + ["pred_x", "pred_y"]]

    return true_df.merge(predictions, how="left", on=join_keys)

In [185]:
# Ground-truth timeline with pred_x / pred_y joined on (nfl_id, frame_id); this is the frame the animation reads.
play = get_preds(true, df)

In [186]:
# Create the field (120 x 53.3 yards in NFL Next Gen Stats coords)
def draw_field(ax=None):
    """
    Draw a 120 x 53.3 football field on ``ax`` (current axes when omitted).

    Draws grey end zones, a white line every 5 yards (solid on the tens, dashed
    otherwise), two rows of hash marks and the yard numbers along both
    sidelines. Returns the axes.
    """
    ax = plt.gca() if ax is None else ax
    field_width = 53.3

    # Field outline
    ax.set_xlim(0, 120)
    ax.set_ylim(0, field_width)
    ax.set_facecolor("mediumseagreen")

    # End zones occupy the first and last 10 yards of the x axis.
    for left_edge in (0, 110):
        ax.add_patch(plt.Rectangle((left_edge, 0), 10, field_width, color="lightgrey"))

    # Yard lines: solid and thicker on multiples of 10, dashed on the 5s.
    for x in range(10, 111, 5):
        line_style = {"linewidth": 2} if x % 10 == 0 else {"linewidth": 1, "linestyle": "--"}
        ax.plot([x, x], [0, field_width], color="white", **line_style)

    # Hash marks at every whole yard from x = 10 to x = 110, in two rows.
    for x in range(10, 111):
        for hash_y in (23.36, 29.94):
            ax.plot([x, x], [hash_y - 0.4, hash_y + 0.4], color="white", linewidth=2)

    # Yard numbers every 10 yards, counting up to midfield and back down;
    # nothing is labelled at the goal lines or inside the end zones.
    label_style = dict(color="white", ha="center", va="center", fontsize=14, weight="bold")
    for x in range(20, 110, 10):
        num = x - 10 if x <= 50 else 110 - x
        ax.text(x, field_width - 2, str(num), **label_style)
        ax.text(x, 2, str(num), rotation=180, **label_style)

    return ax

In [187]:
# Last pre-throw frame of the play, taken from the rows flagged is_post_throw == 0.
# (Using the "Other Route Runner" rows as a proxy fails with int(nan) when a play
# has no such player, and is wrong if such a player also has post-throw rows.)
temp = play[play["is_post_throw"] == 0]
max_input_frame = int(temp["frame_id"].max())

# Snapshot of every player at that frame; update() uses it to keep players without
# post-throw rows on screen.
frozen = (play[play["frame_id"] == max_input_frame]
          .sort_values("nfl_id")
          .copy())

def update(frame):
    """
    Redraw the whole field for one animation frame.

    Up to ``max_input_frame`` every row of that frame is plotted. After it, the
    rows present in the frame are plotted and every other player is shown at
    the ``frozen`` snapshot position. Predicted positions are overlaid where
    ``is_post_throw == 1`` and both ``pred_x`` and ``pred_y`` are present.

    Reads the notebook-level ``ax``, ``play``, ``frozen`` and
    ``max_input_frame``. Returns ``ax.collections``.
    """
    ax.clear()
    draw_field(ax)

    current = play[play["frame_id"] == frame]

    if frame > max_input_frame:
        # Players with no row in this frame stay at their last input position;
        # the ones that do have a row are excluded so nobody is drawn twice.
        parked = frozen[~frozen["nfl_id"].isin(current["nfl_id"].unique())]
        plot_df = pd.concat([current, parked], ignore_index=True)
    else:
        plot_df = current

    # TRUE positions (moving + frozen)
    for side, colour in (("Offense", "blue"), ("Defense", "red")):
        members = plot_df[plot_df["player_side"] == side]
        ax.scatter(members["x"], members["y"], c=colour, s=100, label=side, zorder=5)

    # PREDICTED positions (post-throw rows that actually carry a prediction)
    has_prediction = (
        (plot_df["is_post_throw"] == 1)
        & plot_df["pred_x"].notna()
        & plot_df["pred_y"].notna()
    )
    post = plot_df[has_prediction]

    if not post.empty:
        for side, colour, label in (
            ("Offense", "cyan", "Offense (Pred)"),
            ("Defense", "maroon", "Defense (Pred)"),
        ):
            members = post[post["player_side"] == side]
            ax.scatter(members["pred_x"], members["pred_y"],
                       c=colour, s=90, alpha=0.6, label=label, zorder=6)

    # Ball landing spot, read from the first row that has both coordinates
    play_meta = play.dropna(subset=["ball_land_x", "ball_land_y"]).head(1)
    if not play_meta.empty:
        ax.scatter(play_meta["ball_land_x"], play_meta["ball_land_y"],
                   c="yellow", s=120, marker="*", label="Ball Landing", zorder=6)

    ax.set_title(f"Frame {frame}")
    ax.legend(loc="upper center", bbox_to_anchor=(0.5, -0.1), ncol=5)
    return ax.collections


# update() draws on this module-level `ax`, so the figure must exist before animating.
fig, ax = plt.subplots(figsize=(12, 6))

# --- Animate ---
# One animation step per distinct frame_id in `play`, in ascending order, 100 ms apart.
ani = animation.FuncAnimation(
    fig, update,
    frames=sorted(play["frame_id"].unique()),
    interval=100,
    blit=False  # keep False since we redraw the whole field
)
# Close the figure (presumably so only the HTML player is rendered, not a static
# copy), then show the animation as an interactive JS/HTML widget.
plt.close(fig)
HTML(ani.to_jshtml())