In [3]:
import pandas as pd
import numpy as np
import seaborn as sns 
import matplotlib.pyplot as plt
import plotly.express as px
from sklearn.preprocessing import LabelEncoder

  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (


## 1st Phase

In [3]:
# --- Step 0: Load Input Data ---
# One CSV per week (weeks 1 through 18), read in week order into a list.
weekly_paths = [f"data/train/input_2023_w{week:02d}.csv" for week in range(1, 19)]
input_dfs = [pd.read_csv(weekly_path) for weekly_path in weekly_paths]


In [4]:
# Stack the weekly frames row-wise into one table with a fresh 0..n-1 index
input_df = pd.concat(objs=input_dfs, axis=0, ignore_index=True)

In [5]:
# Normalize each play within the table to go from left to right.
# Work on a copy: with a plain alias the raw `input_df` would be mutated, and
# re-running this cell would flip the "left" plays a second time (undoing the
# normalization).
df = input_df.copy()

# Single, case-insensitive mask for plays recorded going left (the
# line-of-scrimmage cell matches the direction case-insensitively as well).
# Rows with a missing direction compare as False and are left untouched.
is_left = df["play_direction"].str.lower() == "left"

# Mirror positions and ball landing spot: x is reflected about 120, y about 53.3.
x_cols = ["x", "ball_land_x"]
y_cols = ["y", "ball_land_y"]
df.loc[is_left, x_cols] = 120 - df.loc[is_left, x_cols]
df.loc[is_left, y_cols] = 53.3 - df.loc[is_left, y_cols]

# Mirroring both axes is a half-turn of the field, so movement direction and
# orientation angles are rotated by 180 degrees and wrapped back into [0, 360).
angle_cols = ["dir", "o"]
df.loc[is_left, angle_cols] = (180 + df.loc[is_left, angle_cols]) % 360

In [6]:
# Add a line-of-scrimmage column (starting x value of the play) and mirror the
# absolute yard line for plays that were recorded going left.
direction = df["play_direction"].str.lower()
mask_left = direction == "left"
mask_right = direction == "right"

# Boolean .loc selections are copies, so these stay fixed while df is updated.
yardline_right = df.loc[mask_right, "absolute_yardline_number"]
yardline_left = df.loc[mask_left, "absolute_yardline_number"]

df.loc[mask_right, "line_of_scrimmage"] = yardline_right - 10
df.loc[mask_left, "line_of_scrimmage"] = 110 - yardline_left
df.loc[mask_left, "absolute_yardline_number"] = 120 - yardline_left

In [7]:
# Express player x and ball-landing x relative to the absolute yard line
df["x_LOS"] = df["x"].sub(df["absolute_yardline_number"])
df["ball_land_x_LOS"] = df["ball_land_x"].sub(df["absolute_yardline_number"])

In [8]:
# Convert degrees to radians and create velocity and orientation vectors.
# `90 - angle` is applied before cos/sin, so an input of 90 degrees points
# along +x and an input of 0 degrees points along +y.
df["dir_rad"] = np.deg2rad(df["dir"].rsub(90))
df["o_rad"] = np.deg2rad(df["o"].rsub(90))

# Velocity vector (movement): unit direction scaled by speed
df["v_x"] = np.cos(df["dir_rad"]) * df["s"]
df["v_y"] = np.sin(df["dir_rad"]) * df["s"]

# Orientation vector (where they're facing): unit length
df["o_x"] = np.cos(df["o_rad"])
df["o_y"] = np.sin(df["o_rad"])

In [9]:
# Euclidean distance from the player's position to the ball's landing spot
df["distance_to_ball"] = np.sqrt(
    (df["x"] - df["ball_land_x"]).pow(2)
    + (df["y"] - df["ball_land_y"]).pow(2)
)

In [10]:
# Convert the height string ("<feet>-<inches>") to total inches
def _height_to_inches(height):
    """Return total inches for a "<feet>-<inches>" string; NaN for non-string input."""
    if not isinstance(height, str):
        return np.nan
    parts = height.split("-")
    return int(parts[0]) * 12 + int(parts[1])


df["player_height_inches"] = df["player_height"].apply(_height_to_inches)

In [12]:
# Map each player position label to an integer code; `le` keeps the fitted
# label-to-code mapping.
le = LabelEncoder()
le.fit(df["player_position"])
df["position_encoded"] = le.transform(df["player_position"])

In [13]:
# Drop the raw height string, the player name, the play direction and the
# player side. Note that player_position itself is kept.
# (author's note) all defensive players are labeled with defensive coverage player role
df = df.drop(
    ["player_height", "player_name", "play_direction", "player_side"],
    axis="columns",
)

In [14]:
# Shift y values by 26.65 so they are centered on 0 (range -26.65 to 26.65)
df["y_centered"] = df["y"].sub(26.65)
df["ball_land_y_centered"] = df["ball_land_y"].sub(26.65)

In [15]:
# Kinetics features are built on a separate copy, leaving `df` as it is
df_kin = df.copy()

# Momentum (weight * speed) and kinetic energy (0.5 * weight * speed^2),
# computed directly from the player_weight and s columns.
df_kin["momentum"] = df_kin["s"] * df_kin["player_weight"]
df_kin["kinetic_energy"] = df_kin["player_weight"].mul(0.5) * df_kin["s"].pow(2)

In [16]:
# Rolling (smoothed) features for speed, acceleration, x and y, computed per
# player within each play.
window = 3
group_keys = ["game_id", "play_id", "nfl_id"]

# Rolling windows and diffs below rely on frames being ordered within a group.
df_kin = df_kin.sort_values(group_keys + ["frame_id"])


def _centered_rolling_mean(series):
    """Centered rolling mean over `window` frames.

    `min_periods=1` makes the first and last frame of a group average over the
    frames that exist instead of producing NaN. With the default, those edge
    frames would be NaN and the `fillna(0)` that follows would turn them into
    zero speed / zero position. This also matches the `min_periods=1` used for
    the smoothed angular velocity.
    """
    return series.rolling(window, center=True, min_periods=1).mean()


# One groupby is enough here: every source column already exists.
per_player = df_kin.groupby(group_keys)

df_kin["speed_roll_mean"] = per_player["s"].transform(_centered_rolling_mean)
df_kin["acc_roll_mean"] = per_player["a"].transform(_centered_rolling_mean)
df_kin["x_smooth"] = per_player["x_LOS"].transform(_centered_rolling_mean)
df_kin["y_smooth"] = per_player["y_centered"].transform(_centered_rolling_mean)

# Compute smoother velocity & heading.
# Frame-to-frame change of the smoothed position, scaled by 10 (the notebook's
# frames_per_second value) to get a per-second rate. The first frame of each
# group has no predecessor and stays NaN here.
# A fresh groupby is used because the smoothed columns were just added.
smooth_step = df_kin.groupby(group_keys)[["x_smooth", "y_smooth"]].diff() * 10
df_kin["v_x_smooth"] = smooth_step["x_smooth"]
df_kin["v_y_smooth"] = smooth_step["y_smooth"]
df_kin["heading_smooth"] = np.arctan2(df_kin["v_y_smooth"], df_kin["v_x_smooth"])

In [17]:
# Replace every remaining NaN, in every column, with 0
df_kin = df_kin.fillna(value=0)

In [18]:
# angular velocity
frames_per_second = 10


def compute_ang_vel(series, fps=None):
    """Angular velocity of an angle series, in radians per second.

    Parameters
    ----------
    series : pandas.Series
        Angles in radians, one value per frame, in frame order.
    fps : float, optional
        Frames per second used to scale the per-frame change. Defaults to the
        module-level `frames_per_second`.

    Returns
    -------
    pandas.Series
        Same index as `series`. The first element is NaN because it has no
        previous frame to difference against.
    """
    if fps is None:
        fps = frames_per_second
    # Difference between consecutive frames, computed once.
    step = series.diff()
    # Wrap to [-pi, pi] so a jump across the +/-pi boundary is read as the
    # short way round rather than a near-full turn.
    wrapped = np.arctan2(np.sin(step), np.cos(step))
    # Per-frame change -> per-second rate.
    return wrapped * fps

# Angular velocity of the orientation angle, per player within each play
player_play_keys = ["game_id", "play_id", "nfl_id"]
df_kin["angular_velocity"] = df_kin.groupby(player_play_keys)["o_rad"].transform(
    compute_ang_vel
)


def _trailing_mean(values):
    """Trailing 3-frame mean; windows with fewer valid frames still produce a value."""
    return values.rolling(window=3, min_periods=1).mean()


df_kin["angular_velocity_smooth"] = df_kin.groupby(player_play_keys)[
    "angular_velocity"
].transform(_trailing_mean)

In [19]:
# Angle (radians) of the vector from the player to the ball's landing spot,
# using the centered y and yard-line-relative x coordinates
df_kin["angle_to_ball_rad"] = np.arctan2(
    df_kin["ball_land_y_centered"].sub(df_kin["y_centered"]),
    df_kin["ball_land_x_LOS"].sub(df_kin["x_LOS"]),
)

In [20]:
# Persist the engineered features; the row index is not written
df_kin.to_csv(path_or_buf="data/inputweeks.csv", index=False)

In [None]:
# Reload the saved feature table from disk into `df`
df = pd.read_csv(filepath_or_buffer="data/inputweeks.csv")