In [6]:
import pandas as pd
import numpy as np
from pathlib import Path

BASE_PATH = Path('dataset/train')

df = pd.read_csv(BASE_PATH / 'input_2023_w01.csv')

print(df.shape)
df.head()

(285714, 23)


Unnamed: 0,game_id,play_id,player_to_predict,nfl_id,frame_id,play_direction,absolute_yardline_number,player_name,player_height,player_weight,...,player_role,x,y,s,a,dir,o,num_frames_output,ball_land_x,ball_land_y
0,2023090700,101,False,54527,1,right,42,Bryan Cook,6-1,210,...,Defensive Coverage,52.33,36.94,0.09,0.39,322.4,238.24,21,63.259998,-0.22
1,2023090700,101,False,54527,2,right,42,Bryan Cook,6-1,210,...,Defensive Coverage,52.33,36.94,0.04,0.61,200.89,236.05,21,63.259998,-0.22
2,2023090700,101,False,54527,3,right,42,Bryan Cook,6-1,210,...,Defensive Coverage,52.33,36.93,0.12,0.73,147.55,240.6,21,63.259998,-0.22
3,2023090700,101,False,54527,4,right,42,Bryan Cook,6-1,210,...,Defensive Coverage,52.35,36.92,0.23,0.81,131.4,244.25,21,63.259998,-0.22
4,2023090700,101,False,54527,5,right,42,Bryan Cook,6-1,210,...,Defensive Coverage,52.37,36.9,0.35,0.82,123.26,244.25,21,63.259998,-0.22


In [7]:
def standardize(df: pd.DataFrame) -> pd.DataFrame:
    """
    Create a direction-invariant view of all plays.
    
    Returns a new DataFrame where:
    - x_rel=0 is at the line of scrimmage (offense behind at negative x_rel, defense ahead at positive x_rel)
    - All plays show offense driving toward increasing x (left to right / bottom to top)
    - 'left' plays are flipped since they drive toward decreasing x
    - Orientation and direction angles are properly adjusted
    
    Original DataFrame is not modified.
    """
    # Create a copy to avoid modifying original
    df_rel = df.copy()
    
    # Determine which plays need flipping
    # 'left' means offense drives toward decreasing x (needs flip)
    # 'right' means offense drives toward increasing x (no flip needed)
    is_left = df_rel['play_direction'] == 'left'
    
    # For left plays, flip x coordinates (mirror horizontally)
    # Original: x goes 0 to 120
    # After flip: x' = 120 - x
    df_rel.loc[is_left, 'x'] = 120 - df_rel.loc[is_left, 'x']
    if 'ball_land_x' in df_rel.columns:
        df_rel.loc[is_left, 'ball_land_x'] = 120 - df_rel.loc[is_left, 'ball_land_x']
    
    # For left plays, flip y coordinates (mirror vertically)
    # This keeps players on same relative side after horizontal flip
    df_rel.loc[is_left, 'y'] = 53.3 - df_rel.loc[is_left, 'y']
    if 'ball_land_y' in df_rel.columns:
        df_rel.loc[is_left, 'ball_land_y'] = 53.3 - df_rel.loc[is_left, 'ball_land_y']
    
    # For left plays, flip orientation and direction angles
    # When mirroring horizontally and vertically, angle transforms as: θ' = 180° - θ
    df_rel.loc[is_left, 'o'] = abs(df_rel.loc[is_left, 'o'] - 180)
    df_rel.loc[is_left, 'dir'] = abs(df_rel.loc[is_left, 'dir'] - 180)
    
    # Also need to flip the absolute_yardline_number for left plays
    # because it's measured from their defending endzone
    df_rel.loc[is_left, 'absolute_yardline_number'] = 120 - df_rel.loc[is_left, 'absolute_yardline_number']
    
    # Make x relative to line of scrimmage (LOS at x=0)
    # Negative x = behind LOS (offense side), Positive x = past LOS (defense side)
    df_rel['x_rel'] = df_rel['x'] - df_rel['absolute_yardline_number']
    if 'ball_land_x' in df_rel.columns:
        df_rel['ball_land_x_rel'] = df_rel['ball_land_x'] - df_rel['absolute_yardline_number']
    
    # Make y relative to center of field
    df_rel['y_rel'] = df_rel['y'] - 26.65
    if 'ball_land_y' in df_rel.columns:
        df_rel['ball_land_y_rel'] = df_rel['ball_land_y'] - 26.65
    
    # Add distance to ball landing spot
    if 'ball_land_x' in df_rel.columns and 'ball_land_y' in df_rel.columns:
        df_rel['dist_to_ball'] = np.sqrt(
            (df_rel['x'] - df_rel['ball_land_x'])**2 + 
            (df_rel['y'] - df_rel['ball_land_y'])**2
        )
    
    return df_rel

In [8]:
df_rel = standardize(df)

print(f"Original shape: {df.shape}")
print(f"Standardized shape: {df_rel.shape}")
print(f"\nNew columns: {[col for col in df_rel.columns if col not in df.columns]}")

# Verify: all plays should now show offense moving left to right
# Check a left play and a right play
print("\n--- Sample left play (original) ---")
print(df[df['play_direction'] == 'left'][['x', 'y', 'o', 'absolute_yardline_number']].head(2))
print("\n--- Same play after standardization ---")
left_sample = df[df['play_direction'] == 'left'].index[:2]
print(df_rel.loc[left_sample, ['x', 'y', 'o', 'x_rel', 'y_rel']])

print("\n--- Sample right play (original) ---")
print(df[df['play_direction'] == 'right'][['x', 'y', 'o', 'absolute_yardline_number']].head(2))
print("\n--- Same play after standardization ---")
right_sample = df[df['play_direction'] == 'right'].index[:2]
print(df_rel.loc[right_sample, ['x', 'y', 'o', 'x_rel', 'y_rel']])

Original shape: (285714, 23)
Standardized shape: (285714, 28)

New columns: ['x_rel', 'ball_land_x_rel', 'y_rel', 'ball_land_y_rel', 'dist_to_ball']

--- Sample left play (original) ---
         x      y       o  absolute_yardline_number
234  84.92  32.43  100.58                        89
235  84.92  32.43  100.58                        89

--- Same play after standardization ---
         x      y      o  x_rel  y_rel
234  35.08  20.87  79.42   4.08  -5.78
235  35.08  20.87  79.42   4.08  -5.78

--- Sample right play (original) ---
       x      y       o  absolute_yardline_number
0  52.33  36.94  238.24                        42
1  52.33  36.94  236.05                        42

--- Same play after standardization ---
       x      y       o  x_rel  y_rel
0  52.33  36.94  238.24  10.33  10.29
1  52.33  36.94  236.05  10.33  10.29


In [9]:
def plot_play(df, game_id, play_id):
    import matplotlib.pyplot as plt
    from matplotlib.animation import FuncAnimation
    from IPython.display import HTML

    subset = df[(df['game_id'] == game_id) & (df['play_id'] == play_id)]

    # Get metadata
    ball_x_rel = subset['ball_land_x_rel'].iloc[
        0] if 'ball_land_x_rel' in subset.columns else None
    ball_y_rel = subset['ball_land_y_rel'].iloc[
        0] if 'ball_land_y_rel' in subset.columns else None
    frames = sorted(subset['frame_id'].unique())
    play_dir = subset['play_direction'].iloc[0]

    # Create figure - top-down view
    fig, ax = plt.subplots(figsize=(10, 14))

    # Initialize scatter plots
    scatter_offense = ax.scatter([], [],
                                 c='#DC2626',
                                 s=200,
                                 edgecolors='white',
                                 linewidths=2,
                                 label='Offense',
                                 zorder=10)
    scatter_defense = ax.scatter([], [],
                                 c='#1E3A8A',
                                 s=200,
                                 edgecolors='white',
                                 linewidths=2,
                                 label='Defense',
                                 zorder=10)
    scatter_ball = ax.scatter([], [],
                              c='gold',
                              s=400,
                              marker='*',
                              edgecolors='black',
                              linewidths=2,
                              zorder=15,
                              label='Ball Landing')

    # Store arrow objects for orientation
    arrows = []

    # Frame text
    frame_text = ax.text(0.02,
                         0.98,
                         '',
                         transform=ax.transAxes,
                         fontsize=12,
                         weight='bold',
                         va='top',
                         bbox=dict(boxstyle='round',
                                   facecolor='white',
                                   alpha=0.8))

    def init():
        # Field setup
        ax.set_xlim(-27, 27)
        ax.set_ylim(-15, 45)
        ax.set_facecolor('#00B140')
        ax.set_aspect('equal')

        # Line of scrimmage at x_rel = 0
        ax.axhline(0,
                   color='yellow',
                   linewidth=3,
                   label='Line of Scrimmage',
                   zorder=5)

        # Field center
        ax.axvline(0,
                   color='white',
                   linewidth=2,
                   alpha=0.5,
                   linestyle='--',
                   zorder=5)

        # Yard lines every 5 yards
        for yard in range(-10, 41, 5):
            ax.axhline(yard, color='white', linewidth=1, alpha=0.3)

        # Sidelines
        ax.axvline(-26.65, color='white', linewidth=2)
        ax.axvline(26.65, color='white', linewidth=2)

        # Ball landing spot
        if pd.notna(ball_x_rel) and pd.notna(ball_y_rel):
            scatter_ball.set_offsets([[ball_y_rel, ball_x_rel]])

        ax.set_xlabel('Y relative to center (yards)',
                      fontsize=12,
                      weight='bold')
        ax.set_ylabel('X relative to LOS (yards) - Offense ↓ | ↑ Defense',
                      fontsize=12,
                      weight='bold')
        ax.set_title(f'Game {game_id}, Play {play_id} ({play_dir} play)',
                     fontsize=14,
                     weight='bold')
        ax.legend(loc='upper right')
        ax.grid(True, alpha=0.2)

        return scatter_offense, scatter_defense, scatter_ball, frame_text

    def update(frame_num):
        nonlocal arrows

        frame_id = frames[frame_num]
        frame_data = subset[subset['frame_id'] == frame_id]

        # Remove old arrows
        for arrow in arrows:
            arrow.remove()
        arrows = []

        # Update offense positions (y_rel, x_rel for top-down)
        offense = frame_data[frame_data['player_side'] == 'Offense']
        if len(offense) > 0:
            scatter_offense.set_offsets(offense[['y_rel', 'x_rel']].values)
        else:
            scatter_offense.set_offsets([])

        # Update defense positions
        defense = frame_data[frame_data['player_side'] == 'Defense']
        if len(defense) > 0:
            scatter_defense.set_offsets(defense[['y_rel', 'x_rel']].values)
        else:
            scatter_defense.set_offsets([])

        # Draw orientation arrows
        for _, player in frame_data.iterrows():
            x_rel, y_rel = player['x_rel'], player['y_rel']

            if pd.notna(player['o']):
                # Orientation adjustment for top-down view
                # Standard: 0° = right, 90° = up
                # Top-down plot: horizontal = y_rel, vertical = x_rel
                # Need to rotate by 90° to convert from standard to top-down view
                orientation = player['o'] + 0
                orient_rad = np.radians(orientation)

                arrow_length = 2.0
                dx = arrow_length * np.cos(orient_rad)  # dy_rel component
                dy = arrow_length * np.sin(orient_rad)  # dx_rel component

                if player['player_position'] == 'QB':
                    color = "#DDFA4C"
                elif player['player_side'] == 'Offense':
                    color = "#D61919"
                else:
                    color = '#1E3A8A'

                arrow = ax.arrow(y_rel,
                                 x_rel,
                                 dx,
                                 dy,
                                 head_width=1.2,
                                 head_length=0.8,
                                 fc=color,
                                 ec=color,
                                 alpha=0.7,
                                 linewidth=2,
                                 zorder=8)
                arrows.append(arrow)

        # Update frame counter
        frame_text.set_text(f'Frame: {frame_id}/{frames[-1]}')

        return scatter_offense, scatter_defense, scatter_ball, frame_text

    # Create animation
    anim = FuncAnimation(fig,
                         update,
                         init_func=init,
                         frames=len(frames),
                         interval=100,
                         blit=True,
                         repeat=True)

    plt.close()
    return HTML(anim.to_jshtml())

In [11]:
random_game, random_play = df_rel.sample(1).filter(['game_id', 'play_id']).values.flatten()
plot_play(df_rel, random_game, random_play)