In [15]:
#Set up
import pandas as pd 
import numpy as np


# Week-1 2023 tracking files (paths are relative to the notebook's folder).
input_df = pd.read_csv('../data/train/input_2023_w01.csv')
output_df = pd.read_csv('../data/train/output_2023_w01.csv')

# Highest frame_id recorded in the output file for each play.
print(output_df.groupby(['game_id', 'play_id'])['frame_id'].max())

# low_memory=False makes pandas infer each column's dtype from the whole file
# rather than chunk by chunk. NOTE(review): the saved run printed a warning
# pointing at this read_csv call, presumably a mixed-dtype DtypeWarning.
supplemental_df = pd.read_csv('../data/supplementary_data.csv', low_memory=False)

# merging supplemental data
df = pd.merge(input_df, supplemental_df, on=['game_id', 'play_id'], how='left')

# Filtering out Zone coverages.
# na=False: the left merge can leave team_coverage_type missing; without it the
# mask may contain NaN and `~` then fails on an object-dtype column. Rows with
# a missing coverage type are NOT treated as zone, so they stay in df_man.
# NOTE(review): confirm unknown-coverage plays should be kept here.
# regex=False: "ZONE" is a literal substring, not a pattern.
is_zone = df['team_coverage_type'].str.contains("ZONE", regex=False, na=False)
df_man = df[~is_zone]


game_id     play_id
2023090700  101        21
            194         9
            219         8
            361        16
            436         7
                       ..
2023091100  3618       11
            3684       12
            3716        7
            3942        9
            3987       11
Name: frame_id, Length: 819, dtype: int64


  supplemental_df = pd.read_csv('../data/supplementary_data.csv')


In [16]:
# First df_man row whose pass_result is 'IN'.
# NOTE(review): test_play is not used to choose the ids below; they are hard-coded.
in_rows = df_man.loc[df_man['pass_result'].eq('IN')]
test_play = in_rows.iloc[0]

test_game_id, test_play_id = 2023090700, 3032

# Restrict the output tracking data to this single play
output_play_mask = output_df['game_id'].eq(test_game_id) & output_df['play_id'].eq(test_play_id)
output_test = output_df.loc[output_play_mask]

# Same restriction on the input tracking data: these rows tell us who is on the play
input_play_mask = input_df['game_id'].eq(test_game_id) & input_df['play_id'].eq(test_play_id)
input_test = input_df.loc[input_play_mask]

# One line per distinct player/role combination on the play
roster_columns = ['nfl_id', 'player_name', 'player_position', 'player_role']
play_roster = input_test.loc[:, roster_columns].drop_duplicates()
print(play_roster)

       nfl_id        player_name player_position         player_role
15964   54527         Bryan Cook              FS  Defensive Coverage
15984   54600    Joshua Williams              CB  Defensive Coverage
16004   46137        Justin Reid              SS  Defensive Coverage
16024   53487        Nick Bolton             MLB  Defensive Coverage
16044   54486     Trent McDuffie              CB  Defensive Coverage
16064   52471     Willie Gay Jr.             ILB  Defensive Coverage
16084   47856   David Montgomery              RB  Other Route Runner
16104   43584      Kalif Raymond              WR  Other Route Runner
16124   38696       Marvin Jones              WR  Other Route Runner
16144   55899        Sam LaPorta              TE  Other Route Runner
16164   43290         Jared Goff              QB              Passer
16184   53541  Amon-Ra St. Brown              WR   Targeted Receiver


In [20]:
# Constants (same units as the tracking x/y columns; distances below are printed as yards)
BOUNDARY_THRESHOLD = 12  # a player this close to either sideline counts as "boundary"
FIELD_WIDTH = 53.3       # upper y limit the boundary test mirrors around (NFL field width)


def is_boundary_y(y):
    """Return a truthy value when `y` is within BOUNDARY_THRESHOLD of y=0 or y=FIELD_WIDTH."""
    return (y < BOUNDARY_THRESHOLD) or (y > (FIELD_WIDTH - BOUNDARY_THRESHOLD))


def first_frame(tracking, player_id):
    """Return the lowest-frame_id row of `player_id` in `tracking`, or None if absent.

    Sorting by frame_id (stable, so ties keep file order) means the "start"
    row does not depend on the order in which rows appear in the CSV.
    """
    rows = tracking[tracking['nfl_id'] == player_id]
    if len(rows) == 0:
        return None
    return rows.sort_values('frame_id', kind='stable').iloc[0]


# nfl_id -> player_name, built once instead of re-filtering input_test for every print.
# drop_duplicates keeps the first row per player, matching the previous `.iloc[0]` lookups.
player_names = input_test.drop_duplicates('nfl_id').set_index('nfl_id')['player_name']

# Get targeted receiver (raises IndexError if the play has none)
targeted_wr = input_test[input_test['player_role'] == 'Targeted Receiver']
wr_id = targeted_wr['nfl_id'].iloc[0]
wr_name = targeted_wr['player_name'].iloc[0]

print(f"Targeted WR: {wr_name} (ID: {wr_id})")

# Get all CBs in coverage (positions listed as CB or DB)
cb_ids = input_test[
    (input_test['player_role'] == 'Defensive Coverage') &
    (input_test['player_position'].isin(['CB', 'DB']))
]['nfl_id'].unique()

print(f"\nAll CBs on this play: {cb_ids}")

# Check if WR is at boundary position, using his first row in the output data
wr_start = first_frame(output_test, wr_id)
if wr_start is None:
    print("WR has no output data - skipping play")
else:
    wr_y = wr_start['y']
    is_boundary_wr = is_boundary_y(wr_y)

    print(f"\nWR y-position: {wr_y:.1f}")
    print(f"Is boundary WR: {is_boundary_wr}")

    if not is_boundary_wr:
        print("→ SLOT RECEIVER - SKIP THIS PLAY")
    else:
        # Filter CBs to boundary positions only. The start row of each boundary
        # CB is kept so the distance step below can reuse it.
        boundary_cb_starts = {}
        for cb_id in cb_ids:
            cb_name = player_names.loc[cb_id]
            cb_start = first_frame(output_test, cb_id)

            # Not every player on the play has rows in output_test; without a
            # row there is no position to test, so the CB is skipped.
            if cb_start is None:
                print(f"  {cb_name} (ID: {cb_id}): No output data, skipping")
                continue

            cb_y = cb_start['y']
            is_boundary = is_boundary_y(cb_y)

            print(f"  {cb_name}: y={cb_y:.1f}, boundary={is_boundary}")

            if is_boundary:
                boundary_cb_starts[cb_id] = cb_start

        boundary_cb_ids = list(boundary_cb_starts)
        print(f"\nBoundary CBs: {boundary_cb_ids}")

        # Find closest boundary CB (straight-line distance between start positions)
        if len(boundary_cb_ids) > 0:
            cb_distances = {}
            for cb_id, cb_start in boundary_cb_starts.items():
                distance = np.sqrt(
                    (cb_start['x'] - wr_start['x'])**2 +
                    (cb_start['y'] - wr_start['y'])**2
                )
                cb_distances[cb_id] = distance
                print(f"  {player_names.loc[cb_id]}: {distance:.2f} yards")

            primary_cb_id = min(cb_distances, key=cb_distances.get)
            primary_cb_name = player_names.loc[primary_cb_id]

            print(f"\n✓ Primary CB: {primary_cb_name} (ID: {primary_cb_id})")
            print(f"✓ Distance: {cb_distances[primary_cb_id]:.2f} yards")
        else:
            print("→ No boundary CBs with output data found - SKIP THIS PLAY")

Targeted WR: Amon-Ra St. Brown (ID: 53541)

All CBs on this play: [54600 54486]

WR y-position: 42.2
Is boundary WR: True
  Joshua Williams (ID: 54600): No output data, skipping
  Trent McDuffie (ID: 54486): No output data, skipping

Boundary CBs: []
→ No boundary CBs with output data found - SKIP THIS PLAY


In [21]:
# Find plays with boundary receivers: stop at the first play in df_man whose
# targeted receiver starts near a sideline.
#
# df_man repeats every play on many tracking rows, so collapse it to one row
# per (game_id, play_id) first. Order of first appearance is kept, so the
# first hit is the same play as when scanning row by row, without re-testing
# each play once per tracking row.
man_plays = df_man.drop_duplicates(subset=['game_id', 'play_id'])

for idx in range(len(man_plays)):
    test_play = man_plays.iloc[idx]
    test_game_id = test_play['game_id']
    test_play_id = test_play['play_id']

    # Get input and output for this play
    input_test = input_df[
        (input_df['game_id'] == test_game_id) &
        (input_df['play_id'] == test_play_id)
    ]

    output_test = output_df[
        (output_df['game_id'] == test_game_id) &
        (output_df['play_id'] == test_play_id)
    ]

    # Check if targeted WR is boundary
    targeted_wr = input_test[input_test['player_role'] == 'Targeted Receiver']
    if len(targeted_wr) == 0:
        continue

    wr_id = targeted_wr['nfl_id'].iloc[0]
    wr_data = output_test[output_test['nfl_id'] == wr_id]

    # The receiver must have output rows to have a position to test
    if len(wr_data) == 0:
        continue

    # Use the lowest-frame_id row so the result does not depend on CSV row order
    wr_y = wr_data.sort_values('frame_id', kind='stable').iloc[0]['y']

    # Same rule as the single-play check in the previous cell: within
    # BOUNDARY_THRESHOLD of y=0 or y=53.3. This cell used to hard-code 12 / 41,
    # which disagreed with 53.3 - 12 = 41.3.
    is_boundary = (wr_y < BOUNDARY_THRESHOLD) or (wr_y > (53.3 - BOUNDARY_THRESHOLD))

    if is_boundary:
        print(f"Found boundary play: game_id={test_game_id}, play_id={test_play_id}")
        print(f"  WR y-position: {wr_y:.1f}")
        print(f"  Pass result: {test_play['pass_result']}")
        break
else:
    # Loop finished without `break`: make the empty result visible
    print("No boundary play found in df_man")

Found boundary play: game_id=2023090700, play_id=3032
  WR y-position: 42.2
  Pass result: I


In [None]:
## TODO: stopped here. Next step: find plays with boundary CBs (the test play above has none with output data).