# NFL Big Data Bowl 2026 - Player Matchup Matrix

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

pd.set_option('display.max_columns', None)
sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = (16, 8)
plt.rcParams['font.size'] = 11

print("✅ Setup complete!")

✅ Setup complete!


## 1. Load Data

In [None]:
DATA_DIR = Path('../data')
PROCESSED_DIR = DATA_DIR / 'processed'
COMPETITION_DIR = DATA_DIR / '114239_nfl_competition_files_published_analytics_final'
TRAIN_DIR = COMPETITION_DIR / 'train'
OUTPUT_DIR = PROCESSED_DIR / 'definisive_matchups'
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

print("Loading data...")

convergence_df = pd.read_csv(PROCESSED_DIR / 'convergence_speed_all_plays.csv')
print(f"✓ Convergence data: {len(convergence_df):,} rows")

supp_data = pd.read_csv(COMPETITION_DIR / 'supplementary_data.csv')
print(f"✓ Supplementary data: {len(supp_data):,} rows")

merged_df = convergence_df.merge(
    supp_data,
    on=['game_id', 'play_id', 'week'],
    how='left'
)

print(f"✓ Merged data: {len(merged_df):,} rows")

defenders = merged_df[merged_df['player_role'] == 'Defensive Coverage'].copy()
print(f"✓ Defender instances: {len(defenders):,}")

Loading data...
✓ Convergence data: 173,150 rows
✓ Supplementary data: 18,009 rows
✓ Merged data: 173,150 rows
✓ Defender instances: 94,293


In [3]:
defenders.head()

Unnamed: 0,week,game_id,play_id,nfl_id,player_role,player_position,initial_distance,final_distance,min_distance,distance_change,time_elapsed,convergence_speed,avg_speed,max_speed,num_frames,season,game_date,game_time_eastern,home_team_abbr,visitor_team_abbr,play_description,quarter,game_clock,down,yards_to_go,possession_team,defensive_team,yardline_side,yardline_number,pre_snap_home_score,pre_snap_visitor_score,play_nullified_by_penalty,pass_result,pass_length,offense_formation,receiver_alignment,route_of_targeted_receiver,play_action,dropback_type,dropback_distance,pass_location_type,defenders_in_the_box,team_coverage_man_zone,team_coverage_type,penalty_yards,pre_penalty_yards_gained,yards_gained,expected_points,expected_points_added,pre_snap_home_team_win_probability,pre_snap_visitor_team_win_probability,home_team_win_probability_added,visitor_team_win_probility_added
0,1,2023090700,101,54527,Defensive Coverage,FS,38.734099,36.314143,36.314143,-2.419957,2.6,0.930753,2.026538,3.68,26,2023,09/07/2023,20:20:00,KC,DET,(14:25) (Shotgun) J.Goff pass incomplete deep ...,1,14:25,3,3,DET,KC,DET,32,0,0,N,I,22,SHOTGUN,2x2,CORNER,False,TRADITIONAL,2.13,INSIDE_BOX,6,ZONE_COVERAGE,COVER_2_ZONE,,0,0,0.927021,-2.145443,0.590426,0.409574,0.04972,-0.04972
1,1,2023090700,101,46137,Defensive Coverage,SS,24.078863,19.375388,19.375388,-4.703475,2.6,1.809029,2.212308,5.34,26,2023,09/07/2023,20:20:00,KC,DET,(14:25) (Shotgun) J.Goff pass incomplete deep ...,1,14:25,3,3,DET,KC,DET,32,0,0,N,I,22,SHOTGUN,2x2,CORNER,False,TRADITIONAL,2.13,INSIDE_BOX,6,ZONE_COVERAGE,COVER_2_ZONE,,0,0,0.927021,-2.145443,0.590426,0.409574,0.04972,-0.04972
2,1,2023090700,101,52546,Defensive Coverage,CB,22.288868,19.820143,19.820143,-2.468725,2.6,0.94951,1.91,3.17,26,2023,09/07/2023,20:20:00,KC,DET,(14:25) (Shotgun) J.Goff pass incomplete deep ...,1,14:25,3,3,DET,KC,DET,32,0,0,N,I,22,SHOTGUN,2x2,CORNER,False,TRADITIONAL,2.13,INSIDE_BOX,6,ZONE_COVERAGE,COVER_2_ZONE,,0,0,0.927021,-2.145443,0.590426,0.409574,0.04972,-0.04972
3,1,2023090700,101,53487,Defensive Coverage,MLB,33.570587,26.305413,26.305413,-7.265174,2.6,2.794298,2.965,5.49,26,2023,09/07/2023,20:20:00,KC,DET,(14:25) (Shotgun) J.Goff pass incomplete deep ...,1,14:25,3,3,DET,KC,DET,32,0,0,N,I,22,SHOTGUN,2x2,CORNER,False,TRADITIONAL,2.13,INSIDE_BOX,6,ZONE_COVERAGE,COVER_2_ZONE,,0,0,0.927021,-2.145443,0.590426,0.409574,0.04972,-0.04972
4,1,2023090700,101,54486,Defensive Coverage,CB,44.243102,44.342922,44.243102,0.099821,2.6,-0.038393,2.556923,4.83,26,2023,09/07/2023,20:20:00,KC,DET,(14:25) (Shotgun) J.Goff pass incomplete deep ...,1,14:25,3,3,DET,KC,DET,32,0,0,N,I,22,SHOTGUN,2x2,CORNER,False,TRADITIONAL,2.13,INSIDE_BOX,6,ZONE_COVERAGE,COVER_2_ZONE,,0,0,0.927021,-2.145443,0.590426,0.409574,0.04972,-0.04972


In [4]:
# LOAD PLAYER NAMES FROM ALL INPUT FILES (MODIFIED)
print("\n" + "="*80)
print("LOADING PLAYER NAMES FROM ALL INPUT FILES")
print("="*80)

input_files = sorted(TRAIN_DIR.glob('input_2023_w*.csv'))
print(f"Found {len(input_files)} input files")

all_players = []

if len(input_files) > 0:
    for input_file in input_files:
        print(f"Loading: {input_file.name}...", end=" ")
        try:
            input_df = pd.read_csv(input_file)
            
            if 'nfl_id' in input_df.columns and 'player_name' in input_df.columns:
                file_players = input_df[['nfl_id', 'player_name', 'player_position']].drop_duplicates('nfl_id')
                all_players.append(file_players)
                print(f"✓ {len(file_players)} unique players")
            else:
                print(f"⚠️ Missing required columns")
        except Exception as e:
            print(f"❌ Error: {e}")
    
    if len(all_players) > 0:
        # Combine all player data and keep first occurrence of each nfl_id
        player_lookup = pd.concat(all_players, ignore_index=True).drop_duplicates('nfl_id', keep='first')
        player_lookup.columns = ['nfl_id', 'player_name', 'position']
        
        print(f"\n✅ TOTAL UNIQUE PLAYERS LOADED: {len(player_lookup)}")
        
        # Add team info
        if 'defensive_team' in defenders.columns:
            team_lookup = defenders.groupby('nfl_id')['defensive_team'].agg(
                lambda x: x.mode()[0] if len(x.mode()) > 0 else 'UNK'
            ).reset_index()
            team_lookup.columns = ['nfl_id', 'team']
            player_lookup = player_lookup.merge(team_lookup, on='nfl_id', how='left')
            player_lookup['team'] = player_lookup['team'].fillna('UNK')
            print(f"✓ Added team info")
        
        # CRITICAL: Filter defenders to only include players we have names for
        print(f"\nBefore filtering: {defenders['nfl_id'].nunique()} unique defenders")
        defenders = defenders[defenders['nfl_id'].isin(player_lookup['nfl_id'])].copy()
        print(f"After filtering: {defenders['nfl_id'].nunique()} unique defenders")
        print(f"✓ Filtered to only players with metadata\n")
        
        print(f"📋 Sample of loaded player names:")
        print(player_lookup.head(15).to_string(index=False))
    else:
        print("❌ No player data could be loaded!")
        player_lookup = None
else:
    print("❌ No input files found!")
    player_lookup = None


LOADING PLAYER NAMES FROM ALL INPUT FILES
Found 19 input files
Loading: input_2023_w01 - sample.csv... ✓ 165 unique players
Loading: input_2023_w01.csv... ✓ 737 unique players
Loading: input_2023_w02.csv... ✓ 746 unique players
Loading: input_2023_w03.csv... ✓ 754 unique players
Loading: input_2023_w04.csv... ✓ 750 unique players
Loading: input_2023_w05.csv... ✓ 656 unique players
Loading: input_2023_w06.csv... ✓ 706 unique players
Loading: input_2023_w07.csv... ✓ 611 unique players
Loading: input_2023_w08.csv... ✓ 740 unique players
Loading: input_2023_w09.csv... ✓ 671 unique players
Loading: input_2023_w10.csv... ✓ 671 unique players
Loading: input_2023_w11.csv... ✓ 650 unique players
Loading: input_2023_w12.csv... ✓ 745 unique players
Loading: input_2023_w13.csv... ✓ 614 unique players
Loading: input_2023_w14.csv... ✓ 704 unique players
Loading: input_2023_w15.csv... ✓ 754 unique players
Loading: input_2023_w16.csv... ✓ 757 unique players
Loading: input_2023_w17.csv... ✓ 741 unique

In [5]:
defenders.head()

Unnamed: 0,week,game_id,play_id,nfl_id,player_role,player_position,initial_distance,final_distance,min_distance,distance_change,time_elapsed,convergence_speed,avg_speed,max_speed,num_frames,season,game_date,game_time_eastern,home_team_abbr,visitor_team_abbr,play_description,quarter,game_clock,down,yards_to_go,possession_team,defensive_team,yardline_side,yardline_number,pre_snap_home_score,pre_snap_visitor_score,play_nullified_by_penalty,pass_result,pass_length,offense_formation,receiver_alignment,route_of_targeted_receiver,play_action,dropback_type,dropback_distance,pass_location_type,defenders_in_the_box,team_coverage_man_zone,team_coverage_type,penalty_yards,pre_penalty_yards_gained,yards_gained,expected_points,expected_points_added,pre_snap_home_team_win_probability,pre_snap_visitor_team_win_probability,home_team_win_probability_added,visitor_team_win_probility_added
0,1,2023090700,101,54527,Defensive Coverage,FS,38.734099,36.314143,36.314143,-2.419957,2.6,0.930753,2.026538,3.68,26,2023,09/07/2023,20:20:00,KC,DET,(14:25) (Shotgun) J.Goff pass incomplete deep ...,1,14:25,3,3,DET,KC,DET,32,0,0,N,I,22,SHOTGUN,2x2,CORNER,False,TRADITIONAL,2.13,INSIDE_BOX,6,ZONE_COVERAGE,COVER_2_ZONE,,0,0,0.927021,-2.145443,0.590426,0.409574,0.04972,-0.04972
1,1,2023090700,101,46137,Defensive Coverage,SS,24.078863,19.375388,19.375388,-4.703475,2.6,1.809029,2.212308,5.34,26,2023,09/07/2023,20:20:00,KC,DET,(14:25) (Shotgun) J.Goff pass incomplete deep ...,1,14:25,3,3,DET,KC,DET,32,0,0,N,I,22,SHOTGUN,2x2,CORNER,False,TRADITIONAL,2.13,INSIDE_BOX,6,ZONE_COVERAGE,COVER_2_ZONE,,0,0,0.927021,-2.145443,0.590426,0.409574,0.04972,-0.04972
2,1,2023090700,101,52546,Defensive Coverage,CB,22.288868,19.820143,19.820143,-2.468725,2.6,0.94951,1.91,3.17,26,2023,09/07/2023,20:20:00,KC,DET,(14:25) (Shotgun) J.Goff pass incomplete deep ...,1,14:25,3,3,DET,KC,DET,32,0,0,N,I,22,SHOTGUN,2x2,CORNER,False,TRADITIONAL,2.13,INSIDE_BOX,6,ZONE_COVERAGE,COVER_2_ZONE,,0,0,0.927021,-2.145443,0.590426,0.409574,0.04972,-0.04972
3,1,2023090700,101,53487,Defensive Coverage,MLB,33.570587,26.305413,26.305413,-7.265174,2.6,2.794298,2.965,5.49,26,2023,09/07/2023,20:20:00,KC,DET,(14:25) (Shotgun) J.Goff pass incomplete deep ...,1,14:25,3,3,DET,KC,DET,32,0,0,N,I,22,SHOTGUN,2x2,CORNER,False,TRADITIONAL,2.13,INSIDE_BOX,6,ZONE_COVERAGE,COVER_2_ZONE,,0,0,0.927021,-2.145443,0.590426,0.409574,0.04972,-0.04972
4,1,2023090700,101,54486,Defensive Coverage,CB,44.243102,44.342922,44.243102,0.099821,2.6,-0.038393,2.556923,4.83,26,2023,09/07/2023,20:20:00,KC,DET,(14:25) (Shotgun) J.Goff pass incomplete deep ...,1,14:25,3,3,DET,KC,DET,32,0,0,N,I,22,SHOTGUN,2x2,CORNER,False,TRADITIONAL,2.13,INSIDE_BOX,6,ZONE_COVERAGE,COVER_2_ZONE,,0,0,0.927021,-2.145443,0.590426,0.409574,0.04972,-0.04972


## 2. Player-Route Performance Matrix

In [6]:
print("="*80)
print("PLAYER × ROUTE PERFORMANCE ANALYSIS")
print("="*80)

MIN_PLAYS_ROUTE = 3

player_route_perf = defenders.groupby(['nfl_id', 'route_of_targeted_receiver']).agg({
    'convergence_speed': ['mean', 'std', 'count'],
    'min_distance': 'mean',
    'player_position': lambda x: x.mode()[0] if len(x) > 0 else None
}).round(3)

player_route_perf.columns = ['Avg Conv', 'Std Dev', 'N Plays', 'Avg Min Dist', 'Position']
player_route_perf = player_route_perf[player_route_perf['N Plays'] >= MIN_PLAYS_ROUTE].reset_index()

print(f"\nPlayers with route-specific data: {player_route_perf['nfl_id'].nunique()}")
print(f"Total player-route combinations: {len(player_route_perf)}")

# Identify biggest route vulnerabilities
print("\n" + "="*80)
print("TOP PLAYER-ROUTE VULNERABILITIES TO EXPLOIT")
print("="*80)

player_vulnerabilities = []

for player_id in player_route_perf['nfl_id'].unique():
    player_data = player_route_perf[player_route_perf['nfl_id'] == player_id]
    
    if len(player_data) < 2:
        continue
    
    worst_route_idx = player_data['Avg Conv'].idxmin()
    worst_route = player_data.loc[worst_route_idx]
    
    best_route_idx = player_data['Avg Conv'].idxmax()
    best_route = player_data.loc[best_route_idx]
    
    vulnerability_score = best_route['Avg Conv'] - worst_route['Avg Conv']
    
    player_vulnerabilities.append({
        'nfl_id': player_id,
        'worst_route': worst_route['route_of_targeted_receiver'],
        'worst_route_conv': worst_route['Avg Conv'],
        'worst_route_plays': worst_route['N Plays'],
        'best_route': best_route['route_of_targeted_receiver'],
        'best_route_conv': best_route['Avg Conv'],
        'vulnerability_score': vulnerability_score
    })

vuln_df = pd.DataFrame(player_vulnerabilities)
vuln_df = vuln_df.sort_values('vulnerability_score', ascending=False)

# Add player names and teams (MODIFIED WITH NAN HANDLING)
if player_lookup is not None:
    vuln_df = vuln_df.merge(
        player_lookup[['nfl_id', 'player_name', 'team', 'position']], 
        on='nfl_id', 
        how='left'
    )
    
    # Handle any remaining NaN values (shouldn't happen after filtering, but just in case)
    vuln_df['player_name'] = vuln_df['player_name'].fillna('Unknown Player')
    vuln_df['team'] = vuln_df['team'].fillna('UNK')
    vuln_df['position'] = vuln_df['position'].fillna('UNK')
    
    # Reorder columns
    cols = ['nfl_id', 'player_name', 'team', 'position', 'worst_route', 'worst_route_conv', 
            'worst_route_plays', 'best_route', 'best_route_conv', 'vulnerability_score']
    vuln_df = vuln_df[cols]

print("\nTop 20 Players with Biggest Route Vulnerabilities:")
print("-" * 80)
if player_lookup is not None and 'player_name' in vuln_df.columns:
    display_cols = ['player_name', 'team', 'position', 'worst_route', 'worst_route_conv', 
                    'best_route', 'best_route_conv', 'vulnerability_score']
    print(vuln_df[display_cols].head(20).to_string(index=False))
else:
    print(vuln_df.head(20).to_string(index=False))

vuln_df.to_csv(OUTPUT_DIR / 'definsive_player_route_vulnerabilities.csv', index=False)

PLAYER × ROUTE PERFORMANCE ANALYSIS

Players with route-specific data: 587
Total player-route combinations: 4715

TOP PLAYER-ROUTE VULNERABILITIES TO EXPLOIT

Top 20 Players with Biggest Route Vulnerabilities:
--------------------------------------------------------------------------------
       player_name team position worst_route  worst_route_conv best_route  best_route_conv  vulnerability_score
   Miles Killebrew  PIT       FS       ANGLE            -3.388     CORNER            1.787                5.175
   Darrick Forrest  WAS       FS       ANGLE            -2.419     CORNER            2.470                4.889
         Tre Brown  SEA       CB      SCREEN            -2.326      WHEEL            2.511                4.837
    Andre Chachere  ARI       SS       ANGLE            -3.055     CORNER            1.731                4.786
 Darrell Luter Jr.   SF       CB        FLAT            -1.880         GO            2.810                4.690
      Kerby Joseph  DET       FS     

## 3. Player Performance by Coverage

In [7]:
print("\n" + "="*80)
print("PLAYER × COVERAGE PERFORMANCE ANALYSIS")
print("="*80)

MIN_PLAYS_COV = 5

player_coverage_perf = defenders.groupby(['nfl_id', 'team_coverage_type']).agg({
    'convergence_speed': ['mean', 'count'],
    'min_distance': 'mean',
    'player_position': lambda x: x.mode()[0] if len(x) > 0 else None
}).round(3)

player_coverage_perf.columns = ['Avg Conv', 'N Plays', 'Avg Min Dist', 'Position']
player_coverage_perf = player_coverage_perf[player_coverage_perf['N Plays'] >= MIN_PLAYS_COV].reset_index()

print(f"\nPlayers with coverage-specific data: {player_coverage_perf['nfl_id'].nunique()}")

coverage_vulnerabilities = []

for player_id in player_coverage_perf['nfl_id'].unique():
    player_data = player_coverage_perf[player_coverage_perf['nfl_id'] == player_id]
    
    if len(player_data) < 2:
        continue
    
    worst_cov_idx = player_data['Avg Conv'].idxmin()
    worst_cov = player_data.loc[worst_cov_idx]
    
    best_cov_idx = player_data['Avg Conv'].idxmax()
    best_cov = player_data.loc[best_cov_idx]
    
    cov_diff = best_cov['Avg Conv'] - worst_cov['Avg Conv']
    
    coverage_vulnerabilities.append({
        'nfl_id': player_id,
        'worst_coverage': worst_cov['team_coverage_type'],
        'worst_coverage_conv': worst_cov['Avg Conv'],
        'best_coverage': best_cov['team_coverage_type'],
        'best_coverage_conv': best_cov['Avg Conv'],
        'coverage_diff': cov_diff
    })

cov_vuln_df = pd.DataFrame(coverage_vulnerabilities)
cov_vuln_df = cov_vuln_df.sort_values('coverage_diff', ascending=False)

# Add player names and teams (MODIFIED WITH NAN HANDLING)
if player_lookup is not None:
    cov_vuln_df = cov_vuln_df.merge(
        player_lookup[['nfl_id', 'player_name', 'team', 'position']], 
        on='nfl_id', 
        how='left'
    )
    
    # Handle any remaining NaN values
    cov_vuln_df['player_name'] = cov_vuln_df['player_name'].fillna('Unknown Player')
    cov_vuln_df['team'] = cov_vuln_df['team'].fillna('UNK')
    cov_vuln_df['position'] = cov_vuln_df['position'].fillna('UNK')
    
    cols = ['nfl_id', 'player_name', 'team', 'position', 'worst_coverage', 'worst_coverage_conv', 
            'best_coverage', 'best_coverage_conv', 'coverage_diff']
    cov_vuln_df = cov_vuln_df[cols]

print("\nTop 20 Players with Coverage-Specific Weaknesses:")
print("-" * 80)
if player_lookup is not None and 'player_name' in cov_vuln_df.columns:
    display_cols = ['player_name', 'team', 'position', 'worst_coverage', 'worst_coverage_conv', 
                    'best_coverage', 'best_coverage_conv', 'coverage_diff']
    print(cov_vuln_df[display_cols].head(20).to_string(index=False))
else:
    print(cov_vuln_df.head(20).to_string(index=False))

cov_vuln_df.to_csv(OUTPUT_DIR / 'definsive_player_coverage_vulnerabilities.csv', index=False)


PLAYER × COVERAGE PERFORMANCE ANALYSIS

Players with coverage-specific data: 551

Top 20 Players with Coverage-Specific Weaknesses:
--------------------------------------------------------------------------------
           player_name team position worst_coverage  worst_coverage_conv best_coverage  best_coverage_conv  coverage_diff
      Sam Franklin Jr.  CAR       FS   COVER_2_ZONE               -2.958   COVER_1_MAN               0.094          3.052
         Darnay Holmes  NYG       CB    COVER_0_MAN               -0.855   COVER_1_MAN               2.053          2.908
              JT Woods  LAC       CB   COVER_2_ZONE               -2.645   COVER_2_MAN               0.009          2.654
         Michael Davis  LAC       CB   COVER_2_ZONE               -0.770   COVER_2_MAN               1.808          2.578
           Mike Hughes  ATL       CB   COVER_4_ZONE               -1.684   COVER_2_MAN               0.871          2.555
         Arthur Maulet  BAL       CB   COVER_2_ZONE   

## 4. Distance-Based Performance

In [8]:
print("\n" + "="*80)
print("PLAYER × STARTING DISTANCE ANALYSIS")
print("="*80)

defenders['distance_category'] = pd.cut(
    defenders['initial_distance'],
    bins=[0, 10, 15, 20, 100],
    labels=['Close (<10y)', 'Medium (10-15y)', 'Far (15-20y)', 'Very Far (>20y)']
)

MIN_PLAYS_DIST = 5

player_distance_perf = defenders.groupby(['nfl_id', 'distance_category']).agg({
    'convergence_speed': ['mean', 'count'],
    'min_distance': 'mean',
    'player_position': lambda x: x.mode()[0] if len(x) > 0 else None
}).round(3)

player_distance_perf.columns = ['Avg Conv', 'N Plays', 'Avg Min Dist', 'Position']
player_distance_perf = player_distance_perf[player_distance_perf['N Plays'] >= MIN_PLAYS_DIST].reset_index()

print(f"\nPlayers with distance-specific data: {player_distance_perf['nfl_id'].nunique()}")

distance_vulnerabilities = []

for player_id in player_distance_perf['nfl_id'].unique():
    player_data = player_distance_perf[player_distance_perf['nfl_id'] == player_id]
    
    if len(player_data) < 2:
        continue
    
    worst_dist_idx = player_data['Avg Conv'].idxmin()
    worst_dist = player_data.loc[worst_dist_idx]
    
    best_dist_idx = player_data['Avg Conv'].idxmax()
    best_dist = player_data.loc[best_dist_idx]
    
    dist_diff = best_dist['Avg Conv'] - worst_dist['Avg Conv']
    
    distance_vulnerabilities.append({
        'nfl_id': player_id,
        'worst_distance': worst_dist['distance_category'],
        'worst_distance_conv': worst_dist['Avg Conv'],
        'best_distance': best_dist['distance_category'],
        'best_distance_conv': best_dist['Avg Conv'],
        'distance_diff': dist_diff
    })

dist_vuln_df = pd.DataFrame(distance_vulnerabilities)
dist_vuln_df = dist_vuln_df.sort_values('distance_diff', ascending=False)

# Add player names and teams (MODIFIED WITH NAN HANDLING)
if player_lookup is not None:
    dist_vuln_df = dist_vuln_df.merge(
        player_lookup[['nfl_id', 'player_name', 'team', 'position']], 
        on='nfl_id', 
        how='left'
    )
    
    # Handle any remaining NaN values
    dist_vuln_df['player_name'] = dist_vuln_df['player_name'].fillna('Unknown Player')
    dist_vuln_df['team'] = dist_vuln_df['team'].fillna('UNK')
    dist_vuln_df['position'] = dist_vuln_df['position'].fillna('UNK')
    
    cols = ['nfl_id', 'player_name', 'team', 'position', 'worst_distance', 'worst_distance_conv', 
            'best_distance', 'best_distance_conv', 'distance_diff']
    dist_vuln_df = dist_vuln_df[cols]

print("\nTop 20 Players with Distance-Specific Weaknesses:")
print("-" * 80)
if player_lookup is not None and 'player_name' in dist_vuln_df.columns:
    display_cols = ['player_name', 'team', 'position', 'worst_distance', 'worst_distance_conv', 
                    'best_distance', 'best_distance_conv', 'distance_diff']
    print(dist_vuln_df[display_cols].head(20).to_string(index=False))
else:
    print(dist_vuln_df.head(20).to_string(index=False))

dist_vuln_df.to_csv(OUTPUT_DIR / 'definsive_player_distance_vulnerabilities.csv', index=False)


PLAYER × STARTING DISTANCE ANALYSIS

Players with distance-specific data: 544

Top 20 Players with Distance-Specific Weaknesses:
--------------------------------------------------------------------------------
        player_name team position  worst_distance  worst_distance_conv   best_distance  best_distance_conv  distance_diff
  Darrell Luter Jr.   SF       CB    Close (<10y)               -1.913 Very Far (>20y)               0.794          2.707
         Yaya Diaby   TB      OLB    Close (<10y)               -1.285 Very Far (>20y)               1.391          2.676
     Ka'dar Hollman  HOU       CB    Close (<10y)               -0.700    Far (15-20y)               1.842          2.542
     Brandin Echols  NYJ       CB Medium (10-15y)               -2.005 Very Far (>20y)               0.499          2.504
      Deane Leonard  LAC       CB    Close (<10y)               -1.232 Very Far (>20y)               1.250          2.482
          Alex Cook  CAR       SS    Close (<10y)        

## 5. Comprehensive Player Scouting Card

In [9]:
def generate_player_scouting_card(player_id, data, player_lookup=None):
    """Generate comprehensive player scouting report"""
    player_data = data[data['nfl_id'] == player_id]
    
    if len(player_data) == 0:
        return None
    
    card = {
        'nfl_id': player_id,
        'position': player_data['player_position'].mode()[0] if len(player_data) > 0 else 'UNK',
        'total_plays': len(player_data),
        'avg_convergence': player_data['convergence_speed'].mean(),
        'avg_min_distance': player_data['min_distance'].mean(),
        'avg_speed': player_data['avg_speed'].mean()
    }
    
    if player_lookup is not None:
        player_info = player_lookup[player_lookup['nfl_id'] == player_id]
        if len(player_info) > 0:
            card['player_name'] = player_info['player_name'].iloc[0]
            if 'team' in player_info.columns:
                card['team'] = player_info['team'].iloc[0]
    
    route_perf = player_data.groupby('route_of_targeted_receiver').agg({
        'convergence_speed': 'mean',
        'game_id': 'count'
    }).round(3)
    route_perf.columns = ['Avg Conv', 'N']
    route_perf = route_perf[route_perf['N'] >= 3].sort_values('Avg Conv')
    
    card['worst_routes'] = route_perf.head(3).to_dict()
    card['best_routes'] = route_perf.tail(3).to_dict()
    
    if 'team_coverage_type' in player_data.columns:
        cov_perf = player_data.groupby('team_coverage_type').agg({
            'convergence_speed': 'mean',
            'game_id': 'count'
        }).round(3)
        cov_perf.columns = ['Avg Conv', 'N']
        cov_perf = cov_perf[cov_perf['N'] >= 3].sort_values('Avg Conv')
        
        card['coverage_performance'] = cov_perf.to_dict()
    
    player_data_dist = player_data.copy()
    player_data_dist['dist_cat'] = pd.cut(
        player_data_dist['initial_distance'],
        bins=[0, 10, 15, 20, 100],
        labels=['<10y', '10-15y', '15-20y', '>20y']
    )
    
    dist_perf = player_data_dist.groupby('dist_cat').agg({
        'convergence_speed': 'mean',
        'game_id': 'count'
    }).round(3)
    dist_perf.columns = ['Avg Conv', 'N']
    dist_perf = dist_perf[dist_perf['N'] >= 3]
    
    card['distance_performance'] = dist_perf.to_dict()
    
    return card

print("\n" + "="*80)
print("GENERATING INDIVIDUAL PLAYER SCOUTING CARDS")
print("="*80)

top_route_vuln = vuln_df.head(10)['nfl_id'].tolist()
top_cov_vuln = cov_vuln_df.head(10)['nfl_id'].tolist()
top_dist_vuln = dist_vuln_df.head(10)['nfl_id'].tolist()

priority_players = list(set(top_route_vuln + top_cov_vuln + top_dist_vuln))

player_cards = {}

for player_id in priority_players:
    card = generate_player_scouting_card(player_id, defenders, player_lookup)
    if card:
        player_cards[player_id] = card
        name_str = f"{card.get('player_name', f'Player #{player_id}')}"
        team_str = f" ({card.get('team', 'UNK')})" if 'team' in card else ""
        pos_str = f" - {card['position']}"
        print(f"✓ Generated card for {name_str}{team_str}{pos_str}")

print(f"\n✅ Generated {len(player_cards)} player scouting cards")


GENERATING INDIVIDUAL PLAYER SCOUTING CARDS
✓ Generated card for Michael Davis (LAC) - CB
✓ Generated card for Miles Killebrew (PIT) - FS
✓ Generated card for Christian Elliss (PHI) - ILB
✓ Generated card for Yaya Diaby (TB) - OLB
✓ Generated card for JT Woods (LAC) - CB
✓ Generated card for Mike Hughes (ATL) - CB
✓ Generated card for Jaylon Jones (IND) - CB
✓ Generated card for Kerby Joseph (DET) - FS
✓ Generated card for Darnay Holmes (NYG) - CB
✓ Generated card for Alex Cook (CAR) - SS
✓ Generated card for M.J. Stewart (HOU) - FS
✓ Generated card for Clark Phillips III (ATL) - CB
✓ Generated card for Deane Leonard (LAC) - CB
✓ Generated card for Jonathan Jones (NE) - CB
✓ Generated card for Tre Brown (SEA) - CB
✓ Generated card for A.J. Terrell (ATL) - CB
✓ Generated card for Sam Franklin Jr. (CAR) - FS
✓ Generated card for Quindell Johnson (CHI) - SS
✓ Generated card for Deonte Banks (NYG) - CB
✓ Generated card for Arthur Maulet (BAL) - CB
✓ Generated card for Darrell Luter Jr. (S

## 6. Visualize Player Matchup Matrices

In [10]:
print("\n" + "="*80)
print("CREATING PLAYER-ROUTE HEATMAP")
print("="*80)

top_players = vuln_df.head(20)['nfl_id'].tolist()

player_route_matrix = player_route_perf[player_route_perf['nfl_id'].isin(top_players)]

# Add player names with NaN handling (MODIFIED)
if player_lookup is not None:
    player_route_matrix = player_route_matrix.merge(
        player_lookup[['nfl_id', 'player_name', 'team']], 
        on='nfl_id', 
        how='left'
    )
    
    # Handle NaN values in player_name and team
    player_route_matrix['player_name'] = player_route_matrix['player_name'].fillna('Unknown')
    player_route_matrix['team'] = player_route_matrix['team'].fillna('UNK')
    
    player_route_matrix['player_label'] = player_route_matrix.apply(
        lambda x: f"{x['player_name']} ({x['team']})", 
        axis=1
    )
    index_col = 'player_label'
else:
    player_route_matrix['player_label'] = player_route_matrix['nfl_id'].apply(lambda x: f"Player #{int(x)}")
    index_col = 'player_label'

matrix_pivot = player_route_matrix.pivot_table(
    values='Avg Conv',
    index=index_col,
    columns='route_of_targeted_receiver',
    aggfunc='mean'
)

if len(matrix_pivot) > 0:
    matrix_pivot.to_csv(OUTPUT_DIR / 'definisve_player_route_vulnerability_matrix.csv')
    
    print(f"\n✅ Player-Route Matrix Created ({len(matrix_pivot)} players × {len(matrix_pivot.columns)} routes)")
    print("\nTop 10 players in matrix:")
    print(matrix_pivot.head(10))
    
    fig, ax = plt.subplots(figsize=(14, 10))
    
    sns.heatmap(matrix_pivot, annot=True, fmt='.2f', cmap='RdYlGn', 
               center=0, ax=ax, cbar_kws={'label': 'Avg Convergence (yd/s)'})
    
    ax.set_title('Player-Route Vulnerability Matrix (Top 20 Vulnerable Players)\n' + 
                'Lower values (red) = Attack these routes against these players',
                fontsize=14, fontweight='bold')
    ax.set_xlabel('Route Type', fontsize=12)
    ax.set_ylabel('Player Name (Team)', fontsize=12)
    
    plt.tight_layout()
    plt.savefig(OUTPUT_DIR / 'definsive_player_route_heatmap.png', dpi=300, bbox_inches='tight')
    plt.close()
    
    print("✅ Heatmap saved!")
else:
    print("⚠️ No data for heatmap")


CREATING PLAYER-ROUTE HEATMAP

✅ Player-Route Matrix Created (20 players × 12 routes)

Top 10 players in matrix:
route_of_targeted_receiver  ANGLE  CORNER  CROSS   FLAT     GO  HITCH     IN  \
player_label                                                                   
A.J. Terrell (ATL)         -2.292   1.319 -0.104 -0.594  1.618 -0.092  0.352   
Alex Cook (CAR)            -2.665   1.531  0.747 -0.693  0.784 -0.646 -1.606   
Andre Chachere (ARI)       -3.055   1.731 -1.339 -1.226  0.037 -0.726 -1.438   
Antonio Hamilton (ARI)     -1.903   0.812 -0.335 -1.038  2.239 -0.350 -0.111   
Clark Phillips III (ATL)   -2.475   1.343  0.120 -0.841  1.669 -0.436  0.809   
Darrell Luter Jr. (SF)        NaN     NaN    NaN -1.880  2.810    NaN    NaN   
Darrick Forrest (WAS)      -2.419   2.470 -0.748 -0.305  0.904 -0.607 -0.432   
Jaylon Jones (IND)         -1.861   1.467 -0.918 -1.241  0.828 -0.220 -0.200   
Joejuan Williams (MIN)     -1.688     NaN -1.059 -1.568  2.560 -0.910 -0.398   
Jordan

## 7. Player Quick Reference Cards (Visual)

In [11]:
def create_visual_player_card(player_id, card, filename):
    """Create visual quick reference card for a player"""
    fig, axes = plt.subplots(2, 2, figsize=(12, 10))
    
    player_name = card.get('player_name', f'Player #{player_id}')
    team = card.get('team', 'UNK')
    position = card['position']
    
    title = f'{player_name} ({team}) - {position}\nSCOUTING CARD'
    fig.suptitle(title, fontsize=14, fontweight='bold')
    
    ax = axes[0, 0]
    ax.axis('off')
    
    stats_text = f"""
OVERALL STATS
━━━━━━━━━━━━━━━━━━
Total Plays: {card['total_plays']}
Avg Convergence: {card['avg_convergence']:.3f} yd/s
Avg Min Distance: {card['avg_min_distance']:.2f} yards
Avg Speed: {card['avg_speed']:.2f} yd/s
    """
    
    ax.text(0.1, 0.5, stats_text, fontsize=10, family='monospace',
           bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))
    
    ax = axes[0, 1]
    ax.axis('off')
    
    routes_text = "🎯 ROUTES TO ATTACK\n" + "━"*25 + "\n"
    if 'worst_routes' in card and 'Avg Conv' in card['worst_routes']:
        for route, conv in list(card['worst_routes']['Avg Conv'].items())[:3]:
            n = card['worst_routes']['N'].get(route, 0)
            routes_text += f"• {route}: {conv:.3f} yd/s ({int(n)})\n"
    
    ax.text(0.1, 0.5, routes_text, fontsize=10, family='monospace',
           bbox=dict(boxstyle='round', facecolor='lightgreen', alpha=0.5))
    
    ax = axes[1, 0]
    ax.axis('off')
    
    cov_text = "📊 COVERAGE NOTES\n" + "━"*25 + "\n"
    if 'coverage_performance' in card and 'Avg Conv' in card['coverage_performance']:
        for cov, conv in list(card['coverage_performance']['Avg Conv'].items())[:3]:
            cov_text += f"• {cov}: {conv:.3f} yd/s\n"
    
    ax.text(0.1, 0.5, cov_text, fontsize=10, family='monospace',
           bbox=dict(boxstyle='round', facecolor='lightblue', alpha=0.5))
    
    ax = axes[1, 1]
    ax.axis('off')
    
    dist_text = "📏 DISTANCE PROFILE\n" + "━"*25 + "\n"
    if 'distance_performance' in card and 'Avg Conv' in card['distance_performance']:
        for dist, conv in card['distance_performance']['Avg Conv'].items():
            dist_text += f"• {dist}: {conv:.3f} yd/s\n"
    
    ax.text(0.1, 0.5, dist_text, fontsize=10, family='monospace',
           bbox=dict(boxstyle='round', facecolor='lightyellow', alpha=0.5))
    
    plt.tight_layout()
    plt.savefig(filename, dpi=300, bbox_inches='tight')
    plt.close()

print("\n" + "="*80)
print("GENERATING VISUAL PLAYER CARDS")
print("="*80)

cards_dir = OUTPUT_DIR / 'definsive_cards'
cards_dir.mkdir(exist_ok=True)

for player_id, card in player_cards.items():
    if 'player_name' in card:
        clean_name = card['player_name'].replace(' ', '_').replace('.', '')
        team = card.get('team', 'UNK')
        card_file = cards_dir / f'{clean_name}_{team}_{int(player_id)}.png'
    else:
        card_file = cards_dir / f'player_{int(player_id)}_card.png'
    
    create_visual_player_card(player_id, card, card_file)

print(f"✓ Generated {len(player_cards)} visual cards")
print(f"✅ Cards saved to: {cards_dir}")


GENERATING VISUAL PLAYER CARDS
✓ Generated 29 visual cards
✅ Cards saved to: ..\data\processed\definisive_player_matchups\definsive_cards


## 8. Summary Report

In [12]:
print("\n" + "="*80)
print("PLAYER MATCHUP ANALYSIS - SUMMARY")
print("="*80)

print(f"""
ANALYSIS COMPLETE:

PLAYERS ANALYZED:
- Total defenders: {defenders['nfl_id'].nunique():,}
- Players with route data: {player_route_perf['nfl_id'].nunique():,}
- Players with coverage data: {player_coverage_perf['nfl_id'].nunique():,}
- Players with distance data: {player_distance_perf['nfl_id'].nunique():,}
- Priority scouting cards generated: {len(player_cards)}

TOP 3 BIGGEST ROUTE VULNERABILITIES:
""")

for idx, (i, row) in enumerate(vuln_df.head(3).iterrows(), 1):
    if 'player_name' in vuln_df.columns:
        player_str = f"{row['player_name']} ({row.get('team', 'UNK')}) - {row['position']}"
    else:
        player_str = f"Player #{int(row['nfl_id'])}"
    
    print(f"  {idx}. {player_str}")
    print(f"     Worst on: {row['worst_route']} ({row['worst_route_conv']:.3f} yd/s)")
    print(f"     Best on: {row['best_route']} ({row['best_route_conv']:.3f} yd/s)")
    print(f"     Vulnerability gap: {row['vulnerability_score']:.3f} yd/s\n")

print(f"""
TOP 3 COVERAGE-SPECIFIC WEAKNESSES:
""")

for idx, (i, row) in enumerate(cov_vuln_df.head(3).iterrows(), 1):
    if 'player_name' in cov_vuln_df.columns:
        player_str = f"{row['player_name']} ({row.get('team', 'UNK')}) - {row['position']}"
    else:
        player_str = f"Player #{int(row['nfl_id'])}"
    
    print(f"  {idx}. {player_str}")
    print(f"     Worst in: {row['worst_coverage']} ({row['worst_coverage_conv']:.3f} yd/s)")
    print(f"     Best in: {row['best_coverage']} ({row['best_coverage_conv']:.3f} yd/s)")
    print(f"     Coverage gap: {row['coverage_diff']:.3f} yd/s\n")

print(f"""
FILES GENERATED:
✓ player_route_vulnerabilities.csv - Route-specific weaknesses WITH NAMES
✓ player_coverage_vulnerabilities.csv - Coverage-specific weaknesses WITH NAMES
✓ player_distance_vulnerabilities.csv - Distance-specific weaknesses WITH NAMES
✓ player_route_heatmap.png - Visual matrix WITH NAMES
✓ player_route_vulnerability_matrix.csv - Matrix WITH NAMES
✓ player_cards/ - Individual player scouting cards WITH NAMES

USAGE:
1. Identify opponent's defenders from team roster
2. Check their vulnerability files for specific weaknesses
3. Use visual cards for quick reference during game planning
4. Design plays to exploit identified route/coverage mismatches
""")

print("="*80)
print("✅ PLAYER MATCHUP ANALYSIS COMPLETE!")
print("="*80)


PLAYER MATCHUP ANALYSIS - SUMMARY

ANALYSIS COMPLETE:

PLAYERS ANALYZED:
- Total defenders: 797
- Players with route data: 587
- Players with coverage data: 551
- Players with distance data: 544
- Priority scouting cards generated: 29

TOP 3 BIGGEST ROUTE VULNERABILITIES:

  1. Miles Killebrew (PIT) - FS
     Worst on: ANGLE (-3.388 yd/s)
     Best on: CORNER (1.787 yd/s)
     Vulnerability gap: 5.175 yd/s

  2. Darrick Forrest (WAS) - FS
     Worst on: ANGLE (-2.419 yd/s)
     Best on: CORNER (2.470 yd/s)
     Vulnerability gap: 4.889 yd/s

  3. Tre Brown (SEA) - CB
     Worst on: SCREEN (-2.326 yd/s)
     Best on: WHEEL (2.511 yd/s)
     Vulnerability gap: 4.837 yd/s


TOP 3 COVERAGE-SPECIFIC WEAKNESSES:

  1. Sam Franklin Jr. (CAR) - FS
     Worst in: COVER_2_ZONE (-2.958 yd/s)
     Best in: COVER_1_MAN (0.094 yd/s)
     Coverage gap: 3.052 yd/s

  2. Darnay Holmes (NYG) - CB
     Worst in: COVER_0_MAN (-0.855 yd/s)
     Best in: COVER_1_MAN (2.053 yd/s)
     Coverage gap: 2.908 yd