# NFL Big Data Bowl 2026 - Offensive Player Matchup Matrix

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

pd.set_option('display.max_columns', None)
sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = (16, 8)
plt.rcParams['font.size'] = 11

print("✅ Setup complete!")

✅ Setup complete!


## 1. Load Data

In [2]:
DATA_DIR = Path('../data')
PROCESSED_DIR = DATA_DIR / 'processed'
COMPETITION_DIR = DATA_DIR / '114239_nfl_competition_files_published_analytics_final'
TRAIN_DIR = COMPETITION_DIR / 'train'
OUTPUT_DIR = PROCESSED_DIR / 'offensive_matchups'
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

print("Loading data...")

convergence_df = pd.read_csv(PROCESSED_DIR / 'convergence_speed_all_plays.csv')
print(f"✓ Convergence data: {len(convergence_df):,} rows")

supp_data = pd.read_csv(COMPETITION_DIR / 'supplementary_data.csv')
print(f"✓ Supplementary data: {len(supp_data):,} rows")

merged_df = convergence_df.merge(
    supp_data,
    on=['game_id', 'play_id', 'week'],
    how='left'
)

print(f"✓ Merged data: {len(merged_df):,} rows")

# Filter for OFFENSIVE players (targeted receivers)
offense = merged_df[merged_df['player_role'] == 'Targeted Receiver'].copy()
print(f"✓ Offensive player (receiver) instances: {len(offense):,}")

Loading data...
✓ Convergence data: 173,150 rows
✓ Supplementary data: 18,009 rows
✓ Merged data: 173,150 rows
✓ Offensive player (receiver) instances: 14,108


In [3]:
offense.head()

Unnamed: 0,week,game_id,play_id,nfl_id,player_role,player_position,initial_distance,final_distance,min_distance,distance_change,time_elapsed,convergence_speed,avg_speed,max_speed,num_frames,season,game_date,game_time_eastern,home_team_abbr,visitor_team_abbr,play_description,quarter,game_clock,down,yards_to_go,possession_team,defensive_team,yardline_side,yardline_number,pre_snap_home_score,pre_snap_visitor_score,play_nullified_by_penalty,pass_result,pass_length,offense_formation,receiver_alignment,route_of_targeted_receiver,play_action,dropback_type,dropback_distance,pass_location_type,defenders_in_the_box,team_coverage_man_zone,team_coverage_type,penalty_yards,pre_penalty_yards_gained,yards_gained,expected_points,expected_points_added,pre_snap_home_team_win_probability,pre_snap_visitor_team_win_probability,home_team_win_probability_added,visitor_team_win_probility_added
8,1,2023090700,101,44930,Targeted Receiver,WR,25.449655,17.986063,17.986063,-7.463592,2.6,2.870612,4.611154,7.9,26,2023,09/07/2023,20:20:00,KC,DET,(14:25) (Shotgun) J.Goff pass incomplete deep ...,1,14:25,3,3,DET,KC,DET,32,0,0,N,I,22,SHOTGUN,2x2,CORNER,False,TRADITIONAL,2.13,INSIDE_BOX,6,ZONE_COVERAGE,COVER_2_ZONE,,0,0,0.927021,-2.145443,0.590426,0.409574,0.04972,-0.04972
21,1,2023090700,194,41325,Targeted Receiver,RB,10.373141,4.068413,4.068413,-6.304729,3.2,1.970228,2.72375,6.13,32,2023,09/07/2023,20:20:00,KC,DET,(12:56) (Shotgun) P.Mahomes pass short left to...,1,12:56,3,2,KC,DET,KC,21,0,0,N,C,6,SHOTGUN,2x2,ANGLE,False,TRADITIONAL,3.86,INSIDE_BOX,6,MAN_COVERAGE,COVER_1_MAN,,10,10,0.121505,1.702563,0.582476,0.417524,0.047277,-0.047277
34,1,2023090700,219,53591,Targeted Receiver,TE,4.794165,1.276636,1.276636,-3.517528,1.7,2.069134,2.78,4.69,17,2023,09/07/2023,20:20:00,KC,DET,(12:20) (Shotgun) P.Mahomes pass short left to...,1,12:20,1,10,KC,DET,KC,31,0,0,N,C,4,SHOTGUN,2x2,HITCH,False,TRADITIONAL,2.37,INSIDE_BOX,6,ZONE_COVERAGE,COVER_4_ZONE,,5,5,1.824068,0.089352,0.629753,0.370247,0.00042,-0.00042
45,1,2023090700,361,38696,Targeted Receiver,WR,12.527606,8.134531,4.191515,-4.393075,5.1,0.861387,3.50098,6.78,51,2023,09/07/2023,20:20:00,KC,DET,(10:06) (Shotgun) J.Goff pass short left to M....,1,10:06,3,7,DET,KC,DET,12,0,0,N,C,5,SHOTGUN,3x1,HITCH,False,TRADITIONAL,3.03,OUTSIDE_RIGHT,6,ZONE_COVERAGE,COVER_4_ZONE,,5,5,-1.04916,-0.862062,0.60631,0.39369,0.019525,-0.019525
56,1,2023090700,436,53541,Targeted Receiver,WR,5.540297,3.220062,3.220062,-2.320235,2.0,1.160118,2.512,4.43,20,2023,09/07/2023,20:20:00,KC,DET,(8:09) (Shotgun) J.Goff pass short left to A.S...,1,08:09,2,9,DET,KC,DET,21,0,0,N,C,6,SHOTGUN,2x2,SLANT,False,TRADITIONAL,2.55,INSIDE_BOX,6,MAN_COVERAGE,COVER_1_MAN,,13,13,0.312204,1.613927,0.575537,0.424463,-0.030918,0.030918


In [4]:
# LOAD PLAYER NAMES FROM ALL INPUT FILES
print("\n" + "="*80)
print("LOADING PLAYER NAMES FROM ALL INPUT FILES")
print("="*80)

input_files = sorted(TRAIN_DIR.glob('input_2023_w*.csv'))
print(f"Found {len(input_files)} input files")

all_players = []

if len(input_files) > 0:
    for input_file in input_files:
        print(f"Loading: {input_file.name}...", end=" ")
        try:
            input_df = pd.read_csv(input_file)
            
            if 'nfl_id' in input_df.columns and 'player_name' in input_df.columns:
                file_players = input_df[['nfl_id', 'player_name', 'player_position']].drop_duplicates('nfl_id')
                all_players.append(file_players)
                print(f"✓ {len(file_players)} unique players")
            else:
                print(f"⚠️ Missing required columns")
        except Exception as e:
            print(f"❌ Error: {e}")
    
    if len(all_players) > 0:
        player_lookup = pd.concat(all_players, ignore_index=True).drop_duplicates('nfl_id', keep='first')
        player_lookup.columns = ['nfl_id', 'player_name', 'position']
        
        print(f"\n✅ TOTAL UNIQUE PLAYERS LOADED: {len(player_lookup)}")
        
        # Add team info for OFFENSE
        if 'possession_team' in offense.columns:
            team_lookup = offense.groupby('nfl_id')['possession_team'].agg(
                lambda x: x.mode()[0] if len(x.mode()) > 0 else 'UNK'
            ).reset_index()
            team_lookup.columns = ['nfl_id', 'team']
            player_lookup = player_lookup.merge(team_lookup, on='nfl_id', how='left')
            player_lookup['team'] = player_lookup['team'].fillna('UNK')
            print(f"✓ Added team info")
        
        # Filter offense to only include players we have names for
        print(f"\nBefore filtering: {offense['nfl_id'].nunique()} unique offensive players")
        offense = offense[offense['nfl_id'].isin(player_lookup['nfl_id'])].copy()
        print(f"After filtering: {offense['nfl_id'].nunique()} unique offensive players")
        print(f"✓ Filtered to only players with metadata\n")
        
        print(f"📋 Sample of loaded player names:")
        print(player_lookup.head(15).to_string(index=False))
    else:
        print("❌ No player data could be loaded!")
        player_lookup = None
else:
    print("❌ No input files found!")
    player_lookup = None


LOADING PLAYER NAMES FROM ALL INPUT FILES
Found 19 input files
Loading: input_2023_w01 - sample.csv... ✓ 165 unique players
Loading: input_2023_w01.csv... ✓ 737 unique players
Loading: input_2023_w02.csv... ✓ 746 unique players
Loading: input_2023_w03.csv... ✓ 754 unique players
Loading: input_2023_w04.csv... ✓ 750 unique players
Loading: input_2023_w05.csv... ✓ 656 unique players
Loading: input_2023_w06.csv... ✓ 706 unique players
Loading: input_2023_w07.csv... ✓ 611 unique players
Loading: input_2023_w08.csv... ✓ 740 unique players
Loading: input_2023_w09.csv... ✓ 671 unique players
Loading: input_2023_w10.csv... ✓ 671 unique players
Loading: input_2023_w11.csv... ✓ 650 unique players
Loading: input_2023_w12.csv... ✓ 745 unique players
Loading: input_2023_w13.csv... ✓ 614 unique players
Loading: input_2023_w14.csv... ✓ 704 unique players
Loading: input_2023_w15.csv... ✓ 754 unique players
Loading: input_2023_w16.csv... ✓ 757 unique players
Loading: input_2023_w17.csv... ✓ 741 unique

## 2. Receiver Performance by Coverage Type

In [5]:
print("="*80)
print("RECEIVER × COVERAGE TYPE PERFORMANCE ANALYSIS")
print("="*80)
print("Higher convergence = Better separation (GOOD for offense)")
print("="*80)

MIN_PLAYS_COV = 5

receiver_coverage_perf = offense.groupby(['nfl_id', 'team_coverage_type']).agg({
    'convergence_speed': ['mean', 'std', 'count'],
    'min_distance': 'mean',
    'player_position': lambda x: x.mode()[0] if len(x) > 0 else None
}).round(3)

receiver_coverage_perf.columns = ['Avg Conv', 'Std Dev', 'N Plays', 'Avg Min Dist', 'Position']
receiver_coverage_perf = receiver_coverage_perf[receiver_coverage_perf['N Plays'] >= MIN_PLAYS_COV].reset_index()

print(f"\nReceivers with coverage-specific data: {receiver_coverage_perf['nfl_id'].nunique()}")
print(f"Total receiver-coverage combinations: {len(receiver_coverage_perf)}")

# Identify coverage strengths/weaknesses
print("\n" + "="*80)
print("TOP RECEIVERS WHO DOMINATE/STRUGGLE VS SPECIFIC COVERAGES")
print("="*80)

coverage_matchups = []

for player_id in receiver_coverage_perf['nfl_id'].unique():
    player_data = receiver_coverage_perf[receiver_coverage_perf['nfl_id'] == player_id]
    
    if len(player_data) < 2:
        continue
    
    # For offense, HIGHEST convergence = best (getting separation)
    best_cov_idx = player_data['Avg Conv'].idxmax()
    best_cov = player_data.loc[best_cov_idx]
    
    worst_cov_idx = player_data['Avg Conv'].idxmin()
    worst_cov = player_data.loc[worst_cov_idx]
    
    coverage_gap = best_cov['Avg Conv'] - worst_cov['Avg Conv']
    
    coverage_matchups.append({
        'nfl_id': player_id,
        'best_coverage': best_cov['team_coverage_type'],
        'best_coverage_conv': best_cov['Avg Conv'],
        'best_coverage_plays': best_cov['N Plays'],
        'worst_coverage': worst_cov['team_coverage_type'],
        'worst_coverage_conv': worst_cov['Avg Conv'],
        'worst_coverage_plays': worst_cov['N Plays'],
        'coverage_gap': coverage_gap
    })

cov_matchup_df = pd.DataFrame(coverage_matchups)
cov_matchup_df = cov_matchup_df.sort_values('coverage_gap', ascending=False)

# Add player names
if player_lookup is not None:
    cov_matchup_df = cov_matchup_df.merge(
        player_lookup[['nfl_id', 'player_name', 'team', 'position']], 
        on='nfl_id', 
        how='left'
    )
    cov_matchup_df['player_name'] = cov_matchup_df['player_name'].fillna('Unknown Player')
    cov_matchup_df['team'] = cov_matchup_df['team'].fillna('UNK')
    cov_matchup_df['position'] = cov_matchup_df['position'].fillna('UNK')
    
    cols = ['nfl_id', 'player_name', 'team', 'position', 'best_coverage', 'best_coverage_conv',
            'best_coverage_plays', 'worst_coverage', 'worst_coverage_conv', 'worst_coverage_plays', 'coverage_gap']
    cov_matchup_df = cov_matchup_df[cols]

print("\nTop 20 Receivers with Biggest Coverage Performance Gaps:")
print("-" * 80)
if player_lookup is not None and 'player_name' in cov_matchup_df.columns:
    display_cols = ['player_name', 'team', 'position', 'best_coverage', 'best_coverage_conv',
                    'worst_coverage', 'worst_coverage_conv', 'coverage_gap']
    print(cov_matchup_df[display_cols].head(20).to_string(index=False))
else:
    print(cov_matchup_df.head(20).to_string(index=False))

cov_matchup_df.to_csv(OUTPUT_DIR / 'receiver_coverage_matchups.csv', index=False)

RECEIVER × COVERAGE TYPE PERFORMANCE ANALYSIS
Higher convergence = Better separation (GOOD for offense)

Receivers with coverage-specific data: 289
Total receiver-coverage combinations: 945

TOP RECEIVERS WHO DOMINATE/STRUGGLE VS SPECIFIC COVERAGES

Top 20 Receivers with Biggest Coverage Performance Gaps:
--------------------------------------------------------------------------------
       player_name team position best_coverage  best_coverage_conv worst_coverage  worst_coverage_conv  coverage_gap
         Zay Jones  JAX       WR  COVER_2_ZONE               4.739    COVER_0_MAN                1.861         2.878
   Deven Thompkins   TB       WR  COVER_3_ZONE               4.278    COVER_1_MAN                1.623         2.655
     Kalif Raymond  DET       WR  COVER_6_ZONE               4.430   COVER_4_ZONE                2.003         2.427
        DK Metcalf  SEA       WR  COVER_2_ZONE               4.463   COVER_4_ZONE                2.282         2.181
     Byron Pringle  WAS    

## 3. Receiver Performance by Route Type

In [6]:
print("\n" + "="*80)
print("RECEIVER × ROUTE TYPE PERFORMANCE ANALYSIS")
print("="*80)

MIN_PLAYS_ROUTE = 3

receiver_route_perf = offense.groupby(['nfl_id', 'route_of_targeted_receiver']).agg({
    'convergence_speed': ['mean', 'std', 'count'],
    'min_distance': 'mean',
    'player_position': lambda x: x.mode()[0] if len(x) > 0 else None
}).round(3)

receiver_route_perf.columns = ['Avg Conv', 'Std Dev', 'N Plays', 'Avg Min Dist', 'Position']
receiver_route_perf = receiver_route_perf[receiver_route_perf['N Plays'] >= MIN_PLAYS_ROUTE].reset_index()

print(f"\nReceivers with route-specific data: {receiver_route_perf['nfl_id'].nunique()}")

route_strengths = []

for player_id in receiver_route_perf['nfl_id'].unique():
    player_data = receiver_route_perf[receiver_route_perf['nfl_id'] == player_id]
    
    if len(player_data) < 2:
        continue
    
    best_route_idx = player_data['Avg Conv'].idxmax()
    best_route = player_data.loc[best_route_idx]
    
    worst_route_idx = player_data['Avg Conv'].idxmin()
    worst_route = player_data.loc[worst_route_idx]
    
    route_gap = best_route['Avg Conv'] - worst_route['Avg Conv']
    
    route_strengths.append({
        'nfl_id': player_id,
        'best_route': best_route['route_of_targeted_receiver'],
        'best_route_conv': best_route['Avg Conv'],
        'best_route_plays': best_route['N Plays'],
        'worst_route': worst_route['route_of_targeted_receiver'],
        'worst_route_conv': worst_route['Avg Conv'],
        'worst_route_plays': worst_route['N Plays'],
        'route_gap': route_gap
    })

route_strength_df = pd.DataFrame(route_strengths)
route_strength_df = route_strength_df.sort_values('route_gap', ascending=False)

if player_lookup is not None:
    route_strength_df = route_strength_df.merge(
        player_lookup[['nfl_id', 'player_name', 'team', 'position']], 
        on='nfl_id', 
        how='left'
    )
    route_strength_df['player_name'] = route_strength_df['player_name'].fillna('Unknown Player')
    route_strength_df['team'] = route_strength_df['team'].fillna('UNK')
    route_strength_df['position'] = route_strength_df['position'].fillna('UNK')
    
    cols = ['nfl_id', 'player_name', 'team', 'position', 'best_route', 'best_route_conv',
            'best_route_plays', 'worst_route', 'worst_route_conv', 'worst_route_plays', 'route_gap']
    route_strength_df = route_strength_df[cols]

print("\nTop 20 Receivers with Route-Specific Strengths:")
print("-" * 80)
if player_lookup is not None and 'player_name' in route_strength_df.columns:
    display_cols = ['player_name', 'team', 'position', 'best_route', 'best_route_conv',
                    'worst_route', 'worst_route_conv', 'route_gap']
    print(route_strength_df[display_cols].head(20).to_string(index=False))
else:
    print(route_strength_df.head(20).to_string(index=False))

route_strength_df.to_csv(OUTPUT_DIR / 'receiver_route_strengths.csv', index=False)


RECEIVER × ROUTE TYPE PERFORMANCE ANALYSIS

Receivers with route-specific data: 348

Top 20 Receivers with Route-Specific Strengths:
--------------------------------------------------------------------------------
       player_name team position best_route  best_route_conv worst_route  worst_route_conv  route_gap
    Terry McLaurin  WAS       WR       POST            5.951      SCREEN             0.061      5.890
    Rashid Shaheed   NO       WR       POST            5.514      SCREEN             0.382      5.132
       Jerry Jeudy  DEN       WR       POST            5.616      SCREEN             0.864      4.752
       Jayden Reed   GB       WR      CROSS            5.541       SLANT             0.898      4.643
    DeVante Parker   NE       WR      CROSS            4.450      SCREEN             0.089      4.361
       Zay Flowers  BAL       WR         GO            4.947      SCREEN             0.693      4.254
Jaxon Smith-Njigba  SEA       WR         GO            4.924      SCREE

In [7]:
offense.head()

Unnamed: 0,week,game_id,play_id,nfl_id,player_role,player_position,initial_distance,final_distance,min_distance,distance_change,time_elapsed,convergence_speed,avg_speed,max_speed,num_frames,season,game_date,game_time_eastern,home_team_abbr,visitor_team_abbr,play_description,quarter,game_clock,down,yards_to_go,possession_team,defensive_team,yardline_side,yardline_number,pre_snap_home_score,pre_snap_visitor_score,play_nullified_by_penalty,pass_result,pass_length,offense_formation,receiver_alignment,route_of_targeted_receiver,play_action,dropback_type,dropback_distance,pass_location_type,defenders_in_the_box,team_coverage_man_zone,team_coverage_type,penalty_yards,pre_penalty_yards_gained,yards_gained,expected_points,expected_points_added,pre_snap_home_team_win_probability,pre_snap_visitor_team_win_probability,home_team_win_probability_added,visitor_team_win_probility_added
8,1,2023090700,101,44930,Targeted Receiver,WR,25.449655,17.986063,17.986063,-7.463592,2.6,2.870612,4.611154,7.9,26,2023,09/07/2023,20:20:00,KC,DET,(14:25) (Shotgun) J.Goff pass incomplete deep ...,1,14:25,3,3,DET,KC,DET,32,0,0,N,I,22,SHOTGUN,2x2,CORNER,False,TRADITIONAL,2.13,INSIDE_BOX,6,ZONE_COVERAGE,COVER_2_ZONE,,0,0,0.927021,-2.145443,0.590426,0.409574,0.04972,-0.04972
21,1,2023090700,194,41325,Targeted Receiver,RB,10.373141,4.068413,4.068413,-6.304729,3.2,1.970228,2.72375,6.13,32,2023,09/07/2023,20:20:00,KC,DET,(12:56) (Shotgun) P.Mahomes pass short left to...,1,12:56,3,2,KC,DET,KC,21,0,0,N,C,6,SHOTGUN,2x2,ANGLE,False,TRADITIONAL,3.86,INSIDE_BOX,6,MAN_COVERAGE,COVER_1_MAN,,10,10,0.121505,1.702563,0.582476,0.417524,0.047277,-0.047277
34,1,2023090700,219,53591,Targeted Receiver,TE,4.794165,1.276636,1.276636,-3.517528,1.7,2.069134,2.78,4.69,17,2023,09/07/2023,20:20:00,KC,DET,(12:20) (Shotgun) P.Mahomes pass short left to...,1,12:20,1,10,KC,DET,KC,31,0,0,N,C,4,SHOTGUN,2x2,HITCH,False,TRADITIONAL,2.37,INSIDE_BOX,6,ZONE_COVERAGE,COVER_4_ZONE,,5,5,1.824068,0.089352,0.629753,0.370247,0.00042,-0.00042
45,1,2023090700,361,38696,Targeted Receiver,WR,12.527606,8.134531,4.191515,-4.393075,5.1,0.861387,3.50098,6.78,51,2023,09/07/2023,20:20:00,KC,DET,(10:06) (Shotgun) J.Goff pass short left to M....,1,10:06,3,7,DET,KC,DET,12,0,0,N,C,5,SHOTGUN,3x1,HITCH,False,TRADITIONAL,3.03,OUTSIDE_RIGHT,6,ZONE_COVERAGE,COVER_4_ZONE,,5,5,-1.04916,-0.862062,0.60631,0.39369,0.019525,-0.019525
56,1,2023090700,436,53541,Targeted Receiver,WR,5.540297,3.220062,3.220062,-2.320235,2.0,1.160118,2.512,4.43,20,2023,09/07/2023,20:20:00,KC,DET,(8:09) (Shotgun) J.Goff pass short left to A.S...,1,08:09,2,9,DET,KC,DET,21,0,0,N,C,6,SHOTGUN,2x2,SLANT,False,TRADITIONAL,2.55,INSIDE_BOX,6,MAN_COVERAGE,COVER_1_MAN,,13,13,0.312204,1.613927,0.575537,0.424463,-0.030918,0.030918


## 4. Receiver vs Defender Position Performance

In [8]:
print("\n" + "="*80)
print("RECEIVER × DEFENDER POSITION MATCHUP ANALYSIS")
print("="*80)

MIN_PLAYS_DEF = 5

# Get defender position data from the full merged_df
# For each play, get the defender(s) covering the targeted receiver
defenders_data = merged_df[merged_df['player_role'] == 'Defensive Coverage'].copy()

# Create a mapping of play -> defender position
# Group by play and get the primary defender position (most common)
play_defender_map = defenders_data.groupby(['game_id', 'play_id', 'week']).agg({
    'player_position': lambda x: x.mode()[0] if len(x.mode()) > 0 else 'UNK'
}).reset_index()
play_defender_map.columns = ['game_id', 'play_id', 'week', 'defender_position']

# Merge defender position back to offense data
offense_with_def = offense.merge(
    play_defender_map,
    on=['game_id', 'play_id', 'week'],
    how='left'
)

print(f"Plays with defender position data: {offense_with_def['defender_position'].notna().sum():,}")

# Now analyze receiver performance vs defender positions
receiver_vs_def = offense_with_def.groupby(['nfl_id', 'defender_position']).agg({
    'convergence_speed': ['mean', 'count'],
    'min_distance': 'mean',
    'player_position': lambda x: x.mode()[0] if len(x) > 0 else None
}).round(3)

receiver_vs_def.columns = ['Avg Conv', 'N Plays', 'Avg Min Dist', 'Position']
receiver_vs_def = receiver_vs_def[receiver_vs_def['N Plays'] >= MIN_PLAYS_DEF].reset_index()

# Remove any rows with NaN defender_position
receiver_vs_def = receiver_vs_def[receiver_vs_def['defender_position'].notna()]

print(f"\nReceivers with defender-specific data: {receiver_vs_def['nfl_id'].nunique()}")
print(f"Defender positions analyzed: {receiver_vs_def['defender_position'].unique()}")

def_matchups = []

for player_id in receiver_vs_def['nfl_id'].unique():
    player_data = receiver_vs_def[receiver_vs_def['nfl_id'] == player_id]
    
    if len(player_data) < 2:
        continue
    
    best_def_idx = player_data['Avg Conv'].idxmax()
    best_def = player_data.loc[best_def_idx]
    
    worst_def_idx = player_data['Avg Conv'].idxmin()
    worst_def = player_data.loc[worst_def_idx]
    
    def_gap = best_def['Avg Conv'] - worst_def['Avg Conv']
    
    def_matchups.append({
        'nfl_id': player_id,
        'beats_position': best_def['defender_position'],
        'beats_conv': best_def['Avg Conv'],
        'beats_plays': best_def['N Plays'],
        'struggles_vs': worst_def['defender_position'],
        'struggles_conv': worst_def['Avg Conv'],
        'struggles_plays': worst_def['N Plays'],
        'position_gap': def_gap
    })

def_matchup_df = pd.DataFrame(def_matchups)
def_matchup_df = def_matchup_df.sort_values('position_gap', ascending=False)

if player_lookup is not None:
    def_matchup_df = def_matchup_df.merge(
        player_lookup[['nfl_id', 'player_name', 'team', 'position']], 
        on='nfl_id', 
        how='left'
    )
    def_matchup_df['player_name'] = def_matchup_df['player_name'].fillna('Unknown Player')
    def_matchup_df['team'] = def_matchup_df['team'].fillna('UNK')
    def_matchup_df['position'] = def_matchup_df['position'].fillna('UNK')
    
    cols = ['nfl_id', 'player_name', 'team', 'position', 'beats_position', 'beats_conv',
            'beats_plays', 'struggles_vs', 'struggles_conv', 'struggles_plays', 'position_gap']
    def_matchup_df = def_matchup_df[cols]

print("\nTop 20 Receivers with Position-Specific Matchup Advantages:")
print("-" * 80)
if player_lookup is not None and 'player_name' in def_matchup_df.columns:
    display_cols = ['player_name', 'team', 'position', 'beats_position', 'beats_conv',
                    'struggles_vs', 'struggles_conv', 'position_gap']
    print(def_matchup_df[display_cols].head(20).to_string(index=False))
else:
    print(def_matchup_df.head(20).to_string(index=False))

def_matchup_df.to_csv(OUTPUT_DIR / 'receiver_vs_defender_position.csv', index=False)


RECEIVER × DEFENDER POSITION MATCHUP ANALYSIS
Plays with defender position data: 14,107

Receivers with defender-specific data: 357
Defender positions analyzed: ['CB' 'FS' 'SS']

Top 20 Receivers with Position-Specific Matchup Advantages:
--------------------------------------------------------------------------------
      player_name team position beats_position  beats_conv struggles_vs  struggles_conv  position_gap
       Tyler Boyd  CIN       WR             FS       3.616           CB           2.675         0.941
       Mike Evans   TB       WR             CB       3.259           FS           2.338         0.921
   Rashid Shaheed   NO       WR             FS       4.063           CB           3.157         0.906
Amon-Ra St. Brown  DET       WR             FS       4.081           CB           3.198         0.883
      Jayden Reed   GB       WR             FS       4.481           CB           3.684         0.797
     Mark Andrews  BAL       TE             FS       3.558         

## 5. Receiver Performance by Distance

In [9]:
print("\n" + "="*80)
print("RECEIVER × ROUTE DEPTH ANALYSIS")
print("="*80)

offense['distance_category'] = pd.cut(
    offense['initial_distance'],
    bins=[0, 10, 15, 20, 100],
    labels=['Short (<10y)', 'Medium (10-15y)', 'Deep (15-20y)', 'Bomb (>20y)']
)

MIN_PLAYS_DIST = 5

receiver_distance_perf = offense.groupby(['nfl_id', 'distance_category']).agg({
    'convergence_speed': ['mean', 'count'],
    'min_distance': 'mean',
    'player_position': lambda x: x.mode()[0] if len(x) > 0 else None
}).round(3)

receiver_distance_perf.columns = ['Avg Conv', 'N Plays', 'Avg Min Dist', 'Position']
receiver_distance_perf = receiver_distance_perf[receiver_distance_perf['N Plays'] >= MIN_PLAYS_DIST].reset_index()

print(f"\nReceivers with distance-specific data: {receiver_distance_perf['nfl_id'].nunique()}")

distance_strengths = []

for player_id in receiver_distance_perf['nfl_id'].unique():
    player_data = receiver_distance_perf[receiver_distance_perf['nfl_id'] == player_id]
    
    if len(player_data) < 2:
        continue
    
    best_dist_idx = player_data['Avg Conv'].idxmax()
    best_dist = player_data.loc[best_dist_idx]
    
    worst_dist_idx = player_data['Avg Conv'].idxmin()
    worst_dist = player_data.loc[worst_dist_idx]
    
    dist_gap = best_dist['Avg Conv'] - worst_dist['Avg Conv']
    
    distance_strengths.append({
        'nfl_id': player_id,
        'best_depth': best_dist['distance_category'],
        'best_depth_conv': best_dist['Avg Conv'],
        'best_depth_plays': best_dist['N Plays'],
        'worst_depth': worst_dist['distance_category'],
        'worst_depth_conv': worst_dist['Avg Conv'],
        'worst_depth_plays': worst_dist['N Plays'],
        'depth_gap': dist_gap
    })

dist_strength_df = pd.DataFrame(distance_strengths)
dist_strength_df = dist_strength_df.sort_values('depth_gap', ascending=False)

if player_lookup is not None:
    dist_strength_df = dist_strength_df.merge(
        player_lookup[['nfl_id', 'player_name', 'team', 'position']], 
        on='nfl_id', 
        how='left'
    )
    dist_strength_df['player_name'] = dist_strength_df['player_name'].fillna('Unknown Player')
    dist_strength_df['team'] = dist_strength_df['team'].fillna('UNK')
    dist_strength_df['position'] = dist_strength_df['position'].fillna('UNK')
    
    cols = ['nfl_id', 'player_name', 'team', 'position', 'best_depth', 'best_depth_conv',
            'best_depth_plays', 'worst_depth', 'worst_depth_conv', 'worst_depth_plays', 'depth_gap']
    dist_strength_df = dist_strength_df[cols]

print("\nTop 20 Receivers with Depth-Specific Strengths:")
print("-" * 80)
if player_lookup is not None and 'player_name' in dist_strength_df.columns:
    display_cols = ['player_name', 'team', 'position', 'best_depth', 'best_depth_conv',
                    'worst_depth', 'worst_depth_conv', 'depth_gap']
    print(dist_strength_df[display_cols].head(20).to_string(index=False))
else:
    print(dist_strength_df.head(20).to_string(index=False))

dist_strength_df.to_csv(OUTPUT_DIR / 'receiver_depth_strengths.csv', index=False)


RECEIVER × ROUTE DEPTH ANALYSIS

Receivers with distance-specific data: 299

Top 20 Receivers with Depth-Specific Strengths:
--------------------------------------------------------------------------------
       player_name team position    best_depth  best_depth_conv  worst_depth  worst_depth_conv  depth_gap
       Dyami Brown  WAS       WR   Bomb (>20y)            5.086 Short (<10y)             1.214      3.872
   Deven Thompkins   TB       WR Deep (15-20y)            4.839 Short (<10y)             1.181      3.658
  Christian Watson   GB       WR   Bomb (>20y)            4.915 Short (<10y)             1.260      3.655
    Rashid Shaheed   NO       WR   Bomb (>20y)            4.973 Short (<10y)             1.417      3.556
       K.J. Osborn  MIN       WR   Bomb (>20y)            4.614 Short (<10y)             1.171      3.443
        Skyy Moore   KC       WR   Bomb (>20y)            4.499 Short (<10y)             1.159      3.340
Jaxon Smith-Njigba  SEA       WR   Bomb (>20y)     

## 6. Comprehensive Receiver Scouting Cards

In [10]:
def generate_receiver_scouting_card(player_id, data, player_lookup=None):
    """Generate comprehensive receiver scouting report"""
    player_data = data[data['nfl_id'] == player_id]
    
    if len(player_data) == 0:
        return None
    
    card = {
        'nfl_id': player_id,
        'position': player_data['player_position'].mode()[0] if len(player_data) > 0 else 'UNK',
        'total_plays': len(player_data),
        'avg_convergence': player_data['convergence_speed'].mean(),
        'avg_min_distance': player_data['min_distance'].mean(),
        'avg_speed': player_data['avg_speed'].mean()
    }
    
    if player_lookup is not None:
        player_info = player_lookup[player_lookup['nfl_id'] == player_id]
        if len(player_info) > 0:
            card['player_name'] = player_info['player_name'].iloc[0]
            if 'team' in player_info.columns:
                card['team'] = player_info['team'].iloc[0]
    
    # Best/worst routes
    route_perf = player_data.groupby('route_of_targeted_receiver').agg({
        'convergence_speed': 'mean',
        'game_id': 'count'
    }).round(3)
    route_perf.columns = ['Avg Conv', 'N']
    route_perf = route_perf[route_perf['N'] >= 3].sort_values('Avg Conv', ascending=False)
    
    card['best_routes'] = route_perf.head(3).to_dict()
    card['worst_routes'] = route_perf.tail(3).to_dict()
    
    # Coverage performance
    if 'team_coverage_type' in player_data.columns:
        cov_perf = player_data.groupby('team_coverage_type').agg({
            'convergence_speed': 'mean',
            'game_id': 'count'
        }).round(3)
        cov_perf.columns = ['Avg Conv', 'N']
        cov_perf = cov_perf[cov_perf['N'] >= 3].sort_values('Avg Conv', ascending=False)
        
        card['coverage_performance'] = cov_perf.to_dict()
    
    # Distance performance
    player_data_dist = player_data.copy()
    player_data_dist['dist_cat'] = pd.cut(
        player_data_dist['initial_distance'],
        bins=[0, 10, 15, 20, 100],
        labels=['Short', 'Medium', 'Deep', 'Bomb']
    )
    
    dist_perf = player_data_dist.groupby('dist_cat').agg({
        'convergence_speed': 'mean',
        'game_id': 'count'
    }).round(3)
    dist_perf.columns = ['Avg Conv', 'N']
    dist_perf = dist_perf[dist_perf['N'] >= 3]
    
    card['distance_performance'] = dist_perf.to_dict()
    
    return card

print("\n" + "="*80)
print("GENERATING INDIVIDUAL RECEIVER SCOUTING CARDS")
print("="*80)

top_cov_players = cov_matchup_df.head(10)['nfl_id'].tolist()
top_route_players = route_strength_df.head(10)['nfl_id'].tolist()
top_dist_players = dist_strength_df.head(10)['nfl_id'].tolist()

priority_receivers = list(set(top_cov_players + top_route_players + top_dist_players))

receiver_cards = {}

for player_id in priority_receivers:
    card = generate_receiver_scouting_card(player_id, offense, player_lookup)
    if card:
        receiver_cards[player_id] = card
        name_str = f"{card.get('player_name', f'Player #{player_id}')}"
        team_str = f" ({card.get('team', 'UNK')})" if 'team' in card else ""
        pos_str = f" - {card['position']}"
        print(f"✓ Generated card for {name_str}{team_str}{pos_str}")

print(f"\n✅ Generated {len(receiver_cards)} receiver scouting cards")


GENERATING INDIVIDUAL RECEIVER SCOUTING CARDS
✓ Generated card for Dyami Brown (WAS) - WR
✓ Generated card for Deven Thompkins (TB) - WR
✓ Generated card for Zay Jones (JAX) - WR
✓ Generated card for Byron Pringle (WAS) - WR
✓ Generated card for Joe Mixon (CIN) - RB
✓ Generated card for Tyler Scott (CHI) - WR
✓ Generated card for Kalif Raymond (DET) - WR
✓ Generated card for Bijan Robinson (ATL) - RB
✓ Generated card for Jerry Jeudy (DEN) - WR
✓ Generated card for Chris Olave (NO) - WR
✓ Generated card for Jaxon Smith-Njigba (SEA) - WR
✓ Generated card for Zay Flowers (BAL) - WR
✓ Generated card for Cooper Kupp (LA) - WR
✓ Generated card for Keenan Allen (LAC) - WR
✓ Generated card for Rashid Shaheed (NO) - WR
✓ Generated card for Jonathan Mingo (CAR) - WR
✓ Generated card for Christian Watson (GB) - WR
✓ Generated card for DK Metcalf (SEA) - WR
✓ Generated card for K.J. Osborn (MIN) - WR
✓ Generated card for Jayden Reed (GB) - WR
✓ Generated card for Josh Oliver (MIN) - TE
✓ Generate

## 7. Visualize Receiver Matchup Matrices

In [11]:
print("\n" + "="*80)
print("CREATING RECEIVER-COVERAGE HEATMAP")
print("="*80)

top_receivers = cov_matchup_df.head(20)['nfl_id'].tolist()

receiver_cov_matrix = receiver_coverage_perf[receiver_coverage_perf['nfl_id'].isin(top_receivers)]

if player_lookup is not None:
    receiver_cov_matrix = receiver_cov_matrix.merge(
        player_lookup[['nfl_id', 'player_name', 'team']], 
        on='nfl_id', 
        how='left'
    )
    receiver_cov_matrix['player_name'] = receiver_cov_matrix['player_name'].fillna('Unknown')
    receiver_cov_matrix['team'] = receiver_cov_matrix['team'].fillna('UNK')
    
    receiver_cov_matrix['player_label'] = receiver_cov_matrix.apply(
        lambda x: f"{x['player_name']} ({x['team']})", 
        axis=1
    )
    index_col = 'player_label'
else:
    receiver_cov_matrix['player_label'] = receiver_cov_matrix['nfl_id'].apply(lambda x: f"Player #{int(x)}")
    index_col = 'player_label'

matrix_pivot = receiver_cov_matrix.pivot_table(
    values='Avg Conv',
    index=index_col,
    columns='team_coverage_type',
    aggfunc='mean'
)

if len(matrix_pivot) > 0:
    matrix_pivot.to_csv(OUTPUT_DIR / 'receiver_coverage_matrix.csv')
    
    print(f"\n✅ Receiver-Coverage Matrix Created ({len(matrix_pivot)} receivers × {len(matrix_pivot.columns)} coverages)")
    print("\nTop 10 receivers in matrix:")
    print(matrix_pivot.head(10))
    
    fig, ax = plt.subplots(figsize=(14, 10))
    
    sns.heatmap(matrix_pivot, annot=True, fmt='.2f', cmap='RdYlGn', 
               center=0, ax=ax, cbar_kws={'label': 'Avg Convergence (yd/s)'})
    
    ax.set_title('Receiver-Coverage Performance Matrix (Top 20 Receivers)\n' + 
                'Higher values (green) = Better separation/performance',
                fontsize=14, fontweight='bold')
    ax.set_xlabel('Coverage Type', fontsize=12)
    ax.set_ylabel('Receiver Name (Team)', fontsize=12)
    
    plt.tight_layout()
    plt.savefig(OUTPUT_DIR / 'receiver_coverage_heatmap.png', dpi=300, bbox_inches='tight')
    plt.close()
    
    print("✅ Heatmap saved!")
else:
    print("⚠️ No data for heatmap")


CREATING RECEIVER-COVERAGE HEATMAP

✅ Receiver-Coverage Matrix Created (20 receivers × 7 coverages)

Top 10 receivers in matrix:
team_coverage_type        COVER_0_MAN  COVER_1_MAN  COVER_2_MAN  COVER_2_ZONE  \
player_label                                                                    
Brandin Cooks (DAL)               NaN        3.699          NaN         3.159   
Byron Pringle (WAS)               NaN        3.930          NaN           NaN   
Cedrick Wilson Jr. (MIA)          NaN        3.208          NaN         4.442   
Chris Olave (NO)                  NaN        3.153          NaN         4.607   
Colby Parkinson (SEA)             NaN          NaN          NaN           NaN   
Cole Kmet (CHI)                 2.917        2.508          NaN         2.386   
DK Metcalf (SEA)                  NaN        2.944        3.126         4.463   
Deven Thompkins (TB)              NaN        1.623          NaN           NaN   
Jayden Reed (GB)                  NaN        2.936          

## 8. Receiver Visual Cards

In [12]:
def create_visual_receiver_card(player_id, card, filename):
    """Create visual quick reference card for a receiver"""
    fig, axes = plt.subplots(2, 2, figsize=(12, 10))
    
    player_name = card.get('player_name', f'Player #{player_id}')
    team = card.get('team', 'UNK')
    position = card['position']
    
    title = f'{player_name} ({team}) - {position}\nOFFENSIVE SCOUTING CARD'
    fig.suptitle(title, fontsize=14, fontweight='bold')
    
    # Overall stats
    ax = axes[0, 0]
    ax.axis('off')
    
    stats_text = f"""
OVERALL STATS
━━━━━━━━━━━━━━━━━━
Total Targets: {card['total_plays']}
Avg Separation: {card['avg_convergence']:.3f} yd/s
Avg Min Distance: {card['avg_min_distance']:.2f} yards
Avg Speed: {card['avg_speed']:.2f} yd/s
    """
    
    ax.text(0.1, 0.5, stats_text, fontsize=10, family='monospace',
           bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))
    
    # Best routes
    ax = axes[0, 1]
    ax.axis('off')
    
    routes_text = "⭐ BEST ROUTES\n" + "━"*25 + "\n"
    if 'best_routes' in card and 'Avg Conv' in card['best_routes']:
        for route, conv in list(card['best_routes']['Avg Conv'].items())[:3]:
            n = card['best_routes']['N'].get(route, 0)
            routes_text += f"• {route}: {conv:.3f} yd/s ({int(n)})\n"
    
    ax.text(0.1, 0.5, routes_text, fontsize=10, family='monospace',
           bbox=dict(boxstyle='round', facecolor='lightgreen', alpha=0.5))
    
    # Coverage matchups
    ax = axes[1, 0]
    ax.axis('off')
    
    cov_text = "🎯 BEST VS COVERAGE\n" + "━"*25 + "\n"
    if 'coverage_performance' in card and 'Avg Conv' in card['coverage_performance']:
        for cov, conv in list(card['coverage_performance']['Avg Conv'].items())[:3]:
            cov_text += f"• {cov}: {conv:.3f} yd/s\n"
    
    ax.text(0.1, 0.5, cov_text, fontsize=10, family='monospace',
           bbox=dict(boxstyle='round', facecolor='lightblue', alpha=0.5))
    
    # Distance profile
    ax = axes[1, 1]
    ax.axis('off')
    
    dist_text = "📏 DEPTH PROFILE\n" + "━"*25 + "\n"
    if 'distance_performance' in card and 'Avg Conv' in card['distance_performance']:
        for dist, conv in card['distance_performance']['Avg Conv'].items():
            dist_text += f"• {dist}: {conv:.3f} yd/s\n"
    
    ax.text(0.1, 0.5, dist_text, fontsize=10, family='monospace',
           bbox=dict(boxstyle='round', facecolor='lightyellow', alpha=0.5))
    
    plt.tight_layout()
    plt.savefig(filename, dpi=300, bbox_inches='tight')
    plt.close()

print("\n" + "="*80)
print("GENERATING VISUAL RECEIVER CARDS")
print("="*80)

cards_dir = OUTPUT_DIR / 'receiver_cards'
cards_dir.mkdir(exist_ok=True)

for player_id, card in receiver_cards.items():
    if 'player_name' in card:
        clean_name = card['player_name'].replace(' ', '_').replace('.', '')
        team = card.get('team', 'UNK')
        card_file = cards_dir / f'{clean_name}_{team}_{int(player_id)}.png'
    else:
        card_file = cards_dir / f'receiver_{int(player_id)}_card.png'
    
    create_visual_receiver_card(player_id, card, card_file)

print(f"✓ Generated {len(receiver_cards)} visual cards")
print(f"✅ Cards saved to: {cards_dir}")


GENERATING VISUAL RECEIVER CARDS
✓ Generated 24 visual cards
✅ Cards saved to: ..\data\processed\offensive_matchups\receiver_cards


## 9. Summary Report

In [13]:
print("\n" + "="*80)
print("OFFENSIVE PLAYER MATCHUP ANALYSIS - SUMMARY")
print("="*80)

print(f"""
ANALYSIS COMPLETE:

OFFENSIVE PLAYERS ANALYZED:
- Total receivers/targets: {offense['nfl_id'].nunique():,}
- Receivers with coverage data: {receiver_coverage_perf['nfl_id'].nunique():,}
- Receivers with route data: {receiver_route_perf['nfl_id'].nunique():,}
- Receivers with distance data: {receiver_distance_perf['nfl_id'].nunique():,}
- Priority scouting cards generated: {len(receiver_cards)}

TOP 3 RECEIVERS WHO DOMINATE SPECIFIC COVERAGES:
""")

for idx, (i, row) in enumerate(cov_matchup_df.head(3).iterrows(), 1):
    if 'player_name' in cov_matchup_df.columns:
        player_str = f"{row['player_name']} ({row.get('team', 'UNK')}) - {row['position']}"
    else:
        player_str = f"Player #{int(row['nfl_id'])}"
    
    print(f"  {idx}. {player_str}")
    print(f"     Dominates: {row['best_coverage']} ({row['best_coverage_conv']:.3f} yd/s)")
    print(f"     Struggles vs: {row['worst_coverage']} ({row['worst_coverage_conv']:.3f} yd/s)")
    print(f"     Coverage gap: {row['coverage_gap']:.3f} yd/s\n")

print(f"""
TOP 3 ROUTE SPECIALISTS:
""")

for idx, (i, row) in enumerate(route_strength_df.head(3).iterrows(), 1):
    if 'player_name' in route_strength_df.columns:
        player_str = f"{row['player_name']} ({row.get('team', 'UNK')}) - {row['position']}"
    else:
        player_str = f"Player #{int(row['nfl_id'])}"
    
    print(f"  {idx}. {player_str}")
    print(f"     Best route: {row['best_route']} ({row['best_route_conv']:.3f} yd/s)")
    print(f"     Worst route: {row['worst_route']} ({row['worst_route_conv']:.3f} yd/s)")
    print(f"     Route gap: {row['route_gap']:.3f} yd/s\n")

print(f"""
FILES GENERATED:
✓ receiver_coverage_matchups.csv - Coverage-specific performance
✓ receiver_route_strengths.csv - Route-specific performance  
✓ receiver_vs_defender_position.csv - Position matchup data
✓ receiver_depth_strengths.csv - Distance/depth performance
✓ receiver_coverage_heatmap.png - Visual matrix
✓ receiver_coverage_matrix.csv - Performance matrix
✓ receiver_cards/ - Individual receiver scouting cards

USAGE:
1. Identify opponent's defensive weaknesses (coverage types, positions)
2. Match your receivers' strengths to opponent's weaknesses
3. Design route concepts that exploit coverage matchups
4. Use depth analysis to optimize route combinations
""")

print("="*80)
print("✅ OFFENSIVE MATCHUP ANALYSIS COMPLETE!")
print("="*80)


OFFENSIVE PLAYER MATCHUP ANALYSIS - SUMMARY

ANALYSIS COMPLETE:

OFFENSIVE PLAYERS ANALYZED:
- Total receivers/targets: 464
- Receivers with coverage data: 289
- Receivers with route data: 348
- Receivers with distance data: 299
- Priority scouting cards generated: 24

TOP 3 RECEIVERS WHO DOMINATE SPECIFIC COVERAGES:

  1. Zay Jones (JAX) - WR
     Dominates: COVER_2_ZONE (4.739 yd/s)
     Struggles vs: COVER_0_MAN (1.861 yd/s)
     Coverage gap: 2.878 yd/s

  2. Deven Thompkins (TB) - WR
     Dominates: COVER_3_ZONE (4.278 yd/s)
     Struggles vs: COVER_1_MAN (1.623 yd/s)
     Coverage gap: 2.655 yd/s

  3. Kalif Raymond (DET) - WR
     Dominates: COVER_6_ZONE (4.430 yd/s)
     Struggles vs: COVER_4_ZONE (2.003 yd/s)
     Coverage gap: 2.427 yd/s


TOP 3 ROUTE SPECIALISTS:

  1. Terry McLaurin (WAS) - WR
     Best route: POST (5.951 yd/s)
     Worst route: SCREEN (0.061 yd/s)
     Route gap: 5.890 yd/s

  2. Rashid Shaheed (NO) - WR
     Best route: POST (5.514 yd/s)
     Worst route