# Zone Vulnerability Analysis - Validation Notebook

This notebook validates the zone vulnerability timeseries dataset and visualization tools.

## Objectives
1. Load and explore the zone vulnerability dataset
2. Visualize zone grids for sample plays
3. Analyze zone evolution over time
4. Validate zone boundaries and metrics
5. Identify most/least vulnerable zones

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# Import our custom classes
from nfl_analysis import ZoneVulnerabilityVisualizer

# Pandas display options for this notebook: show every column, print at most
# 100 rows, and format floats with two decimal places.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)
pd.set_option('display.float_format', '{:.2f}'.format)

# IPython magic: render matplotlib figures inline in the cell output.
%matplotlib inline

## 1. Load Zone Vulnerability Dataset

In [None]:
# Load the dataset
data_dir = Path('../data/consolidated')
zone_file = data_dir / 'zone_vulnerability_timeseries.parquet'

# Fail fast: every later cell reads `zone_data`, so continuing without the
# file would only surface later as a confusing NameError.
if not zone_file.exists():
    raise FileNotFoundError(
        f"Zone vulnerability dataset not found at {zone_file}. "
        "Please run the consolidation pipeline first."
    )

zone_data = pd.read_parquet(zone_file)
print("✓ Loaded zone vulnerability dataset")
print(f"  Shape: {zone_data.shape}")
# deep=True also counts the payload of object (e.g. string) columns.
print(f"  Memory: {zone_data.memory_usage(deep=True).sum() / 1024**2:.1f} MB")

## 2. Dataset Overview

In [None]:
# Preview the leading rows; the bare last expression is shown as the cell output.
print("First 5 rows:")
zone_data.head(n=5)

In [None]:
# Column names, dtypes and non-null counts (DataFrame.info prints directly).
print()
print("Column Information:")
zone_data.info()

In [None]:
# Descriptive statistics for the numeric zone metrics.
summary_columns = [
    'defender_count',
    'nearest_defender_dist',
    'coverage_density',
    'zone_void_score',
    'receiver_count',
]
print("\nSummary Statistics:")
zone_data[summary_columns].describe()

## 3. Data Validation

In [None]:
# Each (game, play, frame) is expected to contribute exactly 15 zone rows.
frame_keys = ['game_id', 'play_id', 'frame_id']
zones_per_frame = zone_data.groupby(frame_keys).size()
fewest_zones = zones_per_frame.min()
most_zones = zones_per_frame.max()

print(f"Zones per frame - Min: {fewest_zones}, Max: {most_zones}")
print(f"Expected: 15 zones per frame")
if fewest_zones != 15 or most_zones != 15:
    print("✗ FAIL: Some frames don't have 15 zones")
    # Show how many frames have each row count to help locate the problem.
    print(zones_per_frame.value_counts())
else:
    print("✓ PASS: All frames have exactly 15 zones")

In [None]:
# List the distinct zone identifiers and compare their count to the expected 15.
zone_ids = zone_data['zone_id']
distinct_zone_ids = sorted(zone_ids.unique())

print("\nUnique zone IDs:")
print(distinct_zone_ids)
print(f"\nTotal unique zones: {zone_ids.nunique()}")
print("Expected: 15 zones (3 depths × 5 lateral positions)")

In [None]:
# Row counts per phase label, for comparison against the expected labels.
phase_counts = zone_data['phase'].value_counts()

print("\nPhase distribution:")
print(phase_counts)
print("\nExpected phases: pre_snap, route_development, at_throw")

## 4. Zone Analysis

In [None]:
# Mean void score per zone, highest first (reused by the summary cell).
print("Average Vulnerability Score by Zone:")
avg_by_zone = (
    zone_data
    .groupby('zone_id')['zone_void_score']
    .mean()
    .sort_values(ascending=False)
)
print(avg_by_zone)

# Bar chart of the ranking above.
fig, ax = plt.subplots(figsize=(12, 6))
avg_by_zone.plot.bar(ax=ax, color='coral', edgecolor='black')
ax.set_title('Average Vulnerability Score by Zone', fontsize=14, fontweight='bold')
ax.set_xlabel('Zone ID', fontsize=12)
ax.set_ylabel('Average Void Score', fontsize=12)
ax.grid(axis='y', alpha=0.3)
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

In [None]:
# Mean number of defenders per zone, highest first.
print("\nAverage Defender Count by Zone:")
defenders_by_zone = (
    zone_data
    .groupby('zone_id')['defender_count']
    .mean()
    .sort_values(ascending=False)
)
print(defenders_by_zone)

# Bar chart of the ranking above.
fig, ax = plt.subplots(figsize=(12, 6))
defenders_by_zone.plot.bar(ax=ax, color='steelblue', edgecolor='black')
ax.set_title('Average Defender Count by Zone', fontsize=14, fontweight='bold')
ax.set_xlabel('Zone ID', fontsize=12)
ax.set_ylabel('Average Defenders', fontsize=12)
ax.grid(axis='y', alpha=0.3)
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

## 5. Target Zone Analysis

In [None]:
# Which zones are targeted most frequently?
target_zones = zone_data[zone_data['is_target_zone'] == True]

# Count each play once per target zone. The dataset has one row per zone per
# frame, so counting raw rows would weight a play by its number of frames.
# NOTE(review): assumes `is_target_zone` is repeated on every frame row of a
# play - TODO confirm. If it is set on a single frame only, this
# de-duplication is a harmless no-op.
target_plays = target_zones.drop_duplicates(subset=['game_id', 'play_id', 'zone_id'])

print("Target Zone Distribution:")
target_dist = target_plays.groupby('zone_id').size().sort_values(ascending=False)
print(target_dist)

# Plot (skipped when no rows are flagged, since there is nothing to draw)
if target_dist.empty:
    print("No target zones flagged in the dataset; skipping plot")
else:
    fig, ax = plt.subplots(figsize=(12, 6))
    target_dist.plot(kind='bar', ax=ax, color='green', edgecolor='black')
    ax.set_title('Frequency of Ball Landing by Zone', fontsize=14, fontweight='bold')
    ax.set_xlabel('Zone ID', fontsize=12)
    ax.set_ylabel('Number of Plays', fontsize=12)
    ax.grid(axis='y', alpha=0.3)
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.show()

In [None]:
# Vulnerability of target zones at throw
target_at_throw = zone_data[
    (zone_data['is_target_zone'] == True) &
    (zone_data['phase'] == 'at_throw')
]

print("\nTarget Zone Vulnerability at Throw:")
print(target_at_throw[['zone_void_score', 'defender_count', 'nearest_defender_dist']].describe())

# Histograms of the three metrics, side by side
fig, axes = plt.subplots(1, 3, figsize=(15, 4))

axes[0].hist(target_at_throw['zone_void_score'], bins=30, color='red', edgecolor='black', alpha=0.7)
axes[0].set_title('Void Score Distribution', fontweight='bold')
axes[0].set_xlabel('Void Score')
axes[0].set_ylabel('Frequency')

# Integer-centred bins, one per defender count. Edges at 0, 1, ..., 5 would
# merge counts 4 and 5 into the closed last bin and drop any count above 5,
# so the edges are placed at k - 0.5 and extended to the observed maximum.
defender_counts = target_at_throw['defender_count'].dropna()
max_defenders = int(defender_counts.max()) if not defender_counts.empty else 0
top_count = max(max_defenders, 5)  # always show at least 0..5
count_bins = np.arange(-0.5, top_count + 1.5, 1.0)

axes[1].hist(defender_counts, bins=count_bins, color='blue', edgecolor='black', alpha=0.7)
axes[1].set_xticks(range(top_count + 1))
axes[1].set_title('Defender Count Distribution', fontweight='bold')
axes[1].set_xlabel('Defenders in Zone')
axes[1].set_ylabel('Frequency')

axes[2].hist(target_at_throw['nearest_defender_dist'], bins=30, color='green', edgecolor='black', alpha=0.7)
axes[2].set_title('Nearest Defender Distance', fontweight='bold')
axes[2].set_xlabel('Distance (yards)')
axes[2].set_ylabel('Frequency')

plt.tight_layout()
plt.show()

## 6. Zone Grid Visualization (Sample Plays)

In [None]:
# Initialize visualizer from the same directory the dataset was loaded from,
# so the path is defined in one place (`data_dir`) and cannot drift.
# str() keeps the argument a plain string, as in the original call.
visualizer = ZoneVulnerabilityVisualizer(data_dir=str(data_dir))
visualizer.load_zone_data()

print("✓ Visualizer initialized and data loaded")

In [None]:
# First five distinct (game_id, play_id) pairs, in order of appearance.
play_keys = ['game_id', 'play_id']
sample_plays = zone_data[play_keys].drop_duplicates().head(5)

print("Sample plays for visualization:")
print(sample_plays)

In [None]:
# Visualize zone grid for first sample play at different frames
first_play = sample_plays.iloc[0]
game_id = first_play['game_id']
play_id = first_play['play_id']

# Frames for this play, sorted explicitly: `unique()` returns values in order
# of appearance, which is chronological only if the rows happen to be ordered
# by frame.
play_mask = (zone_data['game_id'] == game_id) & (zone_data['play_id'] == play_id)
frames = sorted(zone_data.loc[play_mask, 'frame_id'].unique())

print(f"\nVisualizing Game {game_id}, Play {play_id}")
print(f"Available frames: {frames}")

# Plot the first, middle and last frame of the play.
# NOTE(review): presumably these correspond to pre-snap, mid-route and
# at-throw - the selection is positional and does not read the `phase` column.
# dict.fromkeys drops repeats (plays with one or two frames) and keeps order.
key_frames = list(dict.fromkeys([frames[0], frames[len(frames) // 2], frames[-1]]))

for frame_id in key_frames:
    fig, ax = visualizer.plot_zone_grid(
        game_id=game_id,
        play_id=play_id,
        frame_id=frame_id,
        metric='zone_void_score',
        figsize=(14, 8)
    )
    plt.show()

## 7. Zone Evolution Over Time

In [None]:
# Track evolution of a specific zone
first_play = sample_plays.iloc[0]
game_id = first_play['game_id']
play_id = first_play['play_id']

# Rows of this play that are flagged as the target zone
target_rows = zone_data[
    (zone_data['game_id'] == game_id) &
    (zone_data['play_id'] == play_id) &
    (zone_data['is_target_zone'] == True)
]

if target_rows.empty:
    # Without this guard `.iloc[0]` raises an IndexError that does not say
    # which play is missing its target flag.
    print(f"No target zone flagged for Game {game_id}, Play {play_id}; skipping evolution plot")
else:
    # All flagged rows are expected to name the same zone; take the first.
    target_zone = target_rows['zone_id'].iloc[0]
    print(f"Target zone for this play: {target_zone}")

    # Visualize evolution
    fig, axes = visualizer.plot_zone_evolution(
        game_id=game_id,
        play_id=play_id,
        zone_id=target_zone,
        figsize=(14, 8)
    )
    plt.show()

## 8. Phase Analysis

In [None]:
# Per-phase aggregates: spread of the void score plus mean defender metrics.
phase_aggregations = {
    'zone_void_score': ['mean', 'std', 'min', 'max'],
    'defender_count': 'mean',
    'nearest_defender_dist': 'mean',
}
phase_stats = zone_data.groupby('phase').agg(phase_aggregations)

print("Vulnerability by Phase:")
print(phase_stats)

In [None]:
# Box plots of void score and defender count, grouped by phase.
fig, axes = plt.subplots(1, 2, figsize=(15, 5))

# (column to plot, panel title, y-axis label) for each panel, left to right.
panels = [
    ('zone_void_score', 'Vulnerability Score by Phase', 'Void Score'),
    ('defender_count', 'Defender Count by Phase', 'Defenders per Zone'),
]

for panel_ax, (metric, heading, y_label) in zip(axes, panels):
    zone_data.boxplot(column=metric, by='phase', ax=panel_ax)
    panel_ax.set_title(heading, fontweight='bold')
    panel_ax.set_xlabel('Phase')
    panel_ax.set_ylabel(y_label)
    plt.suptitle('')  # Clear the figure-level title added by the grouped boxplot

plt.tight_layout()
plt.show()

## 9. Summary and Validation Results

In [None]:
# Final report. Reuses `zones_per_frame`, `avg_by_zone` and `target_dist`
# computed by earlier cells, so those cells must have been run first.
banner = "="*70
play_keys = ['game_id', 'play_id']
frame_keys = ['game_id', 'play_id', 'frame_id']
void_scores = zone_data['zone_void_score']

print(banner)
print("ZONE VULNERABILITY DATASET VALIDATION SUMMARY")
print(banner)

print(f"\nDataset Shape: {zone_data.shape}")
total_plays = zone_data[play_keys].drop_duplicates().shape[0]
print(f"Total Plays: {total_plays:,}")
total_frames = zone_data[frame_keys].drop_duplicates().shape[0]
print(f"Total Frames: {total_frames:,}")
frames_per_play = zone_data.groupby(play_keys)['frame_id'].nunique()
print(f"Avg Frames/Play: {frames_per_play.mean():.1f}")

print(f"\nZones per Frame: {zones_per_frame.mode()[0]}")
print(f"Unique Zones: {zone_data['zone_id'].nunique()}")

print(f"\nVulnerability Score Range: [{void_scores.min():.2f}, {void_scores.max():.2f}]")
print(f"Average Vulnerability: {void_scores.mean():.2f}")

# `avg_by_zone` is sorted descending, so its ends are the extreme zones.
print(f"\nMost Vulnerable Zone: {avg_by_zone.index[0]} (avg score: {avg_by_zone.iloc[0]:.2f})")
print(f"Least Vulnerable Zone: {avg_by_zone.index[-1]} (avg score: {avg_by_zone.iloc[-1]:.2f})")

# `target_dist` is sorted descending as well.
print(f"\nMost Targeted Zone: {target_dist.index[0]} ({target_dist.iloc[0]:,} times)")

print("\n" + banner)
print("✓ VALIDATION COMPLETE")
print(banner)