In [1]:
import sqlite3
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Notebook-wide pandas display options: no column limit, no fixed width,
# and at most 100 rows printed per frame.
for option_name, option_value in (
    ('display.max_columns', None),
    ('display.width', None),
    ('display.max_rows', 100),
):
    pd.set_option(option_name, option_value)

# Plot styling applied to every figure created later in the notebook.
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (14, 8)})

## 1. Load Data from qb_contract_value View

**Note:** This analysis uses the `qb_contract_value` view, which pre-aggregates QB performance metrics with contract data. The view automatically handles:
- Player name standardization and matching  
- Custom QB rating calculations
- Salary percentile rankings by season
- Value score computations
- Contract year expansions

In [None]:
# Connect to database
db_path = '../../data_load/nfl_qb_data.db'

# sqlite3.connect() silently creates an empty database file when the path does
# not exist, which would only surface later as a confusing "no such table"
# error. Check for the file up front so a wrong path is reported as such.
if not Path(db_path).is_file():
    raise FileNotFoundError(f"SQLite database not found at: {db_path}")

conn = sqlite3.connect(db_path)

# Load contract value data from pre-built view
try:
    qb_contracts = pd.read_sql_query("SELECT * FROM qb_contract_value", conn)
except Exception:
    # Do not leave the handle open if the query itself fails.
    conn.close()
    raise

# Fail fast if the view no longer exposes the columns the cells below rely on,
# instead of hitting a bare KeyError halfway through the notebook.
required_columns = {
    'player_name', 'season', 'team', 'attempts',
    'custom_rating', 'salary_millions', 'salary_percentile',
    'value_score', 'value_category',
    'total_pass_epa', 'cpoe', 'success_rate_pct',
    'completion_pct', 'td_rate_pct',
}
missing_columns = sorted(required_columns - set(qb_contracts.columns))
if missing_columns:
    raise KeyError(f"qb_contract_value is missing expected columns: {missing_columns}")

print(f"Loaded {len(qb_contracts)} QB-season records with contract data")
print("\nColumns available:")
print(qb_contracts.columns.tolist())
print(f"\nYears covered: {qb_contracts['season'].min()} - {qb_contracts['season'].max()}")
print(f"Unique QBs: {qb_contracts['player_name'].nunique()}")
print("\nSample data:")
qb_contracts.head()

Loaded 1939 contract records

Contract columns:
['contract_id', 'player_name', 'position', 'team', 'is_active', 'year_signed', 'years', 'value', 'apy', 'guaranteed', 'apy_cap_pct', 'inflated_value', 'inflated_apy', 'inflated_guaranteed', 'player_page', 'otc_id', 'player_id', 'date_of_birth', 'height', 'weight', 'college', 'draft_year', 'draft_round', 'draft_overall', 'draft_team', 'cols']


KeyError: 'year'

## 3. Best & Worst Value QBs (All-Time)

In [None]:
# Headline numbers for the loaded dataset
season_values = qb_contracts['season']
print("Dataset Overview:")
print(f"  Total QB-seasons: {len(qb_contracts):,}")
print(f"  Seasons: {season_values.min()}-{season_values.max()}")
print(f"  Unique QBs: {qb_contracts['player_name'].nunique()}")

print("\nValue Category Distribution:")
print(qb_contracts['value_category'].value_counts())

# describe() summary for each metric column, printed under its own heading
for heading, column in (
    ("\nSalary Statistics (in millions):", 'salary_millions'),
    ("\nCustom Rating Statistics:", 'custom_rating'),
    ("\nValue Score Statistics:", 'value_score'),
):
    print(heading)
    print(qb_contracts[column].describe())

In [None]:
# Top 20 best value QBs (all-time)
# Maps each source column to the header shown in the printed table;
# the dict order is also the column order of the table.
best_value_headers = {
    'player_name': 'Player',
    'season': 'Season',
    'custom_rating': 'Rating',
    'salary_millions': 'Salary ($M)',
    'salary_percentile': 'Salary %ile',
    'value_score': 'Value Score',
    'value_category': 'Category',
}

best_value = (
    qb_contracts
    .nlargest(20, 'value_score')          # highest value_score first
    .loc[:, list(best_value_headers)]
    .rename(columns=best_value_headers)
)

divider = "="*100
print(divider)
print("TOP 20 BEST VALUE QUARTERBACKS (All Years)")
print(divider)
print(best_value.to_string(index=False))

In [None]:
# Worst value QBs (most overpaid)
# Maps each source column to the header shown in the printed table;
# the dict order is also the column order of the table.
worst_value_headers = {
    'player_name': 'Player',
    'season': 'Season',
    'custom_rating': 'Rating',
    'salary_millions': 'Salary ($M)',
    'salary_percentile': 'Salary %ile',
    'value_score': 'Value Score',
    'value_category': 'Category',
}

worst_value = (
    qb_contracts
    .nsmallest(20, 'value_score')         # lowest value_score first
    .loc[:, list(worst_value_headers)]
    .rename(columns=worst_value_headers)
)

divider = "="*100
print("\n" + divider)
print("TOP 20 MOST OVERPAID QUARTERBACKS (All Years)")
print(divider)
print(worst_value.to_string(index=False))

## 4. Year-by-Year Analysis

In [None]:
# Best value by year
divider = "="*100
print("\n" + divider)
print("BEST VALUE QB BY SEASON")
print(divider)

# One line per season: the single row with the highest value_score.
for year in sorted(qb_contracts['season'].unique()):
    in_season = qb_contracts['season'] == year
    year_data = qb_contracts.loc[in_season].nlargest(1, 'value_score')
    if year_data.empty:
        continue  # nothing to report for this season
    row = year_data.iloc[0]
    print(f"{year}: {row['player_name']:20s} - Rating: {row['custom_rating']:.1f}, Salary: ${row['salary_millions']:.2f}M, Value: {row['value_score']:+.1f}")

In [None]:
# Most overpaid by year
divider = "="*100
print("\n" + divider)
print("MOST OVERPAID QB BY SEASON")
print(divider)

# One line per season: the single row with the lowest value_score.
for year in sorted(qb_contracts['season'].unique()):
    in_season = qb_contracts['season'] == year
    year_data = qb_contracts.loc[in_season].nsmallest(1, 'value_score')
    if year_data.empty:
        continue  # nothing to report for this season
    row = year_data.iloc[0]
    print(f"{year}: {row['player_name']:20s} - Rating: {row['custom_rating']:.1f}, Salary: ${row['salary_millions']:.2f}M, Value: {row['value_score']:+.1f}")

## 5. Visualizations

In [None]:
# Scatter plot: Rating vs. Salary
fig, ax = plt.subplots(figsize=(14, 8))

value_scores = qb_contracts['value_score']

# Centre the diverging colormap on 0 so the colours match the colorbar label:
# positive scores shade towards green, negative scores towards red. With the
# default min/max scaling the midpoint drifts to wherever the data's mid-range
# falls, so a positive score could be drawn in red.
color_limit = value_scores.abs().max()
if not np.isfinite(color_limit) or color_limit == 0:
    # Empty, all-NaN or all-zero scores: fall back to automatic scaling.
    color_limit = None

scatter = ax.scatter(
    qb_contracts['salary_millions'],
    qb_contracts['custom_rating'],
    c=value_scores,
    cmap='RdYlGn',
    vmin=None if color_limit is None else -color_limit,
    vmax=color_limit,
    s=100,
    alpha=0.6,
    edgecolors='black'
)

ax.set_xlabel('Salary (Millions)', fontsize=12)
ax.set_ylabel('Custom QB Rating', fontsize=12)
ax.set_title('QB Performance vs. Salary (All Years)', fontsize=14, fontweight='bold')
ax.grid(alpha=0.3)

# Attach the colorbar to this figure/axes explicitly and set its label once
# (the previous label= argument was immediately overwritten by set_label).
cbar = fig.colorbar(scatter, ax=ax)
cbar.set_label('Value Score\n(Green = Good Value, Red = Overpaid)', fontsize=10)

plt.tight_layout()
plt.show()

In [None]:
# Distribution of value scores
fig, ax = plt.subplots(figsize=(12, 6))

axis_label_font = {'fontsize': 12}
title_font = {'fontsize': 14, 'fontweight': 'bold'}

# Titles and axis labels
ax.set_title('Distribution of QB Contract Value Scores', **title_font)
ax.set_xlabel('Value Score', **axis_label_font)
ax.set_ylabel('Frequency', **axis_label_font)

# Histogram plus a dashed reference line at a value score of zero
ax.hist(qb_contracts['value_score'], bins=30, edgecolor='black', alpha=0.7)
ax.axvline(0, color='red', linestyle='--', linewidth=2, label='Break-even point')

ax.legend()
ax.grid(alpha=0.3)

plt.tight_layout()
plt.show()

In [None]:
# Value category distribution
fig, ax = plt.subplots(figsize=(10, 6))

# Bar colour for each category; the dict order is the left-to-right bar order.
category_palette = {
    'Excellent Value': '#2ecc71',
    'Good Value': '#27ae60',
    'Fair Value': '#f39c12',
    'Overpaid': '#e74c3c',
    'Severely Overpaid': '#c0392b',
}
category_order = list(category_palette)
colors = list(category_palette.values())

# Count rows per category; categories absent from the data are shown as 0.
category_counts = (
    qb_contracts['value_category']
    .value_counts()
    .reindex(category_order, fill_value=0)
)
category_counts.plot(kind='bar', ax=ax, color=colors, edgecolor='black')

axis_label_font = {'fontsize': 12}
ax.set_title('QB Contract Value Distribution by Category', fontsize=14, fontweight='bold')
ax.set_xlabel('Value Category', **axis_label_font)
ax.set_ylabel('Number of QB-Seasons', **axis_label_font)
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')
ax.grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.show()

## 6. Export for Streamlit App

In [None]:
# Prepare final dataset for Streamlit
# Source column -> display header. The dict order defines the column order
# of the exported frame.
streamlit_headers = {
    'player_name': 'Player',
    'season': 'Season',
    'team': 'Team',
    'attempts': 'Attempts',
    'custom_rating': 'Rating',
    'salary_millions': 'Salary ($M)',
    'value_score': 'Value Score',
    'value_category': 'Value Category',
    'total_pass_epa': 'EPA',
    'cpoe': 'CPOE',
    'success_rate_pct': 'Success Rate %',
    'completion_pct': 'Completion %',
    'td_rate_pct': 'TD Rate %',
}

# Select the columns and rename them for display in one step; both operations
# return new frames, so qb_contracts itself is left untouched.
app_data = qb_contracts[list(streamlit_headers)].rename(columns=streamlit_headers)

print(f"\nCreated app dataset with {len(app_data)} records")
print("\nSample:")
app_data.head(10)

In [None]:
# Save to CSV for easy loading in Streamlit
output_path = '../project_CSVs/qb_contract_value_analysis.csv'

# to_csv() does not create missing directories, so make sure the target
# folder exists before writing (no-op when it is already there).
Path(output_path).parent.mkdir(parents=True, exist_ok=True)

app_data.to_csv(output_path, index=False)
print(f"\nSaved contract value data to: {output_path}")
print("Ready for Streamlit app integration!")

## 7. Key Insights Summary

In [None]:
# Text summary of the exported dataset: size, category mix, and the five
# best/worst rows by value score, followed by suggested app features.
category_order = ['Excellent Value', 'Good Value', 'Fair Value', 'Overpaid', 'Severely Overpaid']

divider = "="*100
print(divider)
print("KEY INSIGHTS FOR STREAMLIT APP")
print(divider)

total_records = len(app_data)

print("\n1. DATASET OVERVIEW:")
print(f"   - Total QB-seasons analyzed: {total_records}")
print(f"   - Years covered: {app_data['Season'].min()} - {app_data['Season'].max()}")
print(f"   - Unique QBs: {app_data['Player'].nunique()}")

print("\n2. VALUE DISTRIBUTION:")
# Count every category in one pass instead of filtering the frame per category.
records_per_category = app_data['Value Category'].value_counts()
for category in category_order:
    count = int(records_per_category.get(category, 0))
    # An empty dataset reports 0.0% rather than raising ZeroDivisionError.
    pct = (count / total_records) * 100 if total_records else 0.0
    print(f"   - {category:20s}: {count:3d} ({pct:.1f}%)")

# Sections 3 and 4 share one row format; only the heading and the ranking
# method differ. The ranking is evaluated after its heading is printed.
ranked_sections = (
    ("\n3. BEST VALUE (Top 5):", app_data.nlargest),
    ("\n4. WORST VALUE (Bottom 5):", app_data.nsmallest),
)
for heading, rank_rows in ranked_sections:
    print(heading)
    for _, row in rank_rows(5, 'Value Score').iterrows():
        print(f"   {row['Player']:20s} ({row['Season']}) - Rating: {row['Rating']:.1f}, Salary: ${row['Salary ($M)']:.1f}M, Value: {row['Value Score']:+.1f}")

print("\n5. RECOMMENDED STREAMLIT FEATURES:")
for feature in (
    "Year filter (dropdown or slider)",
    "Conditional formatting: Green for positive value, Red for negative",
    "Sortable columns",
    "Two side-by-side tables: Best Value vs Worst Value",
    "Scatter plot: Rating vs Salary with color-coded value scores",
):
    print(f"   - {feature}")

print("\n" + divider)

In [None]:
# Close database connection
# `conn` is the sqlite3 handle opened when the data was loaded; release it
# here as the notebook's final step.
conn.close()
print("\nDatabase connection closed.")