# 07 — Quarterback Value Deep Dive

**Research Question:** How do QB draft strategies impact value? When do QBs peak? Do second contracts destroy ROI?

**Analysis:**
1. Career arc analysis (QB development curve vs RB immediate peak)
2. Draft ROI by round (rookie contract years)
3. Draft success by pick range (top 10, 11-32, 33-100, 101+)
4. Rookie contract vs second contract value comparison
5. Top 10 bargains and busts
6. QB salary inflation trend (2015-2024)
7. Performance vs. salary scatter plot

**Outputs:**
- Career arc: Value score by years of experience
- Draft ROI: Average value by draft round
- Success rates by pick range
- Rookie vs second contract comparison
- Top bargains and busts visualizations
- Salary trend over time
- Performance vs salary scatter

In [None]:
import sys
sys.path.insert(0, '..')

import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import warnings
warnings.filterwarnings('ignore', category=FutureWarning)
from pathlib import Path

# Parquet inputs live in ../data, relative to the notebook's working directory
DATA_DIR = Path('..') / 'data'

import plotly.io as pio

# Use the 'notebook' renderer for every fig.show() call below
pio.renderers.default = 'notebook'

# Raise pandas' display limits so printed frames are not truncated
for _option, _limit in {'display.max_columns': 50, 'display.max_rows': 100}.items():
    pd.set_option(_option, _limit)

---
## 1. Load Data

In [None]:
# Scored player-seasons (one row per player per season)
scored = pd.read_parquet(DATA_DIR / 'scored.parquet', engine='fastparquet')
print(f"Scored data: {scored.shape[0]:,} player-seasons")

# Roster records, used here only as the source of draft metadata
rosters = pd.read_parquet(DATA_DIR / 'rosters.parquet', engine='fastparquet')
print(f"Rosters data: {rosters.shape[0]:,} player-week records")

# One draft record per player: keep rows that have a draft pick, then take the
# first row seen for each player_id
draft_columns = ['player_id', 'player_name', 'draft_number', 'draft_club', 'rookie_year']
has_draft_pick = rosters['draft_number'].notna()
draft_info = rosters.loc[has_draft_pick, draft_columns].drop_duplicates('player_id').copy()

# Picks that cannot be parsed as numbers become NaN
draft_info['draft_number'] = pd.to_numeric(draft_info['draft_number'], errors='coerce')

# Round is derived from the overall pick assuming 32 picks per round, capped at 7.
# NOTE(review): real rounds can hold more than 32 picks, so this is an approximation.
derived_round = (draft_info['draft_number'] - 1) // 32 + 1
draft_info['draft_round'] = derived_round.astype('Int64').clip(upper=7)

print(f"\nDrafted players with info: {len(draft_info):,}")

---
## 2. Filter to Quarterbacks

In [None]:
# Keep only quarterback player-seasons from the scored data
qb_scored = scored[scored['pos_group'] == 'QB'].copy()
print(f"QB player-seasons: {qb_scored.shape[0]:,}")
print(f"Unique QB players: {qb_scored['player_id'].nunique():,}")
print(f"Seasons covered: {sorted(qb_scored['season'].unique())}")

# Attach draft info. The join uses player_id only: also keying on player_name
# would silently drop the draft columns for any player whose name is spelled
# differently in the two sources. The name column from the scored data is kept.
# validate= asserts that draft_info has at most one row per player_id, so the
# left join cannot duplicate player-seasons.
draft_columns = draft_info.loc[:, draft_info.columns != 'player_name']
qb_df = qb_scored.merge(
    draft_columns,
    on='player_id',
    how='left',
    validate='many_to_one',
)
print(f"\nQBs with draft info: {qb_df['draft_number'].notna().sum():,} player-seasons")

# Seasons elapsed since the player's rookie year (NaN for players without draft info)
qb_df['years_since_draft'] = qb_df['season'] - qb_df['rookie_year']

# Show sample
qb_df[['player_name', 'season', 'draft_round', 'draft_number', 'years_since_draft', 'value_score', 'apy_cap_pct']].head(10)

---
## 3. Career Arc Analysis (QB Development Curve)

In [None]:
# Restrict to QB seasons with a known, non-negative number of years since the draft
years_out = qb_df['years_since_draft']
qb_career = qb_df.loc[years_out.notna() & (years_out >= 0)].copy()
qb_career['years_since_draft'] = qb_career['years_since_draft'].astype(int)

# Drop everything past year 10, where the sample gets small
within_ten_years = qb_career['years_since_draft'] <= 10
qb_career = qb_career.loc[within_ten_years].copy()

# One row per career year: value, performance, salary share and sample size
arc_metrics = {
    'avg_value': ('value_score', 'mean'),
    'median_value': ('value_score', 'median'),
    'avg_performance': ('performance_zscore', 'mean'),
    'avg_salary_pct': ('apy_cap_pct', 'mean'),
    'count': ('value_score', 'size'),
}
career_arc = qb_career.groupby('years_since_draft').agg(**arc_metrics).reset_index()

print("QB Career Arc Summary:")
print(career_arc)

# Line chart of average value score by career year; the per-year sample size
# is exposed in the hover text through customdata
arc_colour = '#0066cc'
arc_trace = go.Scatter(
    x=career_arc['years_since_draft'],
    y=career_arc['avg_value'],
    mode='lines+markers',
    name='Avg Value Score',
    marker=dict(size=10, color=arc_colour),
    line=dict(width=3, color=arc_colour),
    hovertemplate='<b>Year %{x}</b><br>Avg Value: %{y:.2f}<br>N=%{customdata:,}<extra></extra>',
    customdata=career_arc['count'],
)
fig = go.Figure(data=[arc_trace])

# NOTE: the subtitle below is fixed text; it is not derived from career_arc
fig.update_layout(
    title='<b>QB Career Arc: Value Score by Years of Experience</b><br><sub>QBs Peak Later Than RBs (Years 3-5)</sub>',
    xaxis_title='Years Since Draft',
    yaxis_title='Average Value Score',
    width=900,
    height=500,
    hovermode='x',
)

# Dashed zero line separates positive from negative average value
fig.add_hline(y=0, line_dash="dash", line_color="gray", opacity=0.5)
fig.show()

# Create the image folder, then export the chart. The later cells write into
# this same folder without creating it themselves.
import os
os.makedirs('../article/images/qb_deep_dive', exist_ok=True)
fig.write_image('../article/images/qb_deep_dive/career_arc.png', width=900, height=500)

---
## 4. Draft ROI by Round (Rookie Contract Years)

In [None]:
# Rookie-contract window: the first four seasons (years 0 through 3)
career_year = qb_career['years_since_draft']
qb_rookie = qb_career[(career_year >= 0) & (career_year <= 3)].copy()
print(f"QB rookie contract seasons: {qb_rookie.shape[0]:,}")


def _bargain_share(flags):
    """Return the percentage of rows flagged as bargains (0 for an empty group)."""
    if len(flags) > 0:
        return flags.sum() / len(flags) * 100
    return 0


# Per-round summary of rookie-contract seasons
roi_metrics = {
    'avg_value': ('value_score', 'mean'),
    'median_value': ('value_score', 'median'),
    'avg_performance': ('performance_zscore', 'mean'),
    'avg_salary_pct': ('apy_cap_pct', 'mean'),
    'count': ('value_score', 'size'),
    'pct_bargains': ('is_bargain', _bargain_share),
}
qb_draft_roi = qb_rookie.groupby('draft_round').agg(**roi_metrics).reset_index()

print("\nQB Draft ROI by Round:")
print(qb_draft_roi)

# Bar colours: green above 0.5, orange for any other positive value, red otherwise
bar_colours = []
for round_value in qb_draft_roi['avg_value']:
    if round_value > 0.5:
        bar_colours.append('green')
    elif round_value > 0:
        bar_colours.append('orange')
    else:
        bar_colours.append('red')

roi_bars = go.Bar(
    x=qb_draft_roi['draft_round'],
    y=qb_draft_roi['avg_value'],
    text=qb_draft_roi['avg_value'].round(2),
    textposition='outside',
    marker_color=bar_colours,
    hovertemplate='<b>Round %{x}</b><br>Avg Value: %{y:.2f}<br>N=%{customdata[0]:,}<br>% Bargains: %{customdata[1]:.1f}%<extra></extra>',
    customdata=qb_draft_roi[['count', 'pct_bargains']],
)
fig = go.Figure(data=[roi_bars])

# NOTE: the "Round 5" claim in the subtitle is fixed text, not computed from qb_draft_roi
fig.update_layout(
    title='<b>QB Draft ROI: Average Value by Round</b><br><sub>Rookie Contract Years Only | Round 5 QBs Deliver Best Value</sub>',
    xaxis_title='Draft Round',
    yaxis_title='Average Value Score',
    width=800,
    height=500,
    showlegend=False,
)

fig.add_hline(y=0, line_dash="dash", line_color="gray", opacity=0.5)
fig.show()

# Export (the target folder is created in the career-arc cell)
fig.write_image('../article/images/qb_deep_dive/draft_roi_by_round.png', width=800, height=500)

---
## 5. Draft Success by Pick Range

In [None]:
# Bucket rookie-contract seasons by overall pick. pd.cut uses right-closed
# intervals, so the bins are (0, 10], (10, 32], (32, 100] and (100, 300].
# The last bucket therefore starts at pick 101, and its label says so.
qb_rookie_picks = qb_rookie[qb_rookie['draft_number'].notna()].copy()
qb_rookie_picks['pick_range'] = pd.cut(
    qb_rookie_picks['draft_number'],
    bins=[0, 10, 32, 100, 300],
    labels=['1-10 (Top 10)', '11-32 (Late R1)', '33-100 (R2-3)', '101+ (R4-7)']
)

# Success rate = share of seasons with value_score > 0.5.
# observed=False is passed explicitly so every pick range stays in the result
# even when it has no rows. This notebook silences FutureWarning, so a change in
# the pandas default would otherwise drop empty ranges without any notice.
success_by_pick = qb_rookie_picks.groupby('pick_range', observed=False).agg(
    avg_value=('value_score', 'mean'),
    success_rate=('value_score', lambda x: (x > 0.5).sum() / len(x) * 100),
    count=('value_score', 'size')
).reset_index()

print("QB Success Rates by Pick Range:")
print(success_by_pick)

# Bar chart: green above 30%, amber above 20%, red otherwise
fig = go.Figure()

fig.add_trace(go.Bar(
    x=success_by_pick['pick_range'],
    y=success_by_pick['success_rate'],
    text=success_by_pick['success_rate'].round(1).astype(str) + '%',
    textposition='outside',
    marker_color=['#2ecc71' if v > 30 else '#f39c12' if v > 20 else '#e74c3c' for v in success_by_pick['success_rate']],
    hovertemplate='<b>%{x}</b><br>Success Rate: %{y:.1f}%<br>N=%{customdata:,}<extra></extra>',
    customdata=success_by_pick['count']
))

fig.update_layout(
    title='<b>QB Draft Success Rates by Pick Range</b><br><sub>% of Seasons with Value Score > 0.5 (Rookie Contracts)</sub>',
    xaxis_title='Pick Range',
    yaxis_title='Success Rate (%)',
    width=800,
    height=500,
    showlegend=False
)

fig.show()

# Export (the target folder is created in the career-arc cell)
fig.write_image('../article/images/qb_deep_dive/success_by_pick_range.png', width=800, height=500)

---
## 6. Rookie Contract vs Second Contract Value

In [None]:
# Label each drafted QB season as rookie-contract (years 0-3) or later.
# Seasons with a negative years_since_draft are excluded, matching the
# career-arc filter; otherwise they would be counted as "Rookie" seasons.
draft_years = qb_df['years_since_draft']
qb_df_contracts = qb_df[draft_years.notna() & (draft_years >= 0)].copy()
qb_df_contracts['contract_phase'] = qb_df_contracts['years_since_draft'].apply(
    lambda x: 'Rookie (Years 0-3)' if x <= 3 else 'Second Contract (Years 4+)'
)

# Summary statistics for each phase
contract_comparison = qb_df_contracts.groupby('contract_phase').agg(
    avg_value=('value_score', 'mean'),
    median_value=('value_score', 'median'),
    avg_performance=('performance_zscore', 'mean'),
    avg_salary_pct=('apy_cap_pct', 'mean'),
    count=('value_score', 'size')
).reset_index()

print("Rookie vs Second Contract Comparison:")
print(contract_comparison)

# One box per phase; boxmean='sd' also draws the mean and standard deviation
fig = go.Figure()

for phase in ['Rookie (Years 0-3)', 'Second Contract (Years 4+)']:
    data = qb_df_contracts[qb_df_contracts['contract_phase'] == phase]['value_score']
    fig.add_trace(go.Box(
        y=data,
        name=phase,
        marker_color='#3498db' if 'Rookie' in phase else '#e74c3c',
        boxmean='sd'
    ))

# NOTE: the subtitle is fixed text; it is not derived from contract_comparison
fig.update_layout(
    title='<b>QB Value: Rookie Contract vs Second Contract</b><br><sub>Second Contracts Show Negative Average Value</sub>',
    yaxis_title='Value Score',
    width=700,
    height=500,
    showlegend=False
)

fig.add_hline(y=0, line_dash="dash", line_color="gray", opacity=0.5)
fig.show()

# Export (the target folder is created in the career-arc cell)
fig.write_image('../article/images/qb_deep_dive/rookie_vs_second_contract.png', width=700, height=500)

---
## 7. Top 10 QB Bargains

In [None]:
# Career-level summary for each drafted QB, grouped by name and draft slot
drafted_qbs = qb_df[qb_df['draft_number'].notna()]
player_keys = ['player_name', 'draft_round', 'draft_number']
career_metrics = {
    'avg_value': ('value_score', 'mean'),
    'total_seasons': ('season', 'size'),
    'avg_performance': ('performance_zscore', 'mean'),
    'avg_salary_pct': ('apy_cap_pct', 'mean'),
    # NOTE: despite its name, this is the largest season number in the group
    # (the player's latest season), not the season with the highest value score
    'best_season': ('season', 'max'),
}
qb_player_agg = drafted_qbs.groupby(player_keys).agg(**career_metrics).reset_index()

# Require at least two seasons so a single season cannot top the ranking
has_two_seasons = qb_player_agg['total_seasons'] >= 2
qb_player_agg = qb_player_agg[has_two_seasons].copy()

# Ten highest career-average value scores
top_bargains = qb_player_agg.nlargest(10, 'avg_value')

print("Top 10 QB Bargains (Career Avg Value):")
print(top_bargains[['player_name', 'draft_round', 'draft_number', 'avg_value', 'total_seasons']])

# Horizontal bars coloured by draft round
bargain_labels = {'avg_value': 'Avg Value Score', 'player_name': '', 'draft_round': 'Draft Round'}
fig = px.bar(
    top_bargains,
    x='avg_value',
    y='player_name',
    color='draft_round',
    orientation='h',
    title='<b>Top 10 QB Bargains</b><br><sub>Highest Career Average Value Score (Min 2 Seasons)</sub>',
    labels=bargain_labels,
    hover_data=['draft_number', 'total_seasons'],
    color_continuous_scale='Greens',
)

# Order the y-axis categories by bar total, ascending
fig.update_layout(width=900, height=600, yaxis={'categoryorder': 'total ascending'})

fig.show()

# Export (the target folder is created in the career-arc cell)
fig.write_image('../article/images/qb_deep_dive/top_bargains.png', width=900, height=600)

---
## 8. Top 10 QB Busts

In [None]:
# Ten lowest career-average value scores, across all draft rounds
top_busts = qb_player_agg.nsmallest(10, 'avg_value')

print("Top 10 QB Busts (Career Avg Value):")
bust_columns = ['player_name', 'draft_round', 'draft_number', 'avg_value', 'total_seasons']
print(top_busts[bust_columns])

# Horizontal bars coloured by overall pick number
bust_labels = {'avg_value': 'Avg Value Score', 'player_name': '', 'draft_number': 'Draft Pick #'}
fig = px.bar(
    top_busts,
    x='avg_value',
    y='player_name',
    color='draft_number',
    orientation='h',
    title='<b>Top 10 QB Busts</b><br><sub>Lowest Career Average Value Score</sub>',
    labels=bust_labels,
    hover_data=['draft_round', 'total_seasons'],
    color_continuous_scale='Reds',
)

# Order the y-axis categories by bar total, descending
fig.update_layout(width=900, height=600, yaxis={'categoryorder': 'total descending'})

fig.show()

# Export (the target folder is created in the career-arc cell)
fig.write_image('../article/images/qb_deep_dive/top_busts.png', width=900, height=600)

---
## 9. QB Salary Inflation Trend (2015-2024)

In [None]:
# Per-season QB salary share: mean, median and number of rows
salary_trend = (
    qb_df.groupby('season')
    .agg(
        avg_salary_pct=('apy_cap_pct', 'mean'),
        median_salary_pct=('apy_cap_pct', 'median'),
        count=('apy_cap_pct', 'size'),
    )
    .reset_index()
)

print("QB Salary Inflation Trend:")
print(salary_trend)

seasons = salary_trend['season']
avg_share_pct = salary_trend['avg_salary_pct'] * 100  # Convert to percentage
series_colour = '#e74c3c'

# Observed average per season
observed_trace = go.Scatter(
    x=seasons,
    y=avg_share_pct,
    mode='lines+markers',
    name='Avg QB Salary',
    marker=dict(size=10, color=series_colour),
    line=dict(width=3, color=series_colour),
    hovertemplate='<b>%{x}</b><br>Avg: %{y:.2f}% of cap<extra></extra>',
)

# Straight-line (degree-1) least-squares fit through the seasonal averages
linear_fit = np.poly1d(np.polyfit(seasons, avg_share_pct, 1))
trend_trace = go.Scatter(
    x=seasons,
    y=linear_fit(seasons),
    mode='lines',
    name='Trend',
    line=dict(dash='dash', color='gray', width=2),
    hovertemplate='Trend: %{y:.2f}%<extra></extra>',
)

fig = go.Figure(data=[observed_trace, trend_trace])

# NOTE: the year range and subtitle are fixed text, not derived from salary_trend
fig.update_layout(
    title='<b>QB Salary Inflation: Average QB APY as % of Cap (2015-2024)</b><br><sub>QB Salaries Growing Faster Than Salary Cap</sub>',
    xaxis_title='Season',
    yaxis_title='Average QB APY (% of Salary Cap)',
    width=900,
    height=500,
    showlegend=True,
)

fig.show()

# Export (the target folder is created in the career-arc cell)
fig.write_image('../article/images/qb_deep_dive/salary_trend.png', width=900, height=500)

---
## 10. Performance vs. Salary Scatter Plot

In [None]:
# Career-average performance against career-average salary share, one marker
# per QB. avg_salary_pct is a fraction of the cap (the salary-trend cell
# multiplies the same column by 100 to get a percentage), so the hover value and
# the x-axis ticks are formatted as percentages to match the "% of Cap" label.
fig = px.scatter(
    qb_player_agg,
    x='avg_salary_pct',
    y='avg_performance',
    color='draft_round',
    size='total_seasons',
    hover_name='player_name',
    hover_data={'avg_salary_pct': ':.1%'},
    title='<b>QB Performance vs. Salary</b><br><sub>Career Averages | Size = Total Seasons | Color = Draft Round</sub>',
    labels={
        'avg_salary_pct': 'Avg Salary (% of Cap)',
        'avg_performance': 'Avg Performance Z-Score',
        'draft_round': 'Draft Round',
        'total_seasons': 'Seasons'
    },
    color_continuous_scale='Viridis'
)

fig.update_layout(
    width=1000,
    height=600
)

# Show tick values such as 0.05 as "5.0%"; the plotted data is unchanged
fig.update_xaxes(tickformat='.1%')

# Fixed diagonal reference line from (0, -2) to (0.2, 3), i.e. 0% to 20% of cap.
# The endpoints are hard-coded rather than fitted to the data.
fig.add_shape(
    type="line",
    x0=0, y0=-2, x1=0.2, y1=3,
    line=dict(dash="dash", color="gray", width=2)
)

fig.show()

# Export (the target folder is created in the career-arc cell)
fig.write_image('../article/images/qb_deep_dive/performance_vs_salary.png', width=1000, height=600)

---
## 11. Summary Statistics and Key Findings

In [None]:
# Text recap of the tables built in the earlier cells
print("="*80)
print("QUARTERBACK VALUE DEEP DIVE - KEY FINDINGS")
print("="*80)

print("\n1. CAREER ARC:")
print(f"Peak Years: Years {career_arc.nlargest(3, 'avg_value')['years_since_draft'].tolist()}")
# Earliest career year whose average value is below zero, if there is one
negative_years = career_arc.loc[career_arc['avg_value'] < 0, 'years_since_draft']
first_negative_year = negative_years.min() if len(negative_years) > 0 else 'N/A'
print(f"Value drops negative by Year: {first_negative_year}")

print("\n2. DRAFT ROI BY ROUND (ROOKIE CONTRACTS):")
print(qb_draft_roi[['draft_round', 'avg_value', 'count']])

print("\n3. SUCCESS RATES BY PICK RANGE:")
print(success_by_pick[['pick_range', 'success_rate', 'count']])

print("\n4. ROOKIE VS SECOND CONTRACT:")
print(contract_comparison[['contract_phase', 'avg_value', 'avg_salary_pct', 'count']])

print("\n5. TOP 5 BARGAINS:")
print(top_bargains.head(5)[['player_name', 'draft_round', 'draft_number', 'avg_value']])

print("\n6. TOP 5 BUSTS:")
print(top_busts.head(5)[['player_name', 'draft_round', 'draft_number', 'avg_value']])


def _season_avg_salary(season):
    """Return the average QB cap share for a season, or None if it is not in salary_trend."""
    matches = salary_trend.loc[salary_trend['season'] == season, 'avg_salary_pct']
    return None if matches.empty else matches.iloc[0]


# Compare the two endpoint seasons. A season missing from the data prints N/A
# instead of raising IndexError from .iloc[0] on an empty selection.
start_season, end_season = 2015, 2024
start_pct = _season_avg_salary(start_season)
end_pct = _season_avg_salary(end_season)

print("\n7. SALARY TREND:")
for season, pct in ((start_season, start_pct), (end_season, end_pct)):
    if pct is None:
        print(f"{season} Avg QB Salary: N/A (season not in data)")
    else:
        print(f"{season} Avg QB Salary: {pct * 100:.2f}% of cap")
if start_pct is None or end_pct is None:
    print("Growth: N/A")
else:
    print(f"Growth: {((end_pct / start_pct) - 1) * 100:.1f}%")

# NOTE: the conclusion below is fixed text; it is not recomputed from the tables
# above, so re-check it whenever the underlying data changes.
print("\n" + "="*80)
print("CONCLUSION: Round 5 QBs provide best ROI. Second contracts destroy value.")
print("QB salaries growing faster than cap — teams must maximize rookie contract window.")
print("="*80)