# 06 — Running Back Economics

**Research Question:** Why do first-round RBs provide poor value? When do running backs peak? Are late-round RBs better investments?

**Analysis:**
1. Career arc analysis (peak age, decline rate)
2. Draft ROI by round (confirm Round 1-2 RBs barely break even)
3. Top 10 bargains (late-round steals)
4. Top 10 busts (first-round disappointments)
5. Positional replaceability analysis
6. Contract year performance effect (planned; not yet implemented in this notebook)

**Outputs:**
- Career arc: Value score by years of experience
- Draft ROI: Average value by draft round (RB-specific)
- Top bargains and busts visualizations
- Salary vs. performance scatter plot
- Replaceability metrics (UDFA vs. drafted RBs)

In [None]:
# Standard library
import sys
import warnings
from pathlib import Path

# Put the parent directory first on the import path (done before the
# third-party imports, as in the original cell order).
sys.path.insert(0, '..')

# Third-party
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
from plotly.subplots import make_subplots

# Hide FutureWarning messages raised from this point on.
warnings.filterwarnings('ignore', category=FutureWarning)

# Data directory, relative to the notebook location.
DATA_DIR = Path('../data')

# Render plotly figures with the 'notebook' renderer.
pio.renderers.default = 'notebook'

# Widen pandas' display limits for printed frames.
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 50)

---
## 1. Load Data

In [None]:
# Scored player-seasons (one row per player per season).
scored = pd.read_parquet(DATA_DIR / 'scored.parquet', engine='fastparquet')
print(f"Scored data: {scored.shape[0]:,} player-seasons")

# Roster records, used here only for draft and rookie-year information.
rosters = pd.read_parquet(DATA_DIR / 'rosters.parquet', engine='fastparquet')
print(f"Rosters data: {rosters.shape[0]:,} player-week records")

# One draft record per player: keep roster rows that carry a draft number,
# then keep the first row seen for each player_id.
draft_columns = ['player_id', 'player_name', 'draft_number', 'draft_club', 'rookie_year']
has_draft_number = rosters['draft_number'].notna()
draft_info = rosters.loc[has_draft_number, draft_columns].drop_duplicates('player_id').copy()

# Non-numeric draft numbers become NaN rather than raising.
draft_info['draft_number'] = pd.to_numeric(draft_info['draft_number'], errors='coerce')

# Derive the round from the overall pick number, 32 picks per round, capped at 7.
# NOTE(review): this is an approximation -- actual round boundaries may not
# fall on multiples of 32 (e.g. extra picks in later rounds); confirm acceptable.
approx_round = (draft_info['draft_number'] - 1) // 32 + 1
draft_info['draft_round'] = approx_round.astype('Int64').clip(upper=7)

print(f"\nDrafted players with info: {len(draft_info):,}")

---
## 2. Filter to Running Backs

In [None]:
# Keep only running-back rows from the scored player-seasons.
rb_scored = scored[scored['pos_group'] == 'RB'].copy()
print(f"RB player-seasons: {rb_scored.shape[0]:,}")
print(f"Unique RB players: {rb_scored['player_id'].nunique():,}")
print(f"Seasons covered: {sorted(rb_scored['season'].unique())}")

# Attach draft info by player_id only. Joining on player_name as well would
# silently drop the draft record whenever the two sources spell a name
# differently, and those players would then be counted as undrafted further
# down. The roster-side name column is dropped so that rb_df keeps a single
# 'player_name' column (the one from the scored data).
# validate='many_to_one' raises if draft_info ever holds more than one row per
# player_id, which would otherwise duplicate player-seasons.
draft_lookup = draft_info.drop(columns='player_name')
rb_df = rb_scored.merge(draft_lookup, on='player_id', how='left', validate='many_to_one')
print(f"\nRBs with draft info: {rb_df['draft_number'].notna().sum():,} player-seasons")

# Seasons elapsed since the rookie year (0 = rookie season); NaN when the
# player has no matched draft record.
rb_df['years_since_draft'] = rb_df['season'] - rb_df['rookie_year']

# Show sample
rb_df[['player_name', 'season', 'draft_round', 'draft_number', 'years_since_draft', 'value_score', 'apy_cap_pct']].head(10)

---
## 3. Career Arc Analysis (Performance by Experience)

In [None]:
# Keep seasons whose tenure (season - rookie_year) is known and non-negative.
tenure = rb_df['years_since_draft']
rb_career = rb_df.loc[tenure.notna() & tenure.ge(0)].copy()
rb_career['years_since_draft'] = rb_career['years_since_draft'].astype(int)

# Drop anything past year 10 (small sample after that).
rb_career = rb_career.loc[rb_career['years_since_draft'].le(10)].copy()

# One summary row per year of experience.
arc_metrics = {
    'avg_value': ('value_score', 'mean'),
    'median_value': ('value_score', 'median'),
    'avg_performance': ('performance_zscore', 'mean'),
    'avg_salary_pct': ('apy_cap_pct', 'mean'),
    'count': ('value_score', 'size'),
}
career_arc = rb_career.groupby('years_since_draft').agg(**arc_metrics).reset_index()

print("Career Arc Summary:")
print(career_arc)

# Line chart of average value score across the career; the hover shows the
# number of player-seasons behind each point.
arc_trace = go.Scatter(
    x=career_arc['years_since_draft'],
    y=career_arc['avg_value'],
    mode='lines+markers',
    name='Avg Value Score',
    marker=dict(size=10),
    line=dict(width=3),
    customdata=career_arc['count'],
    hovertemplate='<b>Year %{x}</b><br>Avg Value: %{y:.2f}<br>N=%{customdata:,}<extra></extra>',
)
fig = go.Figure(data=[arc_trace])

fig.update_layout(
    title='<b>RB Career Arc: Value Score by Years of Experience</b><br><sub>When Do Running Backs Peak?</sub>',
    xaxis_title='Years Since Draft',
    yaxis_title='Average Value Score',
    hovermode='x',
    width=900,
    height=500,
)

# Zero reference line.
fig.add_hline(y=0, line_dash="dash", line_color="gray", opacity=0.5)
fig.show()

# TODO: Save to article/images/rb_economics/career_arc.png

---
## 4. Draft ROI by Round (RB-Specific)

In [None]:
def _bargain_share_pct(flags):
    """Return the percentage of rows in one group flagged as bargains."""
    return flags.sum() / len(flags) * 100


def _roi_bar_color(avg_value):
    """Map an average value score to a bar colour (green > 0.3, orange > 0, else red)."""
    if avg_value > 0.3:
        return 'green'
    if avg_value > 0:
        return 'orange'
    return 'red'


# Rookie-contract window: years 0 through 3 since the draft (first 4 seasons).
rb_rookie = rb_career[rb_career['years_since_draft'].between(0, 3)].copy()
print(f"RB rookie contract seasons: {rb_rookie.shape[0]:,}")

# One summary row per draft round.
roi_metrics = {
    'avg_value': ('value_score', 'mean'),
    'median_value': ('value_score', 'median'),
    'avg_performance': ('performance_zscore', 'mean'),
    'avg_salary_pct': ('apy_cap_pct', 'mean'),
    'count': ('value_score', 'size'),
    'pct_bargains': ('is_bargain', _bargain_share_pct),
}
rb_draft_roi = rb_rookie.groupby('draft_round').agg(**roi_metrics).reset_index()

print("\nRB Draft ROI by Round:")
print(rb_draft_roi)

# Bar chart: one bar per round, coloured by how positive the average value is.
bar_colors = [_roi_bar_color(v) for v in rb_draft_roi['avg_value']]
roi_bars = go.Bar(
    x=rb_draft_roi['draft_round'],
    y=rb_draft_roi['avg_value'],
    text=rb_draft_roi['avg_value'].round(2),
    textposition='outside',
    marker_color=bar_colors,
    customdata=rb_draft_roi[['count', 'pct_bargains']],
    hovertemplate='<b>Round %{x}</b><br>Avg Value: %{y:.2f}<br>N=%{customdata[0]:,}<br>% Bargains: %{customdata[1]:.1f}%<extra></extra>',
)
fig = go.Figure(data=[roi_bars])

fig.update_layout(
    title='<b>RB Draft ROI: Average Value by Round</b><br><sub>Rookie Contract Years Only | Why First-Round RBs Are Poor Investments</sub>',
    xaxis_title='Draft Round',
    yaxis_title='Average Value Score',
    showlegend=False,
    width=800,
    height=500,
)

# Zero reference line.
fig.add_hline(y=0, line_dash="dash", line_color="gray", opacity=0.5)
fig.show()

# TODO: Save to article/images/rb_economics/draft_roi_by_round.png

---
## 5. Top 10 RB Bargains (Late-Round Steals)

In [None]:
# Career-level summary for every RB that has a draft number: one row per
# (player_name, draft_round, draft_number).
drafted_seasons = rb_df[rb_df['draft_number'].notna()]
player_keys = ['player_name', 'draft_round', 'draft_number']
career_metrics = {
    'avg_value': ('value_score', 'mean'),
    'total_seasons': ('season', 'size'),
    'avg_performance': ('performance_zscore', 'mean'),
    'avg_salary_pct': ('apy_cap_pct', 'mean'),
    # NOTE(review): despite the name, this is the latest season on record
    # (max of 'season'), not the season with the highest value score.
    'best_season': ('season', 'max'),
}
rb_player_agg = drafted_seasons.groupby(player_keys).agg(**career_metrics).reset_index()

# Require at least 2 seasons so a single season cannot define a career average.
rb_player_agg = rb_player_agg.loc[rb_player_agg['total_seasons'].ge(2)].copy()

# Ten highest career-average value scores.
top_bargains = rb_player_agg.nlargest(10, 'avg_value')

print("Top 10 RB Bargains (Career Avg Value):")
print(top_bargains[['player_name', 'draft_round', 'draft_number', 'avg_value', 'total_seasons']])

# Horizontal bar chart, coloured by draft round.
bargain_chart_options = dict(
    x='avg_value',
    y='player_name',
    color='draft_round',
    orientation='h',
    title='<b>Top 10 RB Bargains</b><br><sub>Highest Career Average Value Score (Min 2 Seasons)</sub>',
    labels={'avg_value': 'Avg Value Score', 'player_name': '', 'draft_round': 'Draft Round'},
    hover_data=['draft_number', 'total_seasons'],
    color_continuous_scale='Greens',
)
fig = px.bar(top_bargains, **bargain_chart_options)

fig.update_layout(
    yaxis={'categoryorder': 'total ascending'},
    width=900,
    height=600,
)

fig.show()

# TODO: Save to article/images/rb_economics/top_bargains.png

---
## 6. Top 10 RB Busts (First-Round Disappointments)

In [None]:
# First-round picks only, taken from the per-player career summary.
is_first_round = rb_player_agg['draft_round'].eq(1)
first_round_rbs = rb_player_agg.loc[is_first_round].copy()

# Ten lowest career-average value scores among them.
top_busts = first_round_rbs.nsmallest(10, 'avg_value')

print("Top 10 First-Round RB Busts (Career Avg Value):")
print(top_busts[['player_name', 'draft_number', 'avg_value', 'total_seasons']])

# Horizontal bar chart, coloured by overall pick number.
bust_chart_options = dict(
    x='avg_value',
    y='player_name',
    color='draft_number',
    orientation='h',
    title='<b>Top 10 First-Round RB Busts</b><br><sub>Lowest Career Average Value Score</sub>',
    labels={'avg_value': 'Avg Value Score', 'player_name': '', 'draft_number': 'Draft Pick #'},
    hover_data=['total_seasons'],
    color_continuous_scale='Reds',
)
fig = px.bar(top_busts, **bust_chart_options)

fig.update_layout(
    yaxis={'categoryorder': 'total descending'},
    width=900,
    height=600,
)

fig.show()

# TODO: Save to article/images/rb_economics/top_busts.png

---
## 7. Performance vs. Salary Scatter Plot

In [None]:
# Career-average performance against career-average salary, one marker per
# player in rb_player_agg; marker size is the number of seasons.
axis_labels = {
    'avg_salary_pct': 'Avg Salary (% of Cap)',
    'avg_performance': 'Avg Performance Z-Score',
    'draft_round': 'Draft Round',
    'total_seasons': 'Seasons',
}
fig = px.scatter(
    rb_player_agg,
    x='avg_salary_pct',
    y='avg_performance',
    size='total_seasons',
    color='draft_round',
    color_continuous_scale='Viridis',
    hover_name='player_name',
    labels=axis_labels,
    title='<b>RB Performance vs. Salary</b><br><sub>Career Averages | Size = Total Seasons</sub>',
)

fig.update_layout(width=1000, height=600)

# Dashed reference line with fixed endpoints (0, -2) -> (0.1, 3).
# NOTE(review): the endpoints are hard-coded, not fitted to the data, so the
# line is only a visual guide; presumably it assumes avg_salary_pct is a
# fraction of the cap (0-0.1 range) -- confirm against the scored data.
reference_line = dict(dash="dash", color="gray", width=2)
fig.add_shape(type="line", x0=0, y0=-2, x1=0.1, y1=3, line=reference_line)

fig.show()

# TODO: Save to article/images/rb_economics/performance_vs_salary.png

---
## 8. Positional Replaceability Analysis

In [None]:
# Split RB player-seasons by whether a draft number was attached.
# "UDFA" below therefore means "no matched draft record after the merge",
# which may include players simply missing from the draft lookup.
rb_df['is_drafted'] = rb_df['draft_number'].notna()

group_metrics = {
    'avg_value': ('value_score', 'mean'),
    'median_value': ('value_score', 'median'),
    'avg_performance': ('performance_zscore', 'mean'),
    'avg_salary_pct': ('apy_cap_pct', 'mean'),
    'count': ('value_score', 'size'),
}
replaceability = rb_df.groupby('is_drafted').agg(**group_metrics).reset_index()

# Human-readable label for each group.
status_labels = {True: 'Drafted', False: 'UDFA'}
replaceability['status'] = replaceability['is_drafted'].map(status_labels)

print("Drafted vs. UDFA RBs:")
print(replaceability)

# Two-bar comparison. Colours are positional: groupby sorts False before True,
# so with both groups present blue is UDFA and orange is Drafted.
status_bars = go.Bar(
    x=replaceability['status'],
    y=replaceability['avg_value'],
    text=replaceability['avg_value'].round(2),
    textposition='outside',
    marker_color=['blue', 'orange'],
    customdata=replaceability['count'],
    hovertemplate='<b>%{x}</b><br>Avg Value: %{y:.2f}<br>N=%{customdata:,}<extra></extra>',
)
fig = go.Figure(data=[status_bars])

fig.update_layout(
    title='<b>RB Positional Replaceability</b><br><sub>Drafted vs. Undrafted Free Agent Performance</sub>',
    xaxis_title='',
    yaxis_title='Average Value Score',
    showlegend=False,
    width=600,
    height=500,
)

# Zero reference line.
fig.add_hline(y=0, line_dash="dash", line_color="gray", opacity=0.5)
fig.show()

# TODO: Save to article/images/rb_economics/replaceability.png

---
## 9. Summary Statistics and Key Findings

In [None]:
# Text recap of the tables built in the earlier cells (career_arc,
# rb_draft_roi, top_bargains, top_busts, replaceability).
print("=" * 80)
print("RUNNING BACK ECONOMICS - KEY FINDINGS")
print("=" * 80)

print("\n1. CAREER ARC:")
# The three experience years with the highest average value score.
print(f"Peak Years: Years {career_arc.nlargest(3, 'avg_value')['years_since_draft'].tolist()}")

# Decline onset: the first year AFTER the single best year in which the
# average value score is lower than in the preceding year. (The previous
# version reported the lowest-value year among years >= 3, i.e. the trough
# rather than the start of the decline, and could raise IndexError when no
# row had years_since_draft >= 3.)
arc = career_arc.dropna(subset=['avg_value']).sort_values('years_since_draft')
decline_start = 'N/A'
if not arc.empty:
    peak_year = arc.loc[arc['avg_value'].idxmax(), 'years_since_draft']
    dropped_vs_prior_year = arc['avg_value'].diff() < 0
    is_after_peak = arc['years_since_draft'] > peak_year
    onset_years = arc.loc[dropped_vs_prior_year & is_after_peak, 'years_since_draft']
    if not onset_years.empty:
        decline_start = onset_years.iloc[0]
print(f"Decline Begins: Year {decline_start}")

print("\n2. DRAFT ROI BY ROUND:")
print(rb_draft_roi[['draft_round', 'avg_value', 'count']])

print("\n3. TOP 5 BARGAINS:")
print(top_bargains.head(5)[['player_name', 'draft_round', 'draft_number', 'avg_value']])

print("\n4. TOP 5 FIRST-ROUND BUSTS:")
print(top_busts.head(5)[['player_name', 'draft_number', 'avg_value']])

print("\n5. REPLACEABILITY:")
print(replaceability[['status', 'avg_value', 'avg_performance', 'count']])

# NOTE: the conclusion below is fixed text; it is not derived from the
# numbers printed above and should be re-checked whenever the data changes.
print("\n" + "=" * 80)
print("CONCLUSION: First-round RBs provide minimal value premium over later rounds.")
print("Draft capital is better spent on other positions with higher ROI.")
print("=" * 80)

---
## 10. Export Visualizations

**TODO:** Save all charts to `article/images/rb_economics/` directory:
1. `career_arc.png`
2. `draft_roi_by_round.png`
3. `top_bargains.png`
4. `top_busts.png`
5. `performance_vs_salary.png`
6. `replaceability.png`

Use GitHub raw URLs in article markdown:
```
https://raw.githubusercontent.com/ghighcove/nfl-salary-analysis/master/article/images/rb_economics/[filename].png
```