In [1]:
import nfl_data_py as nfl
import pandas as pd

# Predicting the 2025 WR1 — Notebook Guide

This notebook builds a **short list of candidates to finish as WR1 (PPR) in 2025** using historical patterns and publicly available NFL data via `nfl_data_py`.

---

## What this notebook does

- Pulls **weekly player stats (2015–2024)** to correctly credit production by team, plus **2025 rosters**, **players (draft info)**, and **team IDs**.
- Produces:
  - **QB rankings** by passing yards (per season).
  - **True team passing totals** (weekly → seasonal; injuries/trades handled).
  - **WR1 per season** and each WR1’s **prior-year PPR per game**.
  - Draft continuity flags (**still on drafted team**).
- Applies the WR1 profile to 2025:
  1) **Years 3–8** of career in 2025  
  2) **On drafted team** (by team ID)  
  3) **≥ 16.0 PPR per game in 2024**  
  4) **Top-15 passing offense in 2024**  
  If multiple WRs qualify on the same team, the one with the **higher 2024 PPR/G** is kept.

**Final output:** a compact table — **2025 Team | WR | 2024 PPR/G**.

---

## Quick start (Google Colab)

1. Open the notebook in **Colab**.  
2. Click **Runtime → Run all**.  
3. Scroll to **“WR1 2025 Candidate List”** for the final table.

> Note: First run may install packages and restart the runtime automatically.

---

## Non-technical usage

- No edits needed — just **Run all**.
- The notebook downloads data, cleans it, applies the filters, and prints the candidates.

---

## Troubleshooting (first run)

- **Package install / `nfl_data_py` import error**  
  This can appear on the first run in Colab. Choose **“Restart runtime”** if prompted, then click **Runtime → Run all** again. The error typically resolves after the second run.

- **NameError / KeyError right after opening**  
  You likely didn’t execute all cells. Use **Runtime → Run all** to rebuild every table in order.

---

## Assumptions & caveats

- Scoring is **PPR**.  
- “Games” for PPR/G = **weeks with a recorded stat line**.  
- **Team IDs** normalize franchise code changes (e.g., OAK/LV), ensuring joins work across years.

---

## Credits

Data via **`nfl_data_py`** and the community maintaining stable team/ID mappings.


In [None]:
years = range(2015,2025)
weekly_stats_df = nfl.import_weekly_data(years)
team_ids = nfl.import_team_desc()

Downcasting floats.


## QB Seasonal Stats Table

This section lists the **top quarterbacks each year by passing yards**.  
It’s a quick way to spot the offenses that push the ball through the air.

**Why it matters for WR1 hunting**
- More QB passing yards → more chances for a WR to pile up PPR points.
- WRs attached to high-volume passers are the most likely WR1 candidates.


In [3]:
# === Build season-level QB table from WEEKLY inputs (no weekly rosters) ===

# 1) Filter weekly to REG-season QB rows and select needed columns
wk = weekly_stats_df.loc[
    (weekly_stats_df["season_type"] == "REG") & (weekly_stats_df["position"] == "QB"),
    [
        "player_id", "player_display_name",
        "season", "week",
        "recent_team",
        "attempts", "completions", "passing_yards", "passing_tds", "interceptions"
    ]
].copy()

# Coerce numerics safely
num_cols = ["attempts","completions","passing_yards","passing_tds","interceptions"]
for c in num_cols:
    wk[c] = pd.to_numeric(wk[c], errors="coerce").fillna(0)

# 2) QB totals by (player, season) — league-wide totals regardless of team
qb_season_totals = (
    wk.groupby(["player_id","season"], as_index=False)
      .agg(
          attempts=("attempts","sum"),
          completions=("completions","sum"),
          passing_yards=("passing_yards","sum"),
          passing_tds=("passing_tds","sum"),
          interceptions=("interceptions","sum"),
          games=("week","nunique"),
          # keep a stable display name (first non-null)
          player_name=("player_display_name","first")
      )
)

# 3) QB per-team breakdown within season (to find primary team)
qb_team_breakdown = (
    wk.groupby(["player_id","season","recent_team"], as_index=False)
      .agg(
          team_attempts=("attempts","sum"),
          team_completions=("completions","sum"),
          team_passing_yards=("passing_yards","sum"),
          team_passing_tds=("passing_tds","sum"),
          team_interceptions=("interceptions","sum"),
          team_games=("week","nunique"),
      )
)

# 4) Choose the QB's primary team for that season:
#    most team_passing_yards, tie-break by team_attempts, then team_completions, then team abbr
primary_team = (
    qb_team_breakdown
      .sort_values(
          ["player_id","season","team_passing_yards","team_attempts","team_completions","recent_team"],
          ascending=[True, True, False, False, False, True]
      )
      .drop_duplicates(subset=["player_id","season"], keep="first")
      .rename(columns={"recent_team":"team"})
      [["player_id","season","team"]]
)

# 5) Attach primary team to season totals
qbs_stats_df = qb_season_totals.merge(primary_team, on=["player_id","season"], how="left")

# 6) Attach team_id for the primary team (current-season team id)
qbs_stats_df = (
    qbs_stats_df
      .merge(
          team_ids[["team_abbr","team_id"]],
          left_on="team",
          right_on="team_abbr",
          how="left"
      )
      .rename(columns={"team_id":"team_id_current"})
      .drop(columns=["team_abbr"])
)

# 7) Final column order
ordered_cols = [
    "season","team","team_id_current",
    "player_name","player_id","games",
    "attempts","completions","passing_yards","passing_tds","interceptions"
]
qbs_stats_df = qbs_stats_df[[c for c in ordered_cols if c in qbs_stats_df.columns]].copy()
qbs_stats_df

Unnamed: 0,season,team,team_id_current,player_name,player_id,games,attempts,completions,passing_yards,passing_tds,interceptions
0,2015,IND,2200,Matt Hasselbeck,00-0007091,8,256,156,1690.0,9,5.0
1,2015,DEN,1400,Peyton Manning,00-0010346,10,331,198,2249.0,9,17.0
2,2015,NE,3200,Tom Brady,00-0019596,16,624,402,4770.0,36,7.0
3,2016,NE,3200,Tom Brady,00-0019596,12,432,291,3554.0,28,2.0
4,2017,NE,3200,Tom Brady,00-0019596,16,581,385,4577.0,32,8.0
...,...,...,...,...,...,...,...,...,...,...,...
763,2024,DEN,1400,Bo Nix,00-0039732,17,567,376,3775.0,29,12.0
764,2024,NE,3200,Drake Maye,00-0039851,13,338,225,2276.0,15,10.0
765,2024,WAS,5110,Jayden Daniels,00-0039910,17,480,331,3568.0,25,9.0
766,2024,ATL,200,Michael Penix,00-0039917,5,105,61,775.0,3,3.0


## Team Passing Offense (Season-by-Season)

This section shows **how strong each team’s passing offense was each year**.  
It helps account for seasons where a starter gets hurt and backups step in.

**Why it matters for WR1 hunting**
- WR1 candidates almost always come from **productive passing offenses**.
- Strong team passing can reflect great QB play, **depth at QB**, and/or a **coach who elevates the passing game**.

**What you’ll see**
- Teams ranked each season by **total passing yards** (1 = most).
- A quick view of which offenses consistently create the most air yards.

**How to use it**
- Focus on **top-15 teams** in passing each year—this is the pool where WR1 seasons usually live.
- Cross-check those teams with the **QB rankings** and your **lead WRs** to spot prime WR1 candidates.


In [16]:
# === Team Passing Stats from WEEKLY data (REG only) ===
# Uses weekly_stats_df['recent_team'] to attribute yards to the correct team each week.

# 1) Filter to REG season and select needed columns
wk_team = weekly_stats_df.loc[
    weekly_stats_df["season_type"] == "REG",
    ["season", "recent_team", "player_id", "position", "passing_yards", "passing_tds"]
].copy()

# 2) Coerce numerics (safe even if already numeric)
for c in ["passing_yards", "passing_tds"]:
    wk_team[c] = pd.to_numeric(wk_team[c], errors="coerce").fillna(0)

# 3) Aggregate to team-season (sum all passers' yards/TDs, including backups & trick plays)
team_passing_stats = (
    wk_team
      .groupby(["season", "recent_team"], as_index=False, observed=True)
      .agg(
          team_passing_yards=("passing_yards", "sum"),
          team_passing_tds=("passing_tds", "sum"),
          passer_count=("player_id", "nunique")
      )
)

# 4) (Optional) distinct QBs used per team-season
qb_count = (
    wk_team.loc[wk_team["position"] == "QB", ["season", "recent_team", "player_id"]]
           .drop_duplicates()
           .groupby(["season", "recent_team"], as_index=False)["player_id"]
           .nunique()
           .rename(columns={"player_id": "qb_count"})
)
team_passing_stats = team_passing_stats.merge(qb_count, on=["season", "recent_team"], how="left")
team_passing_stats["qb_count"] = team_passing_stats["qb_count"].fillna(0).astype(int)

# 5) Attach team_id and standardize team column name
team_passing_stats = (
    team_passing_stats
      .merge(
          team_ids[["team_abbr", "team_id"]],
          left_on="recent_team",
          right_on="team_abbr",
          how="left"
      )
      .rename(columns={"recent_team": "team"})
      .drop(columns=["team_abbr"])
)

# 6) Rank teams within each season by total passing yards (1 = most)
team_passing_stats["rank_passing_yards"] = (
    team_passing_stats
      .groupby("season")["team_passing_yards"]
      .rank(method="dense", ascending=False)
      .astype(int)
)

# 7) Final tidy sort
team_passing_stats = (
    team_passing_stats
      .sort_values(["season", "rank_passing_yards", "team"])
      .reset_index(drop=True)
)

# 8) (Optional) Top 10 passing teams each season
top10_passing_teams = team_passing_stats.loc[team_passing_stats["rank_passing_yards"] <= 10].copy()

# Quick peek
team_passing_stats


Unnamed: 0,season,team,team_passing_yards,team_passing_tds,passer_count,qb_count,team_id,rank_passing_yards
0,2015,NO,5205.0,32,20,2,3300,1
1,2015,LAC,4855.0,30,17,2,4400,2
2,2015,PIT,4822.0,26,19,3,3900,3
3,2015,NE,4812.0,36,21,2,3200,4
4,2015,ARI,4775.0,35,17,2,3800,5
...,...,...,...,...,...,...,...,...
315,2024,CHI,3552.0,20,19,2,810,27
316,2024,NYG,3521.0,15,18,4,3410,28
317,2024,PHI,3517.0,24,20,3,3700,29
318,2024,CAR,3411.0,22,24,2,750,30


## WR1 History (Last 10 Seasons)

This section lists the **WR1 (PPR) for each of the past 10 seasons**.  
It gives a quick “who actually did it?” snapshot so we can spot patterns.



In [5]:
# === WR1 per season from WEEKLY data (REG only) ===

# 1) Filter weekly rows to WRs in the regular season; keep what we need
wk_wr = weekly_stats_df.loc[
    (weekly_stats_df["season_type"] == "REG") & (weekly_stats_df["position"] == "WR"),
    [
        "player_id", "player_display_name",
        "season", "week",
        "recent_team",
        "fantasy_points_ppr",
        "receiving_yards", "targets", "receptions"
    ]
].copy()

# 2) Coerce numerics
for c in ["fantasy_points_ppr", "receiving_yards", "targets", "receptions"]:
    wk_wr[c] = pd.to_numeric(wk_wr[c], errors="coerce").fillna(0)

# 3) Player-season totals (PPR, games, handy totals)
wr_season_totals = (
    wk_wr.groupby(["player_id", "season"], as_index=False)
         .agg(
             player_name=("player_display_name", "first"),
             fantasy_points_ppr=("fantasy_points_ppr", "sum"),
             games=("week", "nunique"),
             receiving_yards=("receiving_yards", "sum"),
             targets=("targets", "sum"),
             receptions=("receptions", "sum"),
         )
)
wr_season_totals["ppr_per_game"] = (
    wr_season_totals["fantasy_points_ppr"] / wr_season_totals["games"].replace(0, pd.NA)
).fillna(0)

# 4) Determine the WR's primary team for that season
#    (team with most PPR for that player that year; tie-break: rec yards, weeks, team abbr)
wr_team_breakdown = (
    wk_wr.groupby(["player_id", "season", "recent_team"], as_index=False)
         .agg(
             team_ppr=("fantasy_points_ppr", "sum"),
             team_rec_yards=("receiving_yards", "sum"),
             team_weeks=("week", "nunique"),
         )
)
primary_team_wr = (
    wr_team_breakdown
      .sort_values(
          ["player_id", "season", "team_ppr", "team_rec_yards", "team_weeks", "recent_team"],
          ascending=[True, True, False, False, False, True]
      )
      .drop_duplicates(subset=["player_id", "season"], keep="first")
      .rename(columns={"recent_team": "team"})
      [["player_id", "season", "team"]]
)

# 5) Attach primary team + team_id
wr_season_totals = wr_season_totals.merge(primary_team_wr, on=["player_id", "season"], how="left")
wr_season_totals = (
    wr_season_totals.merge(
        team_ids[["team_abbr", "team_id"]],
        left_on="team",
        right_on="team_abbr",
        how="left"
    )
    .rename(columns={"team_id": "team_id_current"})
    .drop(columns=["team_abbr"])
)

# 6) Pick the WR1 per season (most total PPR; tie-breakers keep output stable)
wr1_ppr_df = (
    wr_season_totals
      .sort_values(
          ["season", "fantasy_points_ppr", "ppr_per_game", "receiving_yards", "player_name"],
          ascending=[True, False, False, False, True]
      )
      .groupby("season", as_index=False)
      .head(1)
      .sort_values("season")
      .reset_index(drop=True)
)

# 7) Optional: concise print/view
wr1_print = wr1_ppr_df[[
    "season",
    "player_id", "player_name",
    "team", "team_id_current",
    "fantasy_points_ppr", "ppr_per_game",
    "receiving_yards", "targets", "receptions"
]].copy()

wr1_print


Unnamed: 0,season,player_id,player_name,team,team_id_current,fantasy_points_ppr,ppr_per_game,receiving_yards,targets,receptions
0,2015,00-0027793,Antonio Brown,PIT,3900,390.200012,24.387501,1834.0,193,136
1,2016,00-0027793,Antonio Brown,PIT,3900,307.299988,20.486666,1284.0,154,106
2,2017,00-0027793,Antonio Brown,PIT,3900,310.299988,22.164285,1533.0,163,101
3,2018,00-0033040,Tyreek Hill,KC,2310,334.0,20.875,1479.0,137,87
4,2019,00-0032765,Michael Thomas,NO,3300,374.600006,23.4125,1725.0,185,149
5,2020,00-0031381,Davante Adams,GB,1800,358.399994,25.6,1374.0,149,115
6,2021,00-0033908,Cooper Kupp,LA,2510,439.5,25.852941,1947.0,191,145
7,2022,00-0036322,Justin Jefferson,MIN,3000,368.660004,21.685883,1809.0,184,128
8,2023,00-0036358,CeeDee Lamb,DAL,1200,405.200012,23.835295,1749.0,181,135
9,2024,00-0036900,Ja'Marr Chase,CIN,920,403.0,23.705882,1708.0,175,127


## WR1s and Passing Context (Proof Point)

This section pairs each season’s **WR1 (PPR)** with:
- their team’s **passing offense rank** (season total yards), and
- their **primary QB** and that QB’s **league passing rank**.

**What this shows**
- WR1 seasons almost always come from **strong passing attacks**.
- The lead WR is typically tied to a QB who ranks **near the top** in passing yards.
- Even when starters miss time, teams with **deep QB rooms or strong coaching** still support elite WR seasons.

**Takeaway**
- If a WR is attached to a **top passing QB** on a **top passing team**, they’re a realistic WR1 candidate.


In [7]:
# --- (Re)build qbs_ranked_view from qbs_stats_df ---
qbs_ranked = (
    qbs_stats_df.assign(
        passing_yards = pd.to_numeric(qbs_stats_df["passing_yards"], errors="coerce").fillna(0),
        passing_tds   = pd.to_numeric(qbs_stats_df["passing_tds"], errors="coerce").fillna(0),
        interceptions = pd.to_numeric(qbs_stats_df["interceptions"], errors="coerce").fillna(0),
        games         = pd.to_numeric(qbs_stats_df["games"], errors="coerce").fillna(0),
    )
    .sort_values(
        ["season","passing_yards","passing_tds","interceptions","player_name"],
        ascending=[True, False, False, True, True]
    )
)
qbs_ranked["rank_passing_yards"] = (
    qbs_ranked.groupby("season")["passing_yards"]
              .rank(method="dense", ascending=False)
              .astype(int)
)

qb_rank_cols = [
    "season","rank_passing_yards",
    "player_name","player_id","team","team_id_current",
    "games","passing_yards","passing_tds","interceptions"
]
qbs_ranked_view = qbs_ranked[[c for c in qb_rank_cols if c in qbs_ranked.columns]].copy()

# --- Starter QB per team-season: best (lowest) league rank ---
qb_starter_per_team = (
    qbs_ranked_view
      .dropna(subset=["team_id_current"])
      .sort_values(["season","team_id_current","rank_passing_yards","player_name"])
      .groupby(["season","team_id_current"], as_index=False)
      .first()
      .rename(columns={
          "player_name": "QB_Name",
          "rank_passing_yards": "QB_Passing_Yards_Rank"
      })
)

# --- Join WR1s to their team’s passing rank ---
wr1_with_team_rank = (
    wr1_ppr_df
      .merge(
          team_passing_stats[["season","team_id","rank_passing_yards"]],
          left_on=["season","team_id_current"],
          right_on=["season","team_id"],
          how="left"
      )
      .rename(columns={"rank_passing_yards": "Team_Passing_Rank"})
      .drop(columns=["team_id"])
)

# --- Join WR1s to their team's starter QB ---
wr1_team_qb_summary = wr1_with_team_rank.merge(
    qb_starter_per_team[["season","team_id_current","QB_Name","QB_Passing_Yards_Rank"]],
    on=["season","team_id_current"],
    how="left"
)

# --- Print: season, team, WR1, QB, QB rank, team passing rank ---
summary_print = (
    wr1_team_qb_summary
      .rename(columns={"player_name": "WR1_Name"})
      [["season","team","WR1_Name","QB_Name","QB_Passing_Yards_Rank","Team_Passing_Rank"]]
      .sort_values("season")
      .reset_index(drop=True)
)
summary_print


Unnamed: 0,season,team,WR1_Name,QB_Name,QB_Passing_Yards_Rank,Team_Passing_Rank
0,2015,PIT,Antonio Brown,Ben Roethlisberger,14,3
1,2016,PIT,Antonio Brown,Ben Roethlisberger,17,10
2,2017,PIT,Antonio Brown,Ben Roethlisberger,5,4
3,2018,KC,Tyreek Hill,Patrick Mahomes,2,3
4,2019,NO,Michael Thomas,Drew Brees,26,7
5,2020,GB,Davante Adams,Aaron Rodgers,7,11
6,2021,LA,Cooper Kupp,Matthew Stafford,3,5
7,2022,MIN,Justin Jefferson,Kirk Cousins,4,2
8,2023,DAL,CeeDee Lamb,Dak Prescott,3,3
9,2024,CIN,Ja'Marr Chase,Joe Burrow,1,1


## Draft-Team Continuity (WR1s Stay Home)

This section checks whether each season’s **WR1** was still on the **team that drafted him**.

**Why it matters**
- Over the last 10 seasons, WR1s have **all** been on their drafting team.
- That continuity often means trust, role stability, and heavy usage.

**Takeaway**
- When hunting for WR1s, prioritize elite WRs who are **still with their drafting team**.


In [8]:
players = nfl.import_players()
# === Enrich WR1s with draft info from `players` and check "same team that drafted him" ===

# 1) Slim players table to needed columns
players_keep = [
    "gsis_id", "years_of_experience", "draft_year", "draft_round", "draft_pick", "draft_team"
]
players_slim = players[players_keep].copy()

# 2) Merge WR1s with player draft info (player_id == gsis_id)
wr1_with_draft = wr1_ppr_df.merge(
    players_slim,
    left_on="player_id",
    right_on="gsis_id",
    how="left"
)

# 3) Attach draft_team_id from team_ids (handles aliases like OAK/LV, STL/LAR, etc.)
wr1_with_draft = (
    wr1_with_draft
      .merge(
          team_ids[["team_abbr","team_id"]],
          left_on="draft_team",
          right_on="team_abbr",
          how="left"
      )
      .rename(columns={"team_id": "draft_team_id"})
      .drop(columns=["team_abbr"])
)

# 4) Tidy numeric types (optional but nice)
for c in ["years_of_experience", "draft_year", "draft_round", "draft_pick"]:
    if c in wr1_with_draft.columns:
        wr1_with_draft[c] = pd.to_numeric(wr1_with_draft[c], errors="coerce").astype("Int64")

# 5) Boolean: was WR1 on the same team that drafted him?
wr1_with_draft["team_drafted_by"] = (
    wr1_with_draft["team_id_current"] == wr1_with_draft["draft_team_id"]
)

# 6) Concise view for display / analysis
wr1_draft_view = wr1_with_draft[[
    "season",
    "player_id", 
    "player_name",
    "team",
    "draft_team",
    "team_drafted_by"
]].sort_values("season").reset_index(drop=True)

print(f"WR1 seasons on drafting team: {wr1_draft_view['team_drafted_by'].sum()}/{len(wr1_draft_view)} "
      f"({wr1_draft_view['team_drafted_by'].mean():.0%})")
print(wr1_draft_view)


WR1 seasons on drafting team: 10/10 (100%)
   season   player_id       player_name team draft_team  team_drafted_by
0    2015  00-0027793     Antonio Brown  PIT        PIT             True
1    2016  00-0027793     Antonio Brown  PIT        PIT             True
2    2017  00-0027793     Antonio Brown  PIT        PIT             True
3    2018  00-0033040       Tyreek Hill   KC         KC             True
4    2019  00-0032765    Michael Thomas   NO         NO             True
5    2020  00-0031381     Davante Adams   GB         GB             True
6    2021  00-0033908       Cooper Kupp   LA         LA             True
7    2022  00-0036322  Justin Jefferson  MIN        MIN             True
8    2023  00-0036358       CeeDee Lamb  DAL        DAL             True
9    2024  00-0036900     Ja'Marr Chase  CIN        CIN             True


## Prior-Year Production (WR1s Don’t Come Out of Nowhere)

This section shows each WR1’s **PPR points per game in the previous season**.  
Over the last 10 years, WR1s have all been at **16.0+ PPR per game** the year before their WR1 finish.

**Why it matters**
- WR1 seasons are almost always a **continuation of high usage and efficiency**, not a surprise spike.
- A 16+ PPR/G baseline the prior year is a strong signal of true WR1 potential.

**Takeaway**
- **WR1s don’t come out of nowhere**—they were already productive the year before.  
  Treat **16+ PPR/G in the prior season** as **another key piece of the puzzle** when building your WR1 candidate list.

In [10]:
# --- Build prior-season PPR/GP for ALL WR1s and merge (2015–2024 -> 2014–2023) ---

# 1) Which prior seasons do we need?
prior_seasons = sorted({int(s) - 1 for s in wr1_ppr_df["season"].unique() if pd.notna(s)})

# 2) Prior-season stats (REG only) -> prior PPR/GP
prior_stats_all = nfl.import_seasonal_data(prior_seasons)
prior_stats_all = prior_stats_all.loc[
    prior_stats_all["season_type"] == "REG",
    ["player_id", "season", "season_type", "fantasy_points_ppr", "games"]
].copy()

prior_stats_all.rename(columns={
    "season": "prior_season",
    "season_type": "prior_season_type",
    "fantasy_points_ppr": "prior_fantasy_points_ppr",
    "games": "prior_games"
}, inplace=True)

prior_stats_all["prior_fantasy_points_ppr"] = pd.to_numeric(prior_stats_all["prior_fantasy_points_ppr"], errors="coerce").fillna(0)
prior_stats_all["prior_games"] = pd.to_numeric(prior_stats_all["prior_games"], errors="coerce").fillna(0)
prior_stats_all["prior_ppr_per_game"] = (
    prior_stats_all["prior_fantasy_points_ppr"] / prior_stats_all["prior_games"].replace({0: pd.NA})
).fillna(0)

# 3) Prior-season rosters (WR only) -> prior team + prior_team_id
prior_rosters_all = nfl.import_seasonal_rosters(prior_seasons)
prior_wr_rosters_all = (
    prior_rosters_all.loc[
        (prior_rosters_all["status"] == "ACT") & (prior_rosters_all["position"] == "WR"),
        ["player_id", "season", "team"]
    ]
    .drop_duplicates(subset=["player_id", "season"])
    .rename(columns={"season": "prior_season", "team": "prior_team"})
    .merge(
        team_ids[["team_abbr", "team_id"]],
        left_on="prior_team", right_on="team_abbr", how="left"
    )
    .rename(columns={"team_id": "prior_team_id"})
    .drop(columns=["team_abbr"])
)

# 4) Combine prior tables
prior_enriched = prior_stats_all.merge(prior_wr_rosters_all, on=["player_id", "prior_season"], how="left")

# 5) Prepare for join: set 'season' on the prior table = prior_season + 1 (to match WR1 season)
prior_enriched_for_join = prior_enriched.copy()
prior_enriched_for_join["season"] = prior_enriched_for_join["prior_season"] + 1

# 6) Merge onto WR1s without creating duplicate 'prior_season' columns on the left
wr1_with_prior_all = wr1_ppr_df.merge(
    prior_enriched_for_join,
    on=["player_id", "season"],
    how="left"
)

# 7) Final compact view
wr1_prior_ppg_view = wr1_with_prior_all[[
    "season",                          
    "player_id", 
    "player_name",
    "team", "prior_ppr_per_game"
]].sort_values("season").reset_index(drop=True)
wr1_prior_ppg_view


Unnamed: 0,season,player_id,player_name,team,prior_ppr_per_game
0,2015,00-0027793,Antonio Brown,PIT,24.30625
1,2016,00-0027793,Antonio Brown,PIT,24.3875
2,2017,00-0027793,Antonio Brown,PIT,20.486667
3,2018,00-0033040,Tyreek Hill,KC,16.346667
4,2019,00-0032765,Michael Thomas,NO,19.71875
5,2020,00-0031381,Davante Adams,GB,17.725
6,2021,00-0033908,Cooper Kupp,LA,14.046667
7,2022,00-0036322,Justin Jefferson,MIN,19.435294
8,2023,00-0036358,CeeDee Lamb,DAL,17.741176
9,2024,00-0036900,Ja'Marr Chase,CIN,16.42


## WR “Prime Window” (Entering Years 3–8)

This section highlights that WR1 seasons almost always happen during a receiver’s **prime years**.

**What the data shows**
- **Years 3–8**: where most WR1 seasons live.
- **Years 1–2**: no rookie or second-year WR has finished as the overall WR1.
- **Year 9+**: great players can still produce, but **we haven’t seen a late-career WR1**.

**Takeaway**
- When building a WR1 candidate list, **prioritize receivers entering Years 3–8**.  
  It doesn’t guarantee WR1, but it’s a strong filter that removes early-career long shots and late-career outliers.


In [12]:
# === WR1 experience at breakout season (inclusive: season - draft_year + 1) ===
# Assumes: wr1_with_draft already exists and ideally has draft_year from the players merge.

# 1) Add rookie_season from players if missing (rename key to avoid extra column)
if "rookie_season" not in wr1_with_draft.columns and "rookie_season" in players.columns:
    wr1_with_draft = wr1_with_draft.merge(
        players[["gsis_id", "rookie_season"]].rename(columns={"gsis_id": "player_id"}),
        on="player_id",
        how="left"
    )

# 2) Compute experience year at WR1 season
#    Prefer draft_year; fallback to rookie_season when draft_year is NaN
base_year = pd.to_numeric(wr1_with_draft.get("draft_year"), errors="coerce")
if "rookie_season" in wr1_with_draft.columns:
    base_year = base_year.fillna(pd.to_numeric(wr1_with_draft["rookie_season"], errors="coerce"))

wr1_with_draft["wr1_experience_year"] = (
    pd.to_numeric(wr1_with_draft["season"], errors="coerce") - base_year + 1
).astype("Int64")  # nullable int, stays clean if base_year is missing

# 3) Minimal view (adjust columns to taste)
wr1_experience_view = wr1_with_draft[[
    "season",
    "player_id", "player_name",
    "team",
    *([ "draft_year" ] if "draft_year" in wr1_with_draft.columns else []),
    *([ "rookie_season" ] if "rookie_season" in wr1_with_draft.columns else []),
    "wr1_experience_year",
]].sort_values("season").reset_index(drop=True)

wr1_experience_view


Unnamed: 0,season,player_id,player_name,team,draft_year,rookie_season,wr1_experience_year
0,2015,00-0027793,Antonio Brown,PIT,2010,2010,6
1,2016,00-0027793,Antonio Brown,PIT,2010,2010,7
2,2017,00-0027793,Antonio Brown,PIT,2010,2010,8
3,2018,00-0033040,Tyreek Hill,KC,2016,2016,3
4,2019,00-0032765,Michael Thomas,NO,2016,2016,4
5,2020,00-0031381,Davante Adams,GB,2014,2014,7
6,2021,00-0033908,Cooper Kupp,LA,2017,2017,5
7,2022,00-0036322,Justin Jefferson,MIN,2020,2020,3
8,2023,00-0036358,CeeDee Lamb,DAL,2020,2020,4
9,2024,00-0036900,Ja'Marr Chase,CIN,2021,2021,4


## 2025 WR1 Candidate Shortlist

This is where we pull everything together and show the **most likely WR1 candidates for 2025**.

**How the list is built (simple rules):**
- **Prime years:** WR is entering **Years 3–8** in 2025  
- **Same team that drafted him**  
- **Proven production:** **≥ 16.0 PPR per game in 2024**  
- **Strong environment:** Team was a **top-15 passing offense in 2024**

**What you’ll see below**
- A clean table: **2025 Team | WR | 2024 PPR/G**

**How to use it**
- Treat this as a **shortlist** — a small group with the right profile.  
- From here, consider schedule, target competition, coaching changes, and injuries.

**Takeaway**
- WR1 seasons tend to cluster where **talent, role, and passing environment** all line up.  
  This list surfaces the players sitting at that intersection for 2025.


In [15]:
# === WR1 2025 Candidate List per your profile ===
import pandas as pd

TARGET_SEASON = 2025
PRIOR_SEASON  = TARGET_SEASON - 1  # 2024

# --- 0) Top-15 passing offenses from prior season (2024) ---
top15_2024 = (
    team_passing_stats
      .loc[
          (team_passing_stats["season"] == PRIOR_SEASON) &
          (team_passing_stats["rank_passing_yards"] <= 15),
          ["team_id", "team", "rank_passing_yards"]
      ]
      .rename(columns={
          "team": "team_2024",
          "rank_passing_yards": "team_passing_rank_2024"
      })
      .copy()
)

# --- 1) 2024 WR PPR per game + primary team (by most PPR that season) ---
wk_wr_2024 = weekly_stats_df.loc[
    (weekly_stats_df["season_type"] == "REG") &
    (weekly_stats_df["position"] == "WR") &
    (weekly_stats_df["season"] == PRIOR_SEASON),
    ["player_id","player_display_name","season","week","recent_team","fantasy_points_ppr","receiving_yards"]
].copy()

for c in ["fantasy_points_ppr","receiving_yards"]:
    wk_wr_2024[c] = pd.to_numeric(wk_wr_2024[c], errors="coerce").fillna(0)

# Totals per player-season (to get PPR/G)
wr_2024_totals = (
    wk_wr_2024.groupby(["player_id","season"], as_index=False)
              .agg(
                  player_name=("player_display_name","first"),
                  ppr_2024=("fantasy_points_ppr","sum"),
                  games_2024=("week","nunique"),
                  rec_yards_2024=("receiving_yards","sum"),
              )
)
wr_2024_totals["ppr_per_game_2024"] = (
    wr_2024_totals["ppr_2024"] / wr_2024_totals["games_2024"].replace({0: pd.NA})
).fillna(0)

# Pick primary team in 2024: most PPR for that player that year
wr_2024_team_breakdown = (
    wk_wr_2024.groupby(["player_id","season","recent_team"], as_index=False)
              .agg(
                  team_ppr=("fantasy_points_ppr","sum"),
                  team_rec_yards=("receiving_yards","sum"),
                  team_weeks=("week","nunique")
              )
)

primary_team_2024 = (
    wr_2024_team_breakdown
      .sort_values(
          ["player_id","season","team_ppr","team_rec_yards","team_weeks","recent_team"],
          ascending=[True, True, False, False, False, True]
      )
      .drop_duplicates(subset=["player_id","season"], keep="first")
      .rename(columns={"recent_team":"team_2024"})
      [["player_id","season","team_2024"]]
)

# Attach team_id_2024
primary_team_2024 = (
    primary_team_2024.merge(
        team_ids[["team_abbr","team_id"]],
        left_on="team_2024", right_on="team_abbr", how="left"
    )
    .rename(columns={"team_id":"team_id_2024"})
    .drop(columns=["team_abbr"])
)

# Bring together 2024 WR totals + primary team + top-15 pass rank
wr_2024 = (
    wr_2024_totals
      .merge(primary_team_2024, on=["player_id","season"], how="left")
      .merge(
          top15_2024[["team_id","team_passing_rank_2024"]],
          left_on="team_id_2024", right_on="team_id", how="left"
      )
      .drop(columns=["team_id"])
)

# Keep exactly what we need downstream (includes rec_yards_2024!)
wr_2024 = wr_2024[[
    "player_id","player_name",
    "ppr_per_game_2024","ppr_2024","rec_yards_2024","games_2024",
    "team_2024","team_id_2024","team_passing_rank_2024"
]].copy()

# --- 2) 2025 active WRs and their current team ---
rosters_2025 = nfl.import_seasonal_rosters([TARGET_SEASON])
wr_2025 = (
    rosters_2025.loc[
        (rosters_2025["status"] == "ACT") & (rosters_2025["position"] == "WR"),
        ["player_id","season","team"]
    ]
    .drop_duplicates(subset=["player_id","season"], keep="first")
    .rename(columns={"team":"team_2025"})
)

# Attach team_id_2025
wr_2025 = (
    wr_2025.merge(
        team_ids[["team_abbr","team_id"]],
        left_on="team_2025", right_on="team_abbr", how="left"
    )
    .rename(columns={"team_id":"team_id_2025"})
    .drop(columns=["team_abbr"])
)

# --- 3) Draft info & experience at 2025 (inclusive) ---
players_keep = ["gsis_id","draft_year","rookie_season","draft_team"]
players_slim = players[players_keep].copy()

# Map draft_team -> draft_team_id
players_slim = (
    players_slim.merge(
        team_ids[["team_abbr","team_id"]],
        left_on="draft_team", right_on="team_abbr", how="left"
    )
    .rename(columns={"team_id":"draft_team_id"})
    .drop(columns=["team_abbr"])
)

# Merge 2025 WRs + draft info + their 2024 outputs
candidates = (
    wr_2025
      .merge(players_slim.rename(columns={"gsis_id":"player_id"}), on="player_id", how="left")
      .merge(
          wr_2024[[
              "player_id","player_name",
              "ppr_per_game_2024","rec_yards_2024","games_2024",
              "team_2024","team_id_2024","team_passing_rank_2024"
          ]],
          on="player_id", how="left"
      )
)

# Experience: 2025 - draft_year + 1 (fallback to rookie_season if draft_year is missing)
base_year = pd.to_numeric(candidates["draft_year"], errors="coerce")
base_year = base_year.fillna(pd.to_numeric(candidates["rookie_season"], errors="coerce"))
candidates["experience_2025"] = (
    pd.to_numeric(candidates["season"], errors="coerce") - base_year + 1
).astype("Int64")

# Still on drafted team (in 2025)?
candidates["team_drafted_by_2025"] = (candidates["team_id_2025"] == candidates["draft_team_id"])

# --- 4) Apply WR1 profile filters ---
filtered = candidates.loc[
    (candidates["season"] == TARGET_SEASON) &
    (candidates["experience_2025"].between(3, 8, inclusive="both")) &
    (candidates["team_drafted_by_2025"] == True) &
    (pd.to_numeric(candidates["ppr_per_game_2024"], errors="coerce").fillna(0) >= 16) &
    (candidates["team_passing_rank_2024"].notna())   # merged only if team was top-15 in 2024
].copy()

# Safety: ensure rec_yards_2024 exists for tie-break (fill 0 if missing)
if "rec_yards_2024" not in filtered.columns:
    filtered["rec_yards_2024"] = 0
filtered["rec_yards_2024"] = pd.to_numeric(filtered["rec_yards_2024"], errors="coerce").fillna(0)

# --- 5) Keep the primary option per 2025 team (higher 2024 PPR/G wins; tie-break rec yards) ---
filtered = (
    filtered.sort_values(
        ["team_id_2025","ppr_per_game_2024","rec_yards_2024","player_id"],
        ascending=[True, False, False, True]
    )
    .drop_duplicates(subset=["team_id_2025"], keep="first")
)

# --- 6) Final candidate list for 2025 WR1 profile ---
wr1_2025_candidates = filtered[[
    "player_id","player_name",
    "team_2025","team_id_2025",
    "experience_2025","team_drafted_by_2025",
    "ppr_per_game_2024",
    "team_2024","team_id_2024","team_passing_rank_2024",
    "rec_yards_2024","games_2024"
]].copy()

# Minimal print: 2025 team, player name, prior year's PPR/G
wr1_2025_candidates_simple = (
    wr1_2025_candidates
      .rename(columns={
          "team_2025": "Team",
          "player_name": "WR",
          "ppr_per_game_2024": "PPR/G_2024"
      })[["Team", "WR", "PPR/G_2024"]]
      .sort_values(["PPR/G_2024", "Team"], ascending=[False, True])
      .reset_index(drop=True)
)

wr1_2025_candidates_simple



Unnamed: 0,Team,WR,PPR/G_2024
0,CIN,Ja'Marr Chase,23.705882
1,KC,Rashee Rice,21.633334
2,LA,Puka Nacua,18.781819
3,MIN,Justin Jefferson,18.675295
4,DET,Amon-Ra St. Brown,18.598823
5,DAL,CeeDee Lamb,17.56
6,HOU,Nico Collins,17.550001
7,ATL,Drake London,16.517646
