In [4]:
from nfl_data_py import import_seasonal_data, import_seasonal_rosters
import pandas as pd
import re
pd.set_option('display.max_columns', None)

# Function to clean player names
def clean_player_name(player_name):
    if not isinstance(player_name, str):
        return player_name

    # Remove punctuation
    player_name = re.sub(r'[^\w\s]', '', player_name)

    # Remove suffixes like Jr, Sr, II, III, etc.
    suffixes = ['Jr', 'Sr', 'II', 'III', 'IV', 'V']
    pattern = r'\b(?:' + '|'.join(suffixes) + r')\b'
    player_name = re.sub(pattern, '', player_name, flags=re.IGNORECASE)

    # Normalize whitespace
    player_name = ' '.join(player_name.split())

    return player_name

# Define the columns to keep for WRs
wr_columns = [
    'player_name', 'team', 'season', 'week', 'games',
    'receptions', 'targets', 'receiving_yards', 'receiving_tds',
    'receiving_fumbles', 'receiving_fumbles_lost', 'receiving_air_yards',
    'receiving_yards_after_catch', 'receiving_first_downs', 'receiving_epa',
    'receiving_2pt_conversions', 'racr', 'target_share', 'air_yards_share',
    'wopr_x', 'tgt_sh', 'ay_sh', 'yac_sh', 'wopr_y', 'ppr_sh',

    'carries', 'rushing_yards', 'rushing_tds', 'rushing_fumbles',
    'rushing_fumbles_lost', 'rushing_first_downs', 'rushing_epa',
    'rushing_2pt_conversions', 'ry_sh', 'rtd_sh', 'rfd_sh', 'rtdfd_sh',

    'fantasy_points', 'fantasy_points_ppr', 'games', 'dom', 'w8dom'
]

# Dictionary to store yearly WR DataFrames
wr_data_by_year = {}

# Dictionary to store player → DataFrame of season records
wr_season_dict = {}

# Loop through each season
for year in range(2016, 2025):
    stats = import_seasonal_data([year], s_type="REG")
    rosters = import_seasonal_rosters([year])
    
    # Merge player name and position
    merged = stats.merge(
        rosters[['player_id', 'player_name', 'position', 'team']],
        on='player_id',
        how='left'
    )
    
    # Filter to WRs only
    wrs = merged[merged['position'] == 'WR'].copy()
    
    # Drop player_id and move player_name to the front
    wrs.drop(columns=['player_id'], inplace=True)
    cols = ['player_name'] + [col for col in wrs.columns if col != 'player_name']
    wrs = wrs[cols]
    
    # Keep only relevant WR columns
    wr_filtered = wrs[[col for col in wr_columns if col in wrs.columns]]
    
    # Clean player names
    wr_filtered = wr_filtered.copy()
    wr_filtered['player_name_clean'] = wr_filtered['player_name'].apply(clean_player_name)

    
    # Store year-based
    wr_data_by_year[year] = wr_filtered

    # Build player → DataFrame mapping using cleaned names
    for name, group in wr_filtered.groupby('player_name_clean'):
        if name not in wr_season_dict:
            wr_season_dict[name] = group.drop(columns=['player_name', 'player_name_clean']).reset_index(drop=True)
        else:
            wr_season_dict[name] = pd.concat(
                [wr_season_dict[name], group.drop(columns=['player_name', 'player_name_clean'])],
                ignore_index=True
            )

    print(f"✅ Loaded {len(wr_filtered)} WR rows for {year}")


✅ Loaded 205 WR rows for 2016
✅ Loaded 208 WR rows for 2017
✅ Loaded 216 WR rows for 2018
✅ Loaded 221 WR rows for 2019
✅ Loaded 224 WR rows for 2020
✅ Loaded 236 WR rows for 2021
✅ Loaded 226 WR rows for 2022
✅ Loaded 212 WR rows for 2023
✅ Loaded 227 WR rows for 2024


In [5]:
wr_season_dict["Deebo Samuel"]

Unnamed: 0,team,season,games,receptions,targets,receiving_yards,receiving_tds,receiving_fumbles,receiving_fumbles_lost,receiving_air_yards,receiving_yards_after_catch,receiving_first_downs,receiving_epa,receiving_2pt_conversions,racr,target_share,air_yards_share,wopr_x,tgt_sh,ay_sh,yac_sh,wopr_y,ppr_sh,carries,rushing_yards,rushing_tds,rushing_fumbles,rushing_fumbles_lost,rushing_first_downs,rushing_epa,rushing_2pt_conversions,ry_sh,rtd_sh,rfd_sh,rtdfd_sh,fantasy_points,fantasy_points_ppr,games.1,dom,w8dom
0,SF,2019,15,57,81,802.0,3,2.0,1.0,598.0,473.0,32.0,29.648814,1,33.918311,2.645348,2.83641,5.953508,0.177243,0.203471,0.222588,0.428641,0.129103,14,159.0,3,0.0,0.0,5.0,9.733441,0,0.206808,0.107143,0.171123,0.162791,132.1,189.1,15,0.156975,0.186875
1,SF,2020,7,33,44,391.0,1,0.0,0.0,110.0,398.0,19.0,10.135529,0,21.137828,1.302364,0.249019,2.12786,0.171206,0.068793,0.35953,0.311844,0.127553,8,26.0,0,0.0,0.0,1.0,-1.630543,0,0.207758,0.111111,0.193878,0.186916,47.7,80.7,7,0.159434,0.188428
2,SF,2021,16,77,121,1405.0,6,4.0,2.0,1029.0,780.0,51.0,55.310519,0,22.455835,4.192855,4.623942,9.526042,0.25,0.271863,0.373384,0.59249,0.234441,59,365.0,8,0.0,0.0,21.0,20.849417,0,0.339536,0.25,0.269841,0.267606,261.96,338.96,16,0.294768,0.321629
3,SF,2022,13,56,94,632.0,2,0.0,0.0,396.0,493.0,25.0,8.942732,0,24.998538,3.112262,1.682094,5.845858,0.233251,0.147431,0.289489,0.467821,0.149304,42,232.0,3,3.0,2.0,8.0,-8.381753,0,0.207553,0.095238,0.172414,0.162651,112.4,168.4,13,0.151396,0.18509
4,SF,2023,15,60,89,892.0,7,1.0,0.0,586.0,527.0,34.0,40.887983,0,136.542453,3.08527,2.5213,6.392814,0.206977,0.167047,0.278689,0.444103,0.162283,37,225.0,5,0.0,0.0,13.0,10.294309,0,0.226396,0.225806,0.188889,0.194313,183.7,243.7,15,0.226101,0.226278
5,SF,2024,14,51,81,670.0,3,0.0,0.0,551.0,421.0,28.0,11.071186,0,21.333583,2.826012,2.220601,5.793439,0.188811,0.15404,0.259396,0.406449,0.12979,42,136.0,1,0.0,0.0,5.0,-9.080931,0,0.186422,0.166667,0.167665,0.167568,104.6,155.6,14,0.176544,0.182471
