In [1]:
from nfl_data_py import import_seasonal_data, import_seasonal_rosters
import pandas as pd
import re
pd.set_option('display.max_columns', None)

# Function to clean player names (remove punctuation and suffixes)
def clean_player_name(player_name):
    if not isinstance(player_name, str):
        return player_name

    player_name = re.sub(r'[^\w\s]', '', player_name)  # Remove punctuation
    suffixes = ['Jr', 'Sr', 'II', 'III', 'IV', 'V']
    pattern = r'\b(?:' + '|'.join(suffixes) + r')\b'
    player_name = re.sub(pattern, '', player_name, flags=re.IGNORECASE)
    return ' '.join(player_name.split())  # Remove extra spaces

# Define RB columns
rb_columns = [
    'player_name', 'team', 'season', 'week', 'games',
    'carries', 'rushing_yards', 'rushing_tds', 'rushing_fumbles',
    'rushing_fumbles_lost', 'rushing_first_downs', 'rushing_epa',
    'rushing_2pt_conversions', 'ry_sh', 'rtd_sh', 'rfd_sh', 'rtdfd_sh',

    'receptions', 'targets', 'receiving_yards', 'receiving_tds',
    'receiving_fumbles', 'receiving_fumbles_lost', 'receiving_air_yards',
    'receiving_yards_after_catch', 'receiving_first_downs', 'receiving_epa',
    'receiving_2pt_conversions', 'racr', 'target_share', 'air_yards_share',
    'wopr_x', 'tgt_sh', 'ay_sh', 'yac_sh', 'wopr_y', 'ppr_sh',

    'fantasy_points', 'fantasy_points_ppr', 'games', 'dom', 'w8dom'
]

# Dictionaries
rb_data_by_year = {}
rb_season_dict = {}

# Loop through each season
for year in range(2016, 2025):
    stats = import_seasonal_data([year], s_type="REG")
    rosters = import_seasonal_rosters([year])

    merged = stats.merge(
        rosters[['player_id', 'player_name', 'position', 'team']],
        on='player_id',
        how='left'
    )

    # Filter for RBs
    rbs = merged[merged['position'] == 'RB'].copy()
    rbs.drop(columns=['player_id'], inplace=True)
    cols = ['player_name'] + [col for col in rbs.columns if col != 'player_name']
    rbs = rbs[cols]

    # Select only relevant RB columns
    rb_filtered = rbs[[col for col in rb_columns if col in rbs.columns]].copy()

    # Clean player names
    rb_filtered['player_name_clean'] = rb_filtered['player_name'].apply(clean_player_name)

    # Store by year
    rb_data_by_year[year] = rb_filtered

    # Build season-level dict
    for name, group in rb_filtered.groupby('player_name_clean'):
        rb_season_dict[name] = pd.concat(
            [rb_season_dict.get(name, pd.DataFrame()), group.drop(columns=['player_name', 'player_name_clean'])],
            ignore_index=True
        )

    print(f"✅ Loaded {len(rb_filtered)} RB rows for {year}")


✅ Loaded 170 RB rows for 2016
✅ Loaded 160 RB rows for 2017
✅ Loaded 157 RB rows for 2018
✅ Loaded 154 RB rows for 2019
✅ Loaded 164 RB rows for 2020
✅ Loaded 169 RB rows for 2021
✅ Loaded 160 RB rows for 2022
✅ Loaded 149 RB rows for 2023
✅ Loaded 145 RB rows for 2024


In [2]:
rb_season_dict["Saquon Barkley"]

Unnamed: 0,team,season,games,carries,rushing_yards,rushing_tds,rushing_fumbles,rushing_fumbles_lost,rushing_first_downs,rushing_epa,rushing_2pt_conversions,ry_sh,rtd_sh,rfd_sh,rtdfd_sh,receptions,targets,receiving_yards,receiving_tds,receiving_fumbles,receiving_fumbles_lost,receiving_air_yards,receiving_yards_after_catch,receiving_first_downs,receiving_epa,receiving_2pt_conversions,racr,target_share,air_yards_share,wopr_x,tgt_sh,ay_sh,yac_sh,wopr_y,ppr_sh,fantasy_points,fantasy_points_ppr,games.1,dom,w8dom
0,NYG,2018,16,261,1307.0,11,0.0,0.0,50.0,-7.961426,1,0.163678,0.173913,0.144231,0.147186,91,121,721.0,4,0.0,0.0,18.0,768.0,30.0,2.5663,0,-21.073612,3.281652,0.10827,4.998268,0.207547,0.004116,0.349091,0.314614,0.265757,294.8,385.8,16,0.168795,0.165725
1,NYG,2019,13,217,1003.0,6,0.0,0.0,45.0,-18.718533,0,0.125071,0.074074,0.0875,0.085561,52,73,438.0,2,1.0,0.0,61.0,438.0,14.0,4.479645,0,-1.806177,1.934597,0.182124,3.029382,0.143984,0.015092,0.294355,0.22805,0.195941,192.1,244.1,13,0.099573,0.114872
2,NYG,2020,2,19,34.0,0,0.0,0.0,1.0,-8.175074,0,0.115385,0.0,0.096774,0.090909,6,9,60.0,0,0.0,0.0,34.0,55.0,3.0,3.641316,0,1.764706,0.230769,0.139918,0.444096,0.111111,0.069959,0.280612,0.222634,0.101183,9.4,15.4,2,0.057692,0.092308
3,NYG,2021,13,162,593.0,2,1.0,1.0,27.0,-20.349858,0,0.099208,0.181818,0.100775,0.107143,41,57,263.0,2,1.0,0.0,30.0,298.0,13.0,-3.736571,0,-5.320328,1.701285,0.007168,2.556945,0.125275,0.008465,0.25042,0.194684,0.171685,107.6,148.6,13,0.140513,0.11573
4,NYG,2022,16,295,1312.0,10,1.0,0.0,62.0,-15.511169,0,0.103586,0.0,0.062893,0.057143,57,76,338.0,0,0.0,0.0,-16.0,434.0,10.0,-13.652243,1,-56.366754,2.682931,-0.110787,3.946846,0.158333,-0.005112,0.267571,0.233411,0.221043,227.0,284.0,16,0.051793,0.082869
5,NYG,2023,14,247,962.0,6,2.0,2.0,51.0,-50.373962,1,0.099786,0.266667,0.137931,0.152672,41,60,280.0,4,0.0,0.0,134.0,226.0,16.0,-6.571396,0,33.606108,2.332051,-1.22996,2.637105,0.143198,0.04113,0.172783,0.247701,0.221069,182.2,223.2,14,0.183226,0.133162
6,PHI,2024,16,345,2005.0,13,2.0,1.0,82.0,34.099637,3,0.085591,0.090909,0.078431,0.08,33,43,278.0,2,0.0,0.0,98.0,245.0,12.0,12.202534,0,63.156013,1.898664,0.408785,3.134146,0.105651,0.030501,0.168501,0.182878,0.252627,322.3,355.3,16,0.08825,0.086655
