In [4]:
from nfl_data_py import import_seasonal_data, import_seasonal_rosters
import pandas as pd
import re
pd.set_option('display.max_columns', None)

# Function to clean player names
def clean_player_name(player_name):
    """Normalise a player name so the same player matches across sources.

    Steps, in order:
      1. Non-string values (e.g. NaN / None) are returned unchanged.
      2. Every character that is neither a word character nor whitespace is
         removed, so "A.J." becomes "AJ" and "O'Neil" becomes "ONeil".
      3. Runs of whitespace are collapsed and the ends are trimmed.
      4. A single generational suffix (Jr, Sr, II, III, IV, V; any case) is
         removed, but only when it is the last token of the name.

    Args:
        player_name: Raw name; any type is accepted.

    Returns:
        The cleaned string, or the original object when it is not a ``str``.
    """
    if not isinstance(player_name, str):
        return player_name

    # Remove punctuation
    player_name = re.sub(r'[^\w\s]', '', player_name)

    # Collapse whitespace first so the suffix, if present, is exactly the
    # final space-separated token.
    player_name = ' '.join(player_name.split())

    # Remove a trailing suffix only. Matching anywhere in the name would also
    # delete initials: "J.R. Smith" -> "JR Smith" would lose "JR" to a
    # case-insensitive "Jr" match, and a leading initial "V" would vanish too.
    suffixes = ['Jr', 'Sr', 'II', 'III', 'IV', 'V']
    pattern = r' (?:' + '|'.join(suffixes) + r')$'
    player_name = re.sub(pattern, '', player_name, flags=re.IGNORECASE)

    return player_name

# Columns to retain for QB analysis, listed in the order they should appear.
# A name listed here does not have to exist in the loaded data: the loading
# loop keeps only the names that are actually present in the merged frame.
qb_columns = (
    # Player / context columns
    ['player_name', 'team', 'season', 'week', 'games']
    # Passing and sack columns (plus pacr / dakota)
    + [
        'completions', 'attempts', 'passing_yards', 'passing_tds',
        'interceptions', 'sacks', 'sack_yards', 'sack_fumbles',
        'sack_fumbles_lost', 'passing_air_yards',
        'passing_yards_after_catch', 'passing_first_downs', 'passing_epa',
        'passing_2pt_conversions', 'pacr', 'dakota',
    ]
    # Rushing columns
    + [
        'carries', 'rushing_yards', 'rushing_tds', 'rushing_fumbles',
        'rushing_fumbles_lost', 'rushing_first_downs', 'rushing_epa',
        'rushing_2pt_conversions',
    ]
    # Columns whose names end in "_sh"
    + ['ry_sh', 'rtd_sh', 'rfd_sh', 'rtdfd_sh']
    # Fantasy scoring columns
    + ['fantasy_points', 'fantasy_points_ppr']
)

# Storage: one DataFrame per season, and one DataFrame per cleaned player name.
qb_data_by_year, qb_season_dict = {}, {}

# Loop over each season (2016 through 2024 inclusive).
#
# Per-player pieces are gathered in plain lists and concatenated once after
# the loop. Growing each player's frame with pd.concat every season re-copies
# the accumulated rows each time, and seeding that concat with an empty
# DataFrame() is deprecated in recent pandas.
season_frames_by_player = {}

for year in range(2016, 2025):
    # Load season-level regular-season stats and the roster for this year
    stats = import_seasonal_data([year], s_type="REG")
    rosters = import_seasonal_rosters([year])

    # Roster lookup used only to attach name / position / team to each stat
    # row. Keep one row per player_id so the left merge cannot multiply stat
    # rows. NOTE(review): assumes the first roster row per player is the one
    # wanted if duplicates ever occur - confirm against the roster data.
    roster_lookup = rosters[['player_id', 'player_name', 'position', 'team']].drop_duplicates(
        subset='player_id'
    )

    # Merge rosters with stats to get positions
    merged = stats.merge(roster_lookup, on='player_id', how='left')

    # Filter to QBs only
    qbs = merged[merged['position'] == 'QB']

    # Keep the requested columns that exist this season, in qb_columns order.
    # (player_id is not in qb_columns, so it is dropped by this selection.)
    kept_columns = [col for col in qb_columns if col in qbs.columns]

    # .assign returns a new frame, so no explicit .copy() is needed to avoid
    # SettingWithCopyWarning when adding the cleaned name.
    qb_filtered = qbs[kept_columns].assign(
        player_name_clean=lambda frame: frame['player_name'].apply(clean_player_name)
    )

    # Store yearly DataFrame
    qb_data_by_year[year] = qb_filtered

    # Queue this season's rows under each player's cleaned name
    for name, group in qb_filtered.groupby('player_name_clean'):
        season_frames_by_player.setdefault(name, []).append(
            group.drop(columns=['player_name', 'player_name_clean'])
        )

    print(f"✅ Loaded {len(qb_filtered)} QB rows for {year}")

# Build the player-level dictionary: one concat per player, seasons in load order
for name, frames in season_frames_by_player.items():
    qb_season_dict[name] = pd.concat(frames, ignore_index=True)


✅ Loaded 71 QB rows for 2016
✅ Loaded 73 QB rows for 2017
✅ Loaded 73 QB rows for 2018
✅ Loaded 72 QB rows for 2019
✅ Loaded 83 QB rows for 2020
✅ Loaded 83 QB rows for 2021
✅ Loaded 83 QB rows for 2022
✅ Loaded 83 QB rows for 2023
✅ Loaded 78 QB rows for 2024


In [5]:
qb_season_dict['Patrick Mahomes']

Unnamed: 0,team,season,games,completions,attempts,passing_yards,passing_tds,interceptions,sacks,sack_yards,sack_fumbles,sack_fumbles_lost,passing_air_yards,passing_yards_after_catch,passing_first_downs,passing_epa,passing_2pt_conversions,pacr,dakota,carries,rushing_yards,rushing_tds,rushing_fumbles,rushing_fumbles_lost,rushing_first_downs,rushing_epa,rushing_2pt_conversions,ry_sh,rtd_sh,rfd_sh,rtdfd_sh,fantasy_points,fantasy_points_ppr
0,KC,2017,1,22,35,284.0,0,1.0,2.0,15.0,0,0,261.0,174.0,14.0,7.888877,0,1.088123,0.132096,7,10.0,0,0.0,0.0,0.0,-2.469162,0,0.0,,0.0,0.0,10.36,10.36
1,KC,2018,16,383,580,5097.0,50,12.0,26.0,171.0,6,2,5255.0,2613.0,237.0,222.708404,0,16.273009,3.33505,60,272.0,2,2.0,0.0,18.0,14.220858,1,0.0,0.0,0.0,0.0,417.08,417.08
2,KC,2019,14,319,484,4031.0,26,5.0,17.0,127.0,2,2,4281.0,1947.0,176.0,137.694041,1,16.149281,2.389139,43,218.0,2,0.0,0.0,15.0,12.167492,0,0.0,0.0,0.0,0.0,287.04,287.04
3,KC,2020,15,390,588,4740.0,38,6.0,22.0,147.0,3,2,4917.0,2241.0,238.0,182.870404,3,15.267915,2.65767,62,308.0,2,2.0,0.0,21.0,17.556064,0,0.0,0.0,0.0,0.0,374.4,374.4
4,KC,2021,17,436,658,4839.0,37,13.0,28.0,146.0,6,2,4829.0,2699.0,260.0,139.734449,1,17.984323,2.680182,66,381.0,2,3.0,2.0,25.0,13.275449,1,0.0,0.0,0.0,0.0,361.66,361.66
5,KC,2022,17,435,648,5250.0,41,12.0,26.0,188.0,1,0,4715.0,2830.0,272.0,191.972205,2,19.204404,2.973218,61,358.0,4,4.0,0.0,25.0,23.256493,1,0.001143,0.0,0.0,0.0,416.4,417.4
6,KC,2023,16,401,597,4183.0,27,14.0,27.0,186.0,4,3,3873.0,2555.0,206.0,50.961516,0,18.826962,1.905731,75,389.0,0,2.0,0.0,27.0,13.332187,0,0.0,0.0,0.0,0.0,280.22,280.22
7,KC,2024,16,392,581,3928.0,26,11.0,36.0,239.0,1,0,3681.0,2300.0,214.0,73.027359,0,18.275815,1.981614,58,307.0,2,1.0,0.0,22.0,24.313267,0,0.000507,0.0,0.0,0.0,282.02,283.02
