In [1]:
from nfl_data_py import import_seasonal_data, import_seasonal_rosters
import pandas as pd
import re
pd.set_option('display.max_columns', None)

# Function to clean player names (punctuation and suffixes)
def clean_player_name(player_name):
    if not isinstance(player_name, str):
        return player_name

    player_name = re.sub(r'[^\w\s]', '', player_name)
    suffixes = ['Jr', 'Sr', 'II', 'III', 'IV', 'V']
    pattern = r'\b(?:' + '|'.join(suffixes) + r')\b'
    player_name = re.sub(pattern, '', player_name, flags=re.IGNORECASE)
    return ' '.join(player_name.split())

# Define columns to keep for TEs
te_columns = [
    'player_name', 'team', 'season', 'week', 'games',
    'receptions', 'targets', 'receiving_yards', 'receiving_tds',
    'receiving_fumbles', 'receiving_fumbles_lost', 'receiving_air_yards',
    'receiving_yards_after_catch', 'receiving_first_downs', 'receiving_epa',
    'receiving_2pt_conversions', 'racr', 'target_share', 'air_yards_share',
    'wopr_x', 'tgt_sh', 'ay_sh', 'yac_sh', 'wopr_y', 'ppr_sh',

    'carries', 'rushing_yards', 'rushing_tds', 'rushing_fumbles',
    'rushing_fumbles_lost', 'rushing_first_downs', 'rushing_epa',
    'rushing_2pt_conversions', 'ry_sh', 'rtd_sh', 'rfd_sh', 'rtdfd_sh',

    'fantasy_points', 'fantasy_points_ppr', 'games', 'dom', 'w8dom'
]

# Initialize storage dictionaries
te_data_by_year = {}
te_season_dict = {}

# Loop through seasons
for year in range(2016, 2025):
    stats = import_seasonal_data([year], s_type="REG")
    rosters = import_seasonal_rosters([year])

    merged = stats.merge(
        rosters[['player_id', 'player_name', 'position', 'team']],
        on='player_id',
        how='left'
    )

    # Filter for TEs only
    tes = merged[merged['position'] == 'TE'].copy()
    tes.drop(columns=['player_id'], inplace=True)
    cols = ['player_name'] + [col for col in tes.columns if col != 'player_name']
    tes = tes[cols]

    te_filtered = tes[[col for col in te_columns if col in tes.columns]].copy()

    # Clean names
    te_filtered['player_name_clean'] = te_filtered['player_name'].apply(clean_player_name)

    # Save year-level data
    te_data_by_year[year] = te_filtered

    # Build season dictionary
    for name, group in te_filtered.groupby('player_name_clean'):
        te_season_dict[name] = pd.concat(
            [te_season_dict.get(name, pd.DataFrame()), group.drop(columns=['player_name', 'player_name_clean'])],
            ignore_index=True
        )

    print(f"✅ Loaded {len(te_filtered)} TE rows for {year}")


✅ Loaded 117 TE rows for 2016
✅ Loaded 111 TE rows for 2017
✅ Loaded 120 TE rows for 2018
✅ Loaded 120 TE rows for 2019
✅ Loaded 124 TE rows for 2020
✅ Loaded 123 TE rows for 2021
✅ Loaded 120 TE rows for 2022
✅ Loaded 115 TE rows for 2023
✅ Loaded 119 TE rows for 2024


In [2]:
te_season_dict["Kyle Pitts"]

Unnamed: 0,team,season,games,receptions,targets,receiving_yards,receiving_tds,receiving_fumbles,receiving_fumbles_lost,receiving_air_yards,receiving_yards_after_catch,receiving_first_downs,receiving_epa,receiving_2pt_conversions,racr,target_share,air_yards_share,wopr_x,tgt_sh,ay_sh,yac_sh,wopr_y,ppr_sh,carries,rushing_yards,rushing_tds,rushing_fumbles,rushing_fumbles_lost,rushing_first_downs,rushing_epa,rushing_2pt_conversions,ry_sh,rtd_sh,rfd_sh,rtdfd_sh,fantasy_points,fantasy_points_ppr,games.1,dom,w8dom
0,ATL,2021,17,68,110,1026.0,1,0.0,0.0,1193.0,313.0,43.0,26.331115,0,14.78225,3.458072,5.175287,8.809809,0.191972,0.287956,0.180716,0.518323,0.136236,0,0.0,0,0.0,0.0,0.0,0.0,0,0.257336,0.05,0.219388,0.203704,108.6,176.6,17,0.153668,0.215869
1,ATL,2022,10,28,59,356.0,2,0.0,0.0,812.0,130.0,20.0,3.377595,0,6.491529,2.792393,3.159249,6.400064,0.261062,0.346121,0.180055,0.66849,0.104921,0,0.0,0,0.0,0.0,0.0,0.0,0,0.205661,0.166667,0.224719,0.217822,47.6,75.6,10,0.186164,0.197863
2,ATL,2023,17,53,90,667.0,3,0.0,0.0,1029.0,134.0,35.0,17.377557,0,12.149034,3.062327,4.588594,7.805506,0.169811,0.236824,0.075749,0.444176,0.107148,1,-4.0,0,0.0,0.0,0.0,-1.651204,0,0.176689,0.176471,0.19337,0.191919,84.3,137.3,17,0.17658,0.176645
3,ATL,2024,17,47,74,602.0,4,0.0,0.0,623.0,272.0,21.0,6.943506,0,24.65364,2.363852,2.551407,5.331762,0.132379,0.139498,0.138917,0.310168,0.089281,0,0.0,0,0.0,0.0,0.0,0.0,0,0.140556,0.190476,0.106061,0.114155,84.2,131.2,17,0.165516,0.15054
