In [4]:
import pandas as pd
import requests
pd.set_option('display.max_columns', None)
import numpy as np
from nba_api.stats.endpoints import LeagueDashPlayerStats
from nba_api.stats.endpoints import commonteamroster
import time


### RetrieveNBA Api Player Level Data using NBA API

In [5]:
def fetch_nba_season_stats(season):
    """
    Fetches NBA player statistics for both the regular season and playoffs for a given season.

    Parameters:
    - season (str): The NBA season in "YYYY-YY" format (e.g., "2023-24").

    Returns:
    - df_regular (DataFrame): Regular season player stats.
    - df_playoffs (DataFrame): Playoff player stats.
    """

    # Fetch regular season stats
    player_stats_regular = LeagueDashPlayerStats(season=season, season_type_all_star="Regular Season")
    df_regular = player_stats_regular.get_data_frames()[0]
    df_regular["Season Type"] = "Regular Season"

    # Fetch playoff stats
    player_stats_playoffs = LeagueDashPlayerStats(season=season, season_type_all_star="Playoffs")
    df_playoffs = player_stats_playoffs.get_data_frames()[0]
    df_playoffs["Season Type"] = "Playoffs"

    return df_regular, df_playoffs

from nba_api.stats.endpoints import LeagueDashPlayerStats

def fetch_nba_season_advanced_stats(season):
    """
    Fetches NBA player advanced statistics for both the regular season and playoffs for a given season.

    Parameters:
    - season (str): The NBA season in "YYYY-YY" format (e.g., "2023-24").

    Returns:
    - df_regular_adv (DataFrame): Regular season player advanced stats.
    - df_playoffs_adv (DataFrame): Playoff player advanced stats.
    """
    # Fetch regular season advanced stats
    player_stats_regular_adv = LeagueDashPlayerStats(
        season=season,
        season_type_all_star="Regular Season",
        measure_type_detailed_defense="Advanced"
    )
    df_regular_adv = player_stats_regular_adv.get_data_frames()[0]
    df_regular_adv["Season Type"] = "Regular Season"

    # Fetch playoff advanced stats
    player_stats_playoffs_adv = LeagueDashPlayerStats(
        season=season,
        season_type_all_star="Playoffs",
        measure_type_detailed_defense="Advanced"
    )
    df_playoffs_adv = player_stats_playoffs_adv.get_data_frames()[0]
    df_playoffs_adv["Season Type"] = "Playoffs"

    return df_regular_adv, df_playoffs_adv

In [6]:
# Basic Stats
season_years = {
    2023: "2023-24",
    2022: "2022-23",
    2021: "2021-22",
    2020: "2020-21",
    2019: "2019-20"
}

regular_season_dfs = []

for start_year, season_str in season_years.items():
    df_regular, df_playoffs = fetch_nba_season_stats(season_str)
    
    # Add 'season' column (use start year)
    df_regular['Season'] = start_year
    
    # Append to list for later merging
    regular_season_dfs.append(df_regular)

# Merge all regular season DataFrames
df_regular_all = pd.concat(regular_season_dfs, ignore_index=True)

In [7]:
# Advance Stats
advanced_season_dfs = []

for start_year, season_str in season_years.items():
    df_regular_adv, df_playoffs_adv = fetch_nba_season_advanced_stats(season_str)
    
    # Add 'Season' column
    df_regular_adv['Season'] = start_year
    
    # Append to list
    advanced_season_dfs.append(df_regular_adv)

# Merge all regular season advanced DataFrames
df_advanced_all = pd.concat(advanced_season_dfs, ignore_index=True)

# Merge on shared keys: PLAYER_ID, TEAM_ID, and Season
df_regular_all = pd.merge(
    df_regular_all,
    df_advanced_all,
    on=["PLAYER_ID", "TEAM_ID", "Season"],
    suffixes=('', '_adv'),
    how='left'
)

In [8]:
df_regular_all.head(1)

Unnamed: 0,PLAYER_ID,PLAYER_NAME,NICKNAME,TEAM_ID,TEAM_ABBREVIATION,AGE,GP,W,L,W_PCT,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,TOV,STL,BLK,BLKA,PF,PFD,PTS,PLUS_MINUS,NBA_FANTASY_PTS,DD2,TD3,WNBA_FANTASY_PTS,GP_RANK,W_RANK,L_RANK,W_PCT_RANK,MIN_RANK,FGM_RANK,FGA_RANK,FG_PCT_RANK,FG3M_RANK,FG3A_RANK,FG3_PCT_RANK,FTM_RANK,FTA_RANK,FT_PCT_RANK,OREB_RANK,DREB_RANK,REB_RANK,AST_RANK,TOV_RANK,STL_RANK,BLK_RANK,BLKA_RANK,PF_RANK,PFD_RANK,PTS_RANK,PLUS_MINUS_RANK,NBA_FANTASY_PTS_RANK,DD2_RANK,TD3_RANK,WNBA_FANTASY_PTS_RANK,Season Type,Season,PLAYER_NAME_adv,NICKNAME_adv,TEAM_ABBREVIATION_adv,AGE_adv,GP_adv,W_adv,L_adv,W_PCT_adv,MIN_adv,E_OFF_RATING,OFF_RATING,sp_work_OFF_RATING,E_DEF_RATING,DEF_RATING,sp_work_DEF_RATING,E_NET_RATING,NET_RATING,sp_work_NET_RATING,AST_PCT,AST_TO,AST_RATIO,OREB_PCT,DREB_PCT,REB_PCT,TM_TOV_PCT,E_TOV_PCT,EFG_PCT,TS_PCT,USG_PCT,E_USG_PCT,E_PACE,PACE,PACE_PER40,sp_work_PACE,PIE,POSS,FGM_adv,FGA_adv,FGM_PG,FGA_PG,FG_PCT_adv,GP_RANK_adv,W_RANK_adv,L_RANK_adv,W_PCT_RANK_adv,MIN_RANK_adv,E_OFF_RATING_RANK,OFF_RATING_RANK,sp_work_OFF_RATING_RANK,E_DEF_RATING_RANK,DEF_RATING_RANK,sp_work_DEF_RATING_RANK,E_NET_RATING_RANK,NET_RATING_RANK,sp_work_NET_RATING_RANK,AST_PCT_RANK,AST_TO_RANK,AST_RATIO_RANK,OREB_PCT_RANK,DREB_PCT_RANK,REB_PCT_RANK,TM_TOV_PCT_RANK,E_TOV_PCT_RANK,EFG_PCT_RANK,TS_PCT_RANK,USG_PCT_RANK,E_USG_PCT_RANK,E_PACE_RANK,PACE_RANK,sp_work_PACE_RANK,PIE_RANK,FGM_RANK_adv,FGA_RANK_adv,FGM_PG_RANK,FGA_PG_RANK,FG_PCT_RANK_adv,Season Type_adv
0,1630639,A.J. Lawson,A.J.,1610612742,DAL,23.0,42,27,15,0.643,310.9,54,121,0.446,13,50,0.26,15,23,0.652,14,36,50,20,14,10,3,8,22,13,136,18,251.0,0,0,245.0,341,231,168,105,410,378,373,303,346,338,428,382,374,445,389,407,408,414,394,386,428,211,148,415,385,192,400,257,38,405,Regular Season,2023,A.J. Lawson,A.J.,DAL,23.0,42,27,15,0.643,7.4,109.9,106.6,106.6,106.2,105.3,105.3,3.7,1.2,1.2,0.089,1.43,12.1,0.039,0.105,0.072,8.5,8.5,0.5,0.519,0.18,0.188,105.42,107.53,89.61,107.53,0.076,701,54,121,1.3,2.9,0.446,341,231,168,105,486,314,407,407,67,75,75,157,214,214,372,393,445,255,361,334,215,215,383,425,220,217,28,33,33,358,378,373,429,441,303,Regular Season


### Retrieve Player Physical Stats

In [9]:
season = "2023-24"

# Retrieve roster data 
roster_response = commonteamroster.CommonTeamRoster(team_id=team_id, season=season)
roster_df = roster_response.get_data_frames()[0]

roster_df.head(1)

NameError: name 'team_id' is not defined

In [None]:
roster_data = []

# Loop over each season in our mapping
for start_year, season_str in season_years.items():
    teams_in_season = df_regular_all[df_regular_all['Season'] == start_year]['TEAM_ID'].unique()
    
    for team_id in teams_in_season:
        try:
            roster_response = commonteamroster.CommonTeamRoster(team_id=team_id, season=season_str)
            roster_df = roster_response.get_data_frames()[0]
            
            required_cols = ["PLAYER_ID", "POSITION", "HEIGHT", "WEIGHT", "EXP"]
            if all(col in roster_df.columns for col in required_cols):
                tmp = roster_df[required_cols].copy()
                tmp["TEAM_ID"] = team_id
                tmp["Season"] = start_year
                roster_data.append(tmp)
            else:
                print(f"Team {team_id} for season {season_str} missing one or more required columns.")

            time.sleep(0.6)
        except Exception as e:
            print(f"Error retrieving roster for team {team_id} in season {season_str}: {e}")

# Combine roster data from all teams and seasons if any data was collected
if roster_data:
    roster_all = pd.concat(roster_data, ignore_index=True)
    df_regular_all = df_regular_all.merge(roster_all, on=["PLAYER_ID", "TEAM_ID", "Season"], how="left")
else:
    print("No roster data was retrieved.")

df_regular_all.head()

Unnamed: 0,PLAYER_ID,PLAYER_NAME,NICKNAME,TEAM_ID,TEAM_ABBREVIATION,AGE,GP,W,L,W_PCT,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,TOV,STL,BLK,BLKA,PF,PFD,PTS,PLUS_MINUS,NBA_FANTASY_PTS,DD2,TD3,WNBA_FANTASY_PTS,GP_RANK,W_RANK,L_RANK,W_PCT_RANK,MIN_RANK,FGM_RANK,FGA_RANK,FG_PCT_RANK,FG3M_RANK,FG3A_RANK,FG3_PCT_RANK,FTM_RANK,FTA_RANK,FT_PCT_RANK,OREB_RANK,DREB_RANK,REB_RANK,AST_RANK,TOV_RANK,STL_RANK,BLK_RANK,BLKA_RANK,PF_RANK,PFD_RANK,PTS_RANK,PLUS_MINUS_RANK,NBA_FANTASY_PTS_RANK,DD2_RANK,TD3_RANK,WNBA_FANTASY_PTS_RANK,Season Type,Season,PLAYER_NAME_adv,NICKNAME_adv,TEAM_ABBREVIATION_adv,AGE_adv,GP_adv,W_adv,L_adv,W_PCT_adv,MIN_adv,E_OFF_RATING,OFF_RATING,sp_work_OFF_RATING,E_DEF_RATING,DEF_RATING,sp_work_DEF_RATING,E_NET_RATING,NET_RATING,sp_work_NET_RATING,AST_PCT,AST_TO,AST_RATIO,OREB_PCT,DREB_PCT,REB_PCT,TM_TOV_PCT,E_TOV_PCT,EFG_PCT,TS_PCT,USG_PCT,E_USG_PCT,E_PACE,PACE,PACE_PER40,sp_work_PACE,PIE,POSS,FGM_adv,FGA_adv,FGM_PG,FGA_PG,FG_PCT_adv,GP_RANK_adv,W_RANK_adv,L_RANK_adv,W_PCT_RANK_adv,MIN_RANK_adv,E_OFF_RATING_RANK,OFF_RATING_RANK,sp_work_OFF_RATING_RANK,E_DEF_RATING_RANK,DEF_RATING_RANK,sp_work_DEF_RATING_RANK,E_NET_RATING_RANK,NET_RATING_RANK,sp_work_NET_RATING_RANK,AST_PCT_RANK,AST_TO_RANK,AST_RATIO_RANK,OREB_PCT_RANK,DREB_PCT_RANK,REB_PCT_RANK,TM_TOV_PCT_RANK,E_TOV_PCT_RANK,EFG_PCT_RANK,TS_PCT_RANK,USG_PCT_RANK,E_USG_PCT_RANK,E_PACE_RANK,PACE_RANK,sp_work_PACE_RANK,PIE_RANK,FGM_RANK_adv,FGA_RANK_adv,FGM_PG_RANK,FGA_PG_RANK,FG_PCT_RANK_adv,Season Type_adv,POSITION,HEIGHT,WEIGHT,EXP
0,1630639,A.J. Lawson,A.J.,1610612742,DAL,23.0,42,27,15,0.643,310.9,54,121,0.446,13,50,0.26,15,23,0.652,14,36,50,20,14,10,3,8,22,13,136,18,251.0,0,0,245.0,341,231,168,105,410,378,373,303,346,338,428,382,374,445,389,407,408,414,394,386,428,211,148,415,385,192,400,257,38,405,Regular Season,2023,A.J. Lawson,A.J.,DAL,23.0,42,27,15,0.643,7.4,109.9,106.6,106.6,106.2,105.3,105.3,3.7,1.2,1.2,0.089,1.43,12.1,0.039,0.105,0.072,8.5,8.5,0.5,0.519,0.18,0.188,105.42,107.53,89.61,107.53,0.076,701,54,121,1.3,2.9,0.446,341,231,168,105,486,314,407,407,67,75,75,157,214,214,372,393,445,255,361,334,215,215,383,425,220,217,28,33,33,358,378,373,429,441,303,Regular Season,G,6-6,179,1
1,1631260,AJ Green,AJ,1610612749,MIL,24.0,56,35,21,0.625,614.281667,83,196,0.423,69,169,0.408,17,19,0.895,9,55,64,30,12,9,4,3,49,20,252,50,400.8,0,0,441.0,252,155,253,128,333,327,320,393,191,205,72,372,388,54,424,366,381,373,409,398,410,121,223,375,313,145,369,257,38,343,Regular Season,2023,AJ Green,AJ,MIL,24.0,56,35,21,0.625,11.0,115.2,114.0,114.0,111.2,110.5,110.5,4.0,3.5,3.5,0.068,2.5,12.3,0.015,0.089,0.053,4.9,4.9,0.599,0.617,0.15,0.158,98.18,98.96,82.47,98.96,0.068,1269,83,196,1.5,3.5,0.423,252,155,253,128,422,132,183,183,234,189,189,150,152,152,473,143,438,482,438,466,47,47,85,106,365,360,499,424,424,400,327,320,403,388,393,Regular Season,G,6-4,190,1
2,1631100,AJ Griffin,AJ,1610612737,ATL,20.0,20,8,12,0.4,170.73,18,62,0.29,10,39,0.256,2,2,1.0,2,16,18,5,8,1,2,3,6,1,48,-51,78.1,0,0,87.0,439,423,138,385,445,447,434,541,372,363,429,487,493,1,507,463,475,488,435,509,451,121,77,519,450,396,480,257,38,469,Regular Season,2023,AJ Griffin,AJ,ATL,20.0,20,8,12,0.4,8.5,106.9,106.0,106.0,121.7,120.1,120.1,-14.8,-14.0,-14.0,0.041,0.63,6.6,0.011,0.108,0.055,10.5,10.5,0.371,0.382,0.167,0.175,105.7,106.84,89.03,106.84,0.017,381,18,62,0.9,3.1,0.29,439,423,138,385,469,411,424,424,549,536,536,528,520,520,544,527,542,519,337,453,381,385,530,534,282,272,23,35,35,544,447,434,478,424,541,Regular Season,F,6-6,220,1
3,203932,Aaron Gordon,Aaron,1610612743,DEN,28.0,73,49,24,0.671,2296.81,398,716,0.556,40,138,0.29,177,269,0.658,174,297,471,259,105,56,45,57,142,223,1013,423,2164.7,12,0,1985.0,99,29,282,75,55,78,114,77,258,238,399,69,54,440,21,70,46,83,103,119,79,520,457,52,93,13,79,54,38,87,Regular Season,2023,Aaron Gordon,Aaron,DEN,28.0,73,49,24,0.671,31.5,118.5,119.8,119.8,110.4,111.1,111.1,8.1,8.7,8.7,0.147,2.47,21.7,0.078,0.129,0.104,8.8,8.8,0.584,0.607,0.174,0.178,98.84,98.01,81.68,98.01,0.103,4697,398,716,5.5,9.8,0.556,99,29,282,75,83,50,45,45,197,218,218,71,62,62,211,158,169,108,240,171,244,244,120,135,241,255,458,501,501,179,78,114,97,130,77,Regular Season,F,6-8,235,9
4,1628988,Aaron Holiday,Aaron,1610612745,HOU,27.0,78,39,39,0.5,1269.296667,186,417,0.446,84,217,0.387,58,63,0.921,23,100,123,140,53,42,6,20,125,65,514,85,962.6,0,0,957.0,43,117,497,300,217,216,209,305,152,167,129,217,247,37,318,299,310,157,229,194,370,335,419,238,216,123,243,257,38,233,Regular Season,2023,Aaron Holiday,Aaron,HOU,27.0,78,39,39,0.5,16.3,111.0,110.5,110.5,108.0,107.6,107.6,3.0,2.9,2.9,0.157,2.64,22.1,0.017,0.075,0.045,8.4,8.3,0.547,0.578,0.158,0.165,101.82,102.24,85.2,102.24,0.078,2706,186,417,2.4,5.3,0.446,43,117,497,300,312,275,300,300,94,100,100,179,168,168,190,122,165,466,501,519,201,196,242,233,326,311,206,169,169,334,216,209,290,279,305,Regular Season,G,6-0,185,5


### Read in Injury Data

In [None]:
il_df = pd.read_csv('IL Report 20 to 24 v2.csv', encoding='utf-8')
il_df['Season'] = il_df['Season'].replace(1999, 2019)
il_df

Unnamed: 0,Rank,Player,Pos,Team,Reason,Games Missed,Days Missed,Cash Total Per Days Missed,Season
0,1,Ben Simmons,PG,BKN,"Back, Hip, Knee, Injury Management, Leg",67,"135 11/4-11/5, 11/8-11/15, 11/16-1/28, 1/31-2/...",29400030,2023
1,2,Zach LaVine,SG,CHI,Foot,57,"123 11/22-11/23, 11/30-1/4, 1/20-4/14",28321242,2023
2,3,Joel Embiid,C,PHI,"Hip, Illness, Ankle, Knee",41,"90 11/22-11/24, 11/29-12/5, 12/25-1/1, 1/6-1/1...",24624450,2023
3,4,Lonzo Ball,PG,CHI,Knee,82,173 10/25-4/14,20347568,2023
4,5,Ja Morant,PG,MEM,"Illness, Shoulder",48,"99 12/28-12/28, 1/7-4/14",19347768,2023
...,...,...,...,...,...,...,...,...,...
2085,354,Damion Lee,SG,GSW,"Hand, Personal",11,"21 11/13-12/1, 1/14-1/15",0,2019
2086,355,Kenny Wooten,PF,NYK,Thumb,4,8 2/24-3/2,0,2019
2087,356,Frank Mason III,PG,MIL,"Oblique, Abdominal",3,"6 1/14-1/15, 1/31-2/3",0,2019
2088,357,B.J. Johnson,SF,ORL,Personal,2,4 1/20-1/23,0,2019


### Name Mapping

Streamline the names between two data sources.

In [None]:
name_map_df = pd.read_excel('nba_regular_season_data v2.xlsx', sheet_name='Mapped Name Final')
name_map_df.columns

Index(['Player (IL report)', 'Mapped Name (NBA API)'], dtype='object')

In [None]:
name_mapping = dict(zip(name_map_df['Player (IL report)'], name_map_df['Mapped Name (NBA API)']))
il_df['mapped_name'] = il_df['Player'].map(name_mapping).fillna(il_df['Player'])
il_df

Unnamed: 0,Rank,Player,Pos,Team,Reason,Games Missed,Days Missed,Cash Total Per Days Missed,Season,mapped_name
0,1,Ben Simmons,PG,BKN,"Back, Hip, Knee, Injury Management, Leg",67,"135 11/4-11/5, 11/8-11/15, 11/16-1/28, 1/31-2/...",29400030,2023,Ben Simmons
1,2,Zach LaVine,SG,CHI,Foot,57,"123 11/22-11/23, 11/30-1/4, 1/20-4/14",28321242,2023,Zach LaVine
2,3,Joel Embiid,C,PHI,"Hip, Illness, Ankle, Knee",41,"90 11/22-11/24, 11/29-12/5, 12/25-1/1, 1/6-1/1...",24624450,2023,Joel Embiid
3,4,Lonzo Ball,PG,CHI,Knee,82,173 10/25-4/14,20347568,2023,Lonzo Ball
4,5,Ja Morant,PG,MEM,"Illness, Shoulder",48,"99 12/28-12/28, 1/7-4/14",19347768,2023,Ja Morant
...,...,...,...,...,...,...,...,...,...,...
2085,354,Damion Lee,SG,GSW,"Hand, Personal",11,"21 11/13-12/1, 1/14-1/15",0,2019,Damion Lee
2086,355,Kenny Wooten,PF,NYK,Thumb,4,8 2/24-3/2,0,2019,Kenny Wooten
2087,356,Frank Mason III,PG,MIL,"Oblique, Abdominal",3,"6 1/14-1/15, 1/31-2/3",0,2019,Frank Mason III
2088,357,B.J. Johnson,SF,ORL,Personal,2,4 1/20-1/23,0,2019,BJ Johnson


### Check for shared name, not shared names between injury and performance table

In [None]:
il_df['mapped_name'] = il_df['mapped_name'].astype(str).str.strip()
df_regular_all['PLAYER_NAME'] = df_regular_all['PLAYER_NAME'].astype(str).str.strip()

injury_names = set(il_df['mapped_name'].unique())
performance_names = set(df_regular_all['PLAYER_NAME'].unique())

# Find names only in the injury table
only_in_injury = injury_names - performance_names

# Create a comparison DataFrame
comparison_df = pd.DataFrame({
    'player_name': list(injury_names.union(performance_names)),
    'in_injury_table': [name in injury_names for name in injury_names.union(performance_names)],
    'in_performance_table': [name in performance_names for name in injury_names.union(performance_names)],
    'only_in_injury_table': [name in only_in_injury for name in injury_names.union(performance_names)]
})

# Filter to just players only in the injury table, if needed
only_injury_df = comparison_df[comparison_df['only_in_injury_table']]
only_injury_df

Unnamed: 0,player_name,in_injury_table,in_performance_table,only_in_injury_table
90,Chris Smith,True,False,True
318,Daulton Hommes,True,False,True
349,Pau Gasol,True,False,True
391,Kenny Wooten,True,False,True
445,Gerald Green,True,False,True
589,Nene Hilario,True,False,True
933,Luca Vildoza,True,False,True


In [None]:
# Dropping players that doesn't have stats in NBA Api
exclude_players = list(only_injury_df['player_name'])
il_df = il_df[~il_df['mapped_name'].isin(exclude_players)]
il_df

Unnamed: 0,Rank,Player,Pos,Team,Reason,Games Missed,Days Missed,Cash Total Per Days Missed,Season,mapped_name
0,1,Ben Simmons,PG,BKN,"Back, Hip, Knee, Injury Management, Leg",67,"135 11/4-11/5, 11/8-11/15, 11/16-1/28, 1/31-2/...",29400030,2023,Ben Simmons
1,2,Zach LaVine,SG,CHI,Foot,57,"123 11/22-11/23, 11/30-1/4, 1/20-4/14",28321242,2023,Zach LaVine
2,3,Joel Embiid,C,PHI,"Hip, Illness, Ankle, Knee",41,"90 11/22-11/24, 11/29-12/5, 12/25-1/1, 1/6-1/1...",24624450,2023,Joel Embiid
3,4,Lonzo Ball,PG,CHI,Knee,82,173 10/25-4/14,20347568,2023,Lonzo Ball
4,5,Ja Morant,PG,MEM,"Illness, Shoulder",48,"99 12/28-12/28, 1/7-4/14",19347768,2023,Ja Morant
...,...,...,...,...,...,...,...,...,...,...
2084,353,Naz Mitrou-Long,SG,IND,Ankle,16,"35 12/17-1/18, 2/25-2/26",0,2019,Naz Mitrou-Long
2085,354,Damion Lee,SG,GSW,"Hand, Personal",11,"21 11/13-12/1, 1/14-1/15",0,2019,Damion Lee
2087,356,Frank Mason III,PG,MIL,"Oblique, Abdominal",3,"6 1/14-1/15, 1/31-2/3",0,2019,Frank Mason III
2088,357,B.J. Johnson,SF,ORL,Personal,2,4 1/20-1/23,0,2019,BJ Johnson


### Merge Injury data with Performance data

In [None]:
# il_df['Season_for_merge'] = il_df['Season'] - 1
df_regular_all = pd.merge(
    df_regular_all,
    il_df,
    left_on=['PLAYER_NAME', 'Season'],
    right_on=['mapped_name', 'Season'],
    how='left'
)

df_regular_all['Games Missed'] = df_regular_all['Games Missed'].fillna(0.0)
df_regular_all


Unnamed: 0,PLAYER_ID,PLAYER_NAME,NICKNAME,TEAM_ID,TEAM_ABBREVIATION,AGE,GP,W,L,W_PCT,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,TOV,STL,BLK,BLKA,PF,PFD,PTS,PLUS_MINUS,NBA_FANTASY_PTS,DD2,TD3,WNBA_FANTASY_PTS,GP_RANK,W_RANK,L_RANK,W_PCT_RANK,MIN_RANK,FGM_RANK,FGA_RANK,FG_PCT_RANK,FG3M_RANK,FG3A_RANK,FG3_PCT_RANK,FTM_RANK,FTA_RANK,FT_PCT_RANK,OREB_RANK,DREB_RANK,REB_RANK,AST_RANK,TOV_RANK,STL_RANK,BLK_RANK,BLKA_RANK,PF_RANK,PFD_RANK,PTS_RANK,PLUS_MINUS_RANK,NBA_FANTASY_PTS_RANK,DD2_RANK,TD3_RANK,WNBA_FANTASY_PTS_RANK,Season Type,Season,PLAYER_NAME_adv,NICKNAME_adv,TEAM_ABBREVIATION_adv,AGE_adv,GP_adv,W_adv,L_adv,W_PCT_adv,MIN_adv,E_OFF_RATING,OFF_RATING,sp_work_OFF_RATING,E_DEF_RATING,DEF_RATING,sp_work_DEF_RATING,E_NET_RATING,NET_RATING,sp_work_NET_RATING,AST_PCT,AST_TO,AST_RATIO,OREB_PCT,DREB_PCT,REB_PCT,TM_TOV_PCT,E_TOV_PCT,EFG_PCT,TS_PCT,USG_PCT,E_USG_PCT,E_PACE,PACE,PACE_PER40,sp_work_PACE,PIE,POSS,FGM_adv,FGA_adv,FGM_PG,FGA_PG,FG_PCT_adv,GP_RANK_adv,W_RANK_adv,L_RANK_adv,W_PCT_RANK_adv,MIN_RANK_adv,E_OFF_RATING_RANK,OFF_RATING_RANK,sp_work_OFF_RATING_RANK,E_DEF_RATING_RANK,DEF_RATING_RANK,sp_work_DEF_RATING_RANK,E_NET_RATING_RANK,NET_RATING_RANK,sp_work_NET_RATING_RANK,AST_PCT_RANK,AST_TO_RANK,AST_RATIO_RANK,OREB_PCT_RANK,DREB_PCT_RANK,REB_PCT_RANK,TM_TOV_PCT_RANK,E_TOV_PCT_RANK,EFG_PCT_RANK,TS_PCT_RANK,USG_PCT_RANK,E_USG_PCT_RANK,E_PACE_RANK,PACE_RANK,sp_work_PACE_RANK,PIE_RANK,FGM_RANK_adv,FGA_RANK_adv,FGM_PG_RANK,FGA_PG_RANK,FG_PCT_RANK_adv,Season Type_adv,POSITION,HEIGHT,WEIGHT,EXP,Rank,Player,Pos,Team,Reason,Games Missed,Days Missed,Cash Total Per Days Missed,mapped_name
0,1630639,A.J. Lawson,A.J.,1610612742,DAL,23.0,42,27,15,0.643,310.900000,54,121,0.446,13,50,0.260,15,23,0.652,14,36,50,20,14,10,3,8,22,13,136,18,251.0,0,0,245.0,341,231,168,105,410,378,373,303,346,338,428,382,374,445,389,407,408,414,394,386,428,211,148,415,385,192,400,257,38,405,Regular Season,2023,A.J. Lawson,A.J.,DAL,23.0,42,27,15,0.643,7.4,109.9,106.6,106.6,106.2,105.3,105.3,3.7,1.2,1.2,0.089,1.43,12.1,0.039,0.105,0.072,8.5,8.5,0.500,0.519,0.180,0.188,105.42,107.53,89.61,107.53,0.076,701,54,121,1.3,2.9,0.446,341,231,168,105,486,314,407,407,67,75,75,157,214,214,372,393,445,255,361,334,215,215,383,425,220,217,28,33,33,358,378,373,429,441,303,Regular Season,G,6-6,179,1,,,,,,0.0,,,
1,1631260,AJ Green,AJ,1610612749,MIL,24.0,56,35,21,0.625,614.281667,83,196,0.423,69,169,0.408,17,19,0.895,9,55,64,30,12,9,4,3,49,20,252,50,400.8,0,0,441.0,252,155,253,128,333,327,320,393,191,205,72,372,388,54,424,366,381,373,409,398,410,121,223,375,313,145,369,257,38,343,Regular Season,2023,AJ Green,AJ,MIL,24.0,56,35,21,0.625,11.0,115.2,114.0,114.0,111.2,110.5,110.5,4.0,3.5,3.5,0.068,2.50,12.3,0.015,0.089,0.053,4.9,4.9,0.599,0.617,0.150,0.158,98.18,98.96,82.47,98.96,0.068,1269,83,196,1.5,3.5,0.423,252,155,253,128,422,132,183,183,234,189,189,150,152,152,473,143,438,482,438,466,47,47,85,106,365,360,499,424,424,400,327,320,403,388,393,Regular Season,G,6-4,190,1,347.0,AJ Green,SG,MIL,"Nose, Illness, Ankle",4.0,"7 1/3-1/4, 3/24-3/25, 4/12-4/14",76510.0,AJ Green
2,1631100,AJ Griffin,AJ,1610612737,ATL,20.0,20,8,12,0.400,170.730000,18,62,0.290,10,39,0.256,2,2,1.000,2,16,18,5,8,1,2,3,6,1,48,-51,78.1,0,0,87.0,439,423,138,385,445,447,434,541,372,363,429,487,493,1,507,463,475,488,435,509,451,121,77,519,450,396,480,257,38,469,Regular Season,2023,AJ Griffin,AJ,ATL,20.0,20,8,12,0.400,8.5,106.9,106.0,106.0,121.7,120.1,120.1,-14.8,-14.0,-14.0,0.041,0.63,6.6,0.011,0.108,0.055,10.5,10.5,0.371,0.382,0.167,0.175,105.70,106.84,89.03,106.84,0.017,381,18,62,0.9,3.1,0.290,439,423,138,385,469,411,424,424,549,536,536,528,520,520,544,527,542,519,337,453,381,385,530,534,282,272,23,35,35,544,447,434,478,424,541,Regular Season,F,6-6,220,1,160.0,A.J. Griffin,SF,ATL,"Illness, Personal, Ankle",25.0,"56 11/21-11/21, 12/13-12/29, 2/12-2/23, 3/17-4/11",1194984.0,AJ Griffin
3,203932,Aaron Gordon,Aaron,1610612743,DEN,28.0,73,49,24,0.671,2296.810000,398,716,0.556,40,138,0.290,177,269,0.658,174,297,471,259,105,56,45,57,142,223,1013,423,2164.7,12,0,1985.0,99,29,282,75,55,78,114,77,258,238,399,69,54,440,21,70,46,83,103,119,79,520,457,52,93,13,79,54,38,87,Regular Season,2023,Aaron Gordon,Aaron,DEN,28.0,73,49,24,0.671,31.5,118.5,119.8,119.8,110.4,111.1,111.1,8.1,8.7,8.7,0.147,2.47,21.7,0.078,0.129,0.104,8.8,8.8,0.584,0.607,0.174,0.178,98.84,98.01,81.68,98.01,0.103,4697,398,716,5.5,9.8,0.556,99,29,282,75,83,50,45,45,197,218,218,71,62,62,211,158,169,108,240,171,244,244,120,135,241,255,458,501,501,179,78,114,97,130,77,Regular Season,F,6-8,235,9,124.0,Aaron Gordon,PF,DEN,"Heel, Face, Foot",9.0,"16 11/26-12/1, 12/28-12/31, 3/25-3/26, 4/6-4/9",1955504.0,Aaron Gordon
4,1628988,Aaron Holiday,Aaron,1610612745,HOU,27.0,78,39,39,0.500,1269.296667,186,417,0.446,84,217,0.387,58,63,0.921,23,100,123,140,53,42,6,20,125,65,514,85,962.6,0,0,957.0,43,117,497,300,217,216,209,305,152,167,129,217,247,37,318,299,310,157,229,194,370,335,419,238,216,123,243,257,38,233,Regular Season,2023,Aaron Holiday,Aaron,HOU,27.0,78,39,39,0.500,16.3,111.0,110.5,110.5,108.0,107.6,107.6,3.0,2.9,2.9,0.157,2.64,22.1,0.017,0.075,0.045,8.4,8.3,0.547,0.578,0.158,0.165,101.82,102.24,85.20,102.24,0.078,2706,186,417,2.4,5.3,0.446,43,117,497,300,312,275,300,300,94,100,100,179,168,168,190,122,165,466,501,519,201,196,242,233,326,311,206,169,169,334,216,209,290,279,305,Regular Season,G,6-0,185,5,,,,,,0.0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2780,203897,Zach LaVine,Zach,1610612741,CHI,25.0,60,20,40,0.333,2085.410000,539,1199,0.450,184,484,0.380,268,334,0.802,41,248,289,254,206,88,28,86,131,243,1530,-191,2399.8,2,0,2489.0,151,250,484,396,36,17,13,233,12,11,122,20,25,191,188,98,113,56,10,22,121,525,404,34,15,490,30,142,29,17,Regular Season,2019,Zach LaVine,Zach,CHI,25.0,60,20,40,0.333,34.8,105.5,106.8,106.8,110.7,111.2,111.2,-5.1,-4.4,-4.4,0.218,1.23,14.2,0.019,0.125,0.070,11.5,11.4,0.526,0.568,0.312,0.318,101.71,100.85,84.04,100.85,0.130,4382,539,1199,9.0,20.0,0.450,151,250,484,396,14,338,303,303,374,388,388,382,362,362,81,341,299,389,272,317,349,344,232,198,11,11,323,375,375,61,17,13,12,8,233,Regular Season,G-F,6-6,200,5,135.0,Zach LaVine,SG,CHI,Quad,5.0,9 3/2-3/11,991521.0,Zach LaVine
2781,1629668,Zach Norvell Jr.,Zach,1610612744,GSW,22.0,5,2,3,0.400,40.801667,3,12,0.250,3,8,0.375,1,1,1.000,0,6,6,3,3,2,0,0,4,1,10,-27,24.7,0,0,26.0,487,480,37,339,489,498,486,511,399,425,134,490,501,1,504,472,486,475,462,460,480,1,37,495,496,321,496,239,29,495,Regular Season,2019,Zach Norvell Jr.,Zach,GSW,22.0,5,2,3,0.400,8.2,101.6,103.3,103.3,140.1,135.6,135.6,-38.5,-32.3,-32.3,0.091,1.00,16.7,0.000,0.207,0.086,16.7,16.3,0.375,0.402,0.147,0.158,106.21,107.05,89.21,107.05,0.026,92,3,12,0.6,2.4,0.250,487,480,37,339,458,433,405,405,526,526,526,523,523,523,319,392,212,504,65,237,499,499,487,489,382,344,73,60,60,501,498,486,492,453,511,Regular Season,,,,,,,,,,0.0,,,
2782,1629015,Zhaire Smith,Zhaire,1610612755,PHI,21.0,7,3,4,0.429,32.331667,3,11,0.273,0,3,0.000,2,4,0.500,0,2,2,2,2,3,0,1,4,2,8,-10,20.4,0,0,18.0,474,462,55,319,498,498,490,507,463,460,463,468,462,464,504,504,511,487,481,439,480,52,37,486,501,272,500,239,29,499,Regular Season,2019,Zhaire Smith,Zhaire,PHI,21.0,7,3,4,0.429,4.6,103.2,102.8,102.8,121.9,115.1,115.1,-18.6,-12.3,-12.3,0.083,1.00,11.8,0.000,0.080,0.031,11.8,11.9,0.273,0.313,0.165,0.174,104.37,107.63,89.70,107.63,-0.008,72,3,11,0.4,1.6,0.273,474,462,55,319,508,403,415,415,514,485,485,509,487,487,348,392,390,504,441,516,374,382,515,515,293,273,161,44,44,518,498,490,511,497,507,Regular Season,G,6-3,205,1,297.0,Zhaire Smith,SG,PHI,Leg,3.0,5 12/7-12/11,86405.0,Zhaire Smith
2783,1629627,Zion Williamson,Zion,1610612740,NOP,19.0,24,11,13,0.458,668.141667,210,360,0.583,6,14,0.429,114,178,0.640,64,86,150,50,59,16,9,39,42,133,540,62,811.0,2,0,796.0,385,362,153,278,311,172,209,49,362,395,33,108,88,409,109,304,253,294,191,322,289,441,174,107,181,114,250,142,29,247,Regular Season,2019,Zion Williamson,Zion,NOP,19.0,24,11,13,0.458,27.8,110.6,112.0,112.0,106.4,106.9,106.9,4.1,5.1,5.1,0.119,0.85,9.1,0.092,0.114,0.104,10.8,10.8,0.592,0.616,0.291,0.297,107.93,107.01,89.17,107.01,0.141,1483,210,360,8.8,15.0,0.583,385,362,153,278,131,124,101,101,199,200,200,129,107,107,235,449,463,62,317,152,294,293,73,78,19,19,31,61,61,44,172,209,15,42,49,Regular Season,F,6-6,284,R,32.0,Zion Williamson,PF,NOP,"Knee, Toe",45.0,"95 10/22-1/21, 2/8-2/10",5237065.0,Zion Williamson


### Impute Players Missing Roster Info

In [None]:
# Step 1: Replace 'R' with 0 in EXP and convert to numeric
df_regular_all['EXP'] = df_regular_all['EXP'].replace('R', 0)
df_regular_all['EXP'] = pd.to_numeric(df_regular_all['EXP'], errors='coerce')

# Step 2: Create a helper table with known physical features
phys_ref = (
    df_regular_all[df_regular_all[['POSITION', 'HEIGHT', 'WEIGHT']].notna().all(axis=1)]
    .sort_values(by='Season')
    .groupby('PLAYER_ID')
    .apply(lambda x: x.set_index('Season')[['POSITION', 'HEIGHT', 'WEIGHT']])
)

# Step 3: Impute missing POSITION, HEIGHT, WEIGHT using closest known season
def impute_phys(row):
    if pd.notna(row['POSITION']) and pd.notna(row['HEIGHT']) and pd.notna(row['WEIGHT']):
        return row[['POSITION', 'HEIGHT', 'WEIGHT']]
    try:
        player_data = phys_ref.loc[row['PLAYER_ID']]
        closest_season = player_data.index.to_series().sub(row['Season']).abs().idxmin()
        return player_data.loc[closest_season]
    except:
        return pd.Series([row['POSITION'], row['HEIGHT'], row['WEIGHT']], index=['POSITION', 'HEIGHT', 'WEIGHT'])

df_regular_all[['POSITION', 'HEIGHT', 'WEIGHT']] = df_regular_all.apply(impute_phys, axis=1)

# Step 4: Impute EXP using the earliest known season for that player
rookie_exp_map = (
    df_regular_all[df_regular_all['EXP'].notna()]
    .groupby('PLAYER_ID')
    .apply(lambda x: x.loc[x['EXP'] == 0, 'Season'].min() if (x['EXP'] == 0).any() else x['Season'].min())
).to_dict()

def impute_exp(row):
    if pd.notna(row['EXP']):
        return row['EXP']
    rookie_season = rookie_exp_map.get(row['PLAYER_ID'])
    if rookie_season is not None:
        return max(0, row['Season'] - rookie_season)
    return np.nan

df_regular_all['EXP'] = df_regular_all.apply(impute_exp, axis=1)

  .apply(lambda x: x.set_index('Season')[['POSITION', 'HEIGHT', 'WEIGHT']])
  .apply(lambda x: x.loc[x['EXP'] == 0, 'Season'].min() if (x['EXP'] == 0).any() else x['Season'].min())


In [None]:
missing_player_df = pd.read_csv('miss_player.csv')
# Ensure clean player names
# Clean names just in case
df_regular_all["PLAYER_NAME"] = df_regular_all["PLAYER_NAME"].str.strip()
missing_player_df["Player Name"] = missing_player_df["Player Name"].str.strip()

# Filter trusted names
trusted_info = missing_player_df[
    missing_player_df["Player Name"].isin(df_regular_all["PLAYER_NAME"].unique())
].copy()

# Columns to impute
cols_to_impute = ["POSITION", "HEIGHT", "WEIGHT", "EXP"]

# Loop through rows in trusted_info
for _, info_row in trusted_info.iterrows():
    name = info_row["Player Name"]
    year_since = info_row["Year Since"]
    
    # Subset of df_regular_all that matches the player and has missing values
    mask = (
        (df_regular_all["PLAYER_NAME"] == name) &
        (
            df_regular_all["POSITION"].isna() |
            df_regular_all["HEIGHT"].isna() |
            df_regular_all["WEIGHT"].isna() |
            df_regular_all["EXP"].isna()
        )
    )
    
    # Fill only missing values
    for col, source_col in zip(["POSITION", "HEIGHT", "WEIGHT"], ["Position", "Height", "Weight"]):
        df_regular_all.loc[mask & df_regular_all[col].isna(), col] = info_row[source_col]
    
    # EXP: calculate from Season - Year Since, only if EXP is missing
    if not pd.isna(year_since):
        df_regular_all.loc[mask & df_regular_all["EXP"].isna(), "EXP"] = (
            df_regular_all.loc[mask & df_regular_all["EXP"].isna(), "Season"] - year_since
        )

# Final EXP check: clip negatives and set rookies to 0
df_regular_all["EXP"] = df_regular_all["EXP"].clip(lower=0)

### Additional Cleaning: Weight and Height

In [None]:
import re

def height_to_inches(x):
    """
    Converts height formats like '6-5', '6'5"', or '6‚Ä≤5‚Ä≥' to inches.
    If already a float or int (i.e., already in inches), returns as-is.
    Idempotent ‚Äî safe to apply multiple times.
    """
    # If already numeric, return it as is
    if isinstance(x, (int, float)) and not np.isnan(x):
        return x

    if pd.isna(x):
        return np.nan

    # Normalize common height formats
    x_str = str(x).strip()
    match = re.match(r"(\d+)[\'‚Ä≤-](\d+)", x_str)
    if match:
        feet, inches = map(int, match.groups())
        return feet * 12 + inches

    return np.nan


df['HEIGHT'] = df['HEIGHT'].apply(height_to_inches).astype('float')

In [None]:
import numpy as np

def parse_weight(value):
    """
    Cleans up weight strings like '180 lbs' or '215 lbs' 
    and converts them to a float (pounds).
    """
    # If it's already missing, return NaN
    if pd.isna(value):
        return np.nan

    # Convert to lowercase string
    val_str = str(value).lower().strip()

    # Remove the substring 'lbs'
    val_str = val_str.replace('lbs', '').strip()

    # Attempt to convert to float
    try:
        return float(val_str)
    except ValueError:
        return np.nan

# Example usage on the entire 'WEIGHT' column
df['WEIGHT'] = df['WEIGHT'].apply(parse_weight)

In [None]:
df.to_csv('data/nba_players_stats_2.csv', index=False, encoding='utf-8-sig')

# NBA Player Stats Column Definitions

| Column | Definition |
|--------|------------|
| `PLAYER_ID` | Unique identifier for the player. |
| `PLAYER_NAME` | Full name of the player. |
| `NICKNAME` | Nickname of the player (often empty or not used). |
| `TEAM_ID` | Unique identifier for the team. |
| `TEAM_ABBREVIATION` | Short abbreviation of the team name (e.g., LAL for Lakers). |
| `AGE` | Player‚Äôs age during the season. |
| `GP` | Games played by the player. |
| `W` | Number of games won when the player participated. |
| `L` | Number of games lost when the player participated. |
| `W_PCT` | Win percentage when the player was active (`W / GP`). |
| `MIN` | Average minutes played per game. |
| `FGM` | Field goals made per game. |
| `FGA` | Field goals attempted per game. |
| `FG_PCT` | Field goal percentage (`FGM / FGA`). |
| `FG3M` | Three-point field goals made per game. |
| `FG3A` | Three-point field goals attempted per game. |
| `FG3_PCT` | Three-point field goal percentage (`FG3M / FG3A`). |
| `FTM` | Free throws made per game. |
| `FTA` | Free throws attempted per game. |
| `FT_PCT` | Free throw percentage (`FTM / FTA`). |
| `OREB` | Offensive rebounds per game. |
| `DREB` | Defensive rebounds per game. |
| `REB` | Total rebounds per game (`OREB + DREB`). |
| `AST` | Assists per game. |
| `TOV` | Turnovers per game. |
| `STL` | Steals per game. |
| `BLK` | Blocks per game. |
| `BLKA` | Blocks against (how many times the player's shot was blocked). |
| `PF` | Personal fouls committed per game. |
| `PFD` | Personal fouls drawn per game (fouls suffered by the player). |
| `PTS` | Points per game. |
| `PLUS_MINUS` | Player‚Äôs net impact on the scoreboard (`team points - opponent points`) while they are on the court. |
| `NBA_FANTASY_PTS` | Fantasy points based on standard NBA fantasy scoring systems. |
| `DD2` | Number of double-doubles (when a player records 10+ in two statistical categories). |
| `TD3` | Number of triple-doubles (when a player records 10+ in three statistical categories). |
| `WNBA_FANTASY_PTS` | Likely a placeholder, as this dataset is NBA-only. |
| `GP_RANK` | Rank among all players for games played. |
| `W_RANK` | Rank among all players for wins. |
| `L_RANK` | Rank among all players for losses. |
| `W_PCT_RANK` | Rank among all players for win percentage. |
| `MIN_RANK` | Rank among all players for minutes played. |
| `FGM_RANK` | Rank among all players for field goals made. |
| `FGA_RANK` | Rank among all players for field goals attempted. |
| `FG_PCT_RANK` | Rank among all players for field goal percentage. |
| `FG3M_RANK` | Rank among all players for three-point field goals made. |
| `FG3A_RANK` | Rank among all players for three-point field goals attempted. |
| `FG3_PCT_RANK` | Rank among all players for three-point percentage. |
| `FTM_RANK` | Rank among all players for free throws made. |
| `FTA_RANK` | Rank among all players for free throws attempted. |
| `FT_PCT_RANK` | Rank among all players for free throw percentage. |
| `OREB_RANK` | Rank among all players for offensive rebounds. |
| `DREB_RANK` | Rank among all players for defensive rebounds. |
| `REB_RANK` | Rank among all players for total rebounds. |
| `AST_RANK` | Rank among all players for assists. |
| `TOV_RANK` | Rank among all players for turnovers. |
| `STL_RANK` | Rank among all players for steals. |
| `BLK_RANK` | Rank among all players for blocks. |
| `BLKA_RANK` | Rank among all players for blocked attempts. |
| `PF_RANK` | Rank among all players for personal fouls committed. |
| `PFD_RANK` | Rank among all players for personal fouls drawn. |
| `PTS_RANK` | Rank among all players for points scored. |
| `PLUS_MINUS_RANK` | Rank among all players for plus/minus impact. |
| `NBA_FANTASY_PTS_RANK` | Rank among all players for fantasy points. |
| `DD2_RANK` | Rank among all players for double-doubles. |
| `TD3_RANK` | Rank among all players for triple-doubles. |
| `WNBA_FANTASY_PTS_RANK` | Placeholder for WNBA stats (likely unused in this dataset). |
| `Season Type` | Type of season (`Regular Season`, `Playoffs`, etc.). |

## üìä Undocumented NBA Stats Column Definitions

| Column | Definition |
|--------|------------|
| `E_OFF_RATING` | Estimated Offensive Rating: A version of offensive rating that estimates points produced per 100 possessions using adjusted metrics. |
| `OFF_RATING` | Offensive Rating: Points produced per 100 possessions while the player is on the court. |
| `sp_work_OFF_RATING` | A special/internal calculation of offensive rating used in SportsVu or internal tools‚Äîtypically close to OFF_RATING. Often for testing/validation. |
| `E_DEF_RATING` | Estimated Defensive Rating: An estimate of points allowed per 100 possessions using adjusted formulas. |
| `DEF_RATING` | Defensive Rating: Points allowed per 100 possessions while the player is on the court. |
| `sp_work_DEF_RATING` | Internal variant of DEF_RATING based on tracking data. |
| `E_NET_RATING` | Estimated Net Rating: The difference between `E_OFF_RATING` and `E_DEF_RATING`. |
| `NET_RATING` | Net Rating: The difference between `OFF_RATING` and `DEF_RATING`. Higher = better. |
| `sp_work_NET_RATING` | Internal net rating calculation using `sp_work_` stats. |
| `AST_PCT` | Assist Percentage: % of teammate field goals assisted by the player while on the court. |
| `AST_TO` | Assist-to-Turnover Ratio: Assists divided by turnovers (`AST / TOV`). |
| `AST_RATIO` | Assist Ratio: Assists per 100 possessions. |
| `OREB_PCT` | Offensive Rebound Percentage: % of available offensive rebounds grabbed. |
| `DREB_PCT` | Defensive Rebound Percentage: % of available defensive rebounds grabbed. |
| `REB_PCT` | Total Rebound Percentage: % of available rebounds (offensive + defensive) grabbed. |
| `TM_TOV_PCT` | Team Turnover Percentage: Estimate of turnovers committed per 100 team possessions while the player is on the court. |
| `E_TOV_PCT` | Estimated Turnover Percentage: Estimate of player‚Äôs turnover rate using adjusted methods. |
| `EFG_PCT` | Effective Field Goal Percentage: Adjusts FG% to account for 3-point shots being worth more (`(FGM + 0.5 * 3PM) / FGA`). |
| `TS_PCT` | True Shooting Percentage: Shooting efficiency including FG, 3PT, and FT (`PTS / (2 * (FGA + 0.44 * FTA))`). |
| `USG_PCT` | Usage Percentage: % of team plays used by the player while on court. | Basically, How often is this guy finishing the play when he‚Äôs out there?‚Äù
| `E_USG_PCT` | Estimated Usage Percentage: Adjusted version of USG_PCT, possibly using tracking data. |
| `E_PACE` | Estimated Pace: Estimate of possessions per 48 minutes, team-adjusted. |
| `PACE` | Team Pace: Estimate of possessions per 48 minutes while the player is on the court. |
| `PACE_PER40` | Projected possessions per 40 minutes, used for comparative purposes. |
| `sp_work_PACE` | Internal pace metric, often based on player tracking systems. |
| `PIE` | Player Impact Estimate: NBA's holistic player impact metric showing overall contribution to team success. |
| `POSS` | Estimated number of possessions the player was on the floor for. Estimated Possessions = FGA + (0.44 √ó FTA) - ORB + TO  Possessions measure how often a team or player is involved in plays| 
| `FGM_PG` | Field Goals Made per Game. |
| `FGA_PG` | Field Goals Attempted per Game. |
| `_RANK` Columns (e.g., `AST_PCT_RANK`) | Rank of the player in that metric compared to other players in the dataset. |

| Feature Name      | Description                                                                 |
|-------------------|-----------------------------------------------------------------------------|
| `HEIGHT`          | Player's height, usually in feet-inches format (e.g., "6-7").                |
| `WEIGHT`          | Player's weight in pounds.                                                   |
| `POSITION`        | Player‚Äôs official NBA position (e.g., "G", "F", "C", "G-F", etc.).           |
| `AGE`             | Age of the player during the season.                                         |
| `EXP`             | Years of NBA experience (0 = rookie).                                        |