In [1]:
import pandas as pd

In [None]:
# Load the five input CSVs in a single pass. The target names on the left are
# bound positionally to the file names listed on the right, in the same order.
(
    nba_1982_to_2022,
    per_game_2023,
    adv_2023,
    per_game_2024,
    adv_2024,
) = (
    pd.read_csv(csv_name)
    for csv_name in (
        "NBA_Dataset.csv",                # -> nba_1982_to_2022
        "nba_per_game_2023.csv",          # -> per_game_2023
        "nba_adv_2023.csv",               # -> adv_2023
        "2023-2024_NBA_per_game.csv",     # -> per_game_2024
        "2023-2024_NBA.csv",              # -> adv_2024
    )
)

In [None]:
# The four single-season tables each lose their 'Player-additional' column,
# then every remaining missing value is replaced with 0.0.
per_game_2023 = per_game_2023.drop(columns='Player-additional').fillna(0.0)
adv_2023 = adv_2023.drop(columns='Player-additional').fillna(0.0)
per_game_2024 = per_game_2024.drop(columns='Player-additional').fillna(0.0)

# adv_2024 additionally loses the columns sitting at positions 19 and 24
# (positions counted after 'Player-additional' has been removed).
# NOTE(review): presumably these are empty spacer columns in the export --
# confirm, because a positional drop silently removes real data if the
# file layout ever changes.
adv_2024 = adv_2024.drop(columns='Player-additional')
adv_2024 = adv_2024.drop(columns=adv_2024.columns[[19, 24]]).fillna(0.0)

# The historical table only needs its missing values filled.
nba_1982_to_2022 = nba_1982_to_2022.fillna(0.0)

In [None]:
# Header translation for the per-game tables: original CSV header -> name
# used in the rest of the notebook.
per_game_mappings = {
    # minutes and shooting
    "MP": "mp_per_g",
    "FG": "fg_per_g",
    "FGA": "fga_per_g",
    "FG%": "fg_pct",
    "3P": "fg3_per_g",
    "3PA": "fg3a_per_g",
    "3P%": "fg3_pct",
    "2P": "fg2_per_g",
    "2PA": "fg2a_per_g",
    "2P%": "fg2_pct",
    "eFG%": "efg_pct",
    "FT": "ft_per_g",
    "FTA": "fta_per_g",
    "FT%": "ft_pct",
    # rebounds and the remaining box-score columns
    "ORB": "orb_per_g",
    "DRB": "drb_per_g",
    "TRB": "trb_per_g",
    "AST": "ast_per_g",
    "STL": "stl_per_g",
    "BLK": "blk_per_g",
    "TOV": "tov_per_g",
    "PF": "pf_per_g",
    "PTS": "pts_per_g",
    # identifying columns (same targets as in advanced_mappings)
    "Player": "player",
    "Pos": "pos",
    "Age": "age",
    "Tm": "team_id",
    "G": "g",
    "GS": "gs",
}

# Header translation for the advanced tables. Note that "MP" maps to "mp"
# here but to "mp_per_g" in per_game_mappings.
advanced_mappings = {
    # identifying columns (same targets as in per_game_mappings)
    "Player": "player",
    "Pos": "pos",
    "Age": "age",
    "Tm": "team_id",
    "G": "g",
    "GS": "gs",
    # minutes, efficiency and rate columns
    "MP": "mp",
    "PER": "per",
    "TS%": "ts_pct",
    "3PAr": "fg3a_per_fga_pct",
    "FTr": "fta_per_fga_pct",
    "ORB%": "orb_pct",
    "DRB%": "drb_pct",
    "TRB%": "trb_pct",
    "AST%": "ast_pct",
    "STL%": "stl_pct",
    "BLK%": "blk_pct",
    "TOV%": "tov_pct",
    "USG%": "usg_pct",
    # win-share columns
    "OWS": "ows",
    "DWS": "dws",
    "WS": "ws",
    "WS/48": "ws_per_48",
    # plus/minus columns
    "OBPM": "obpm",
    "DBPM": "dbpm",
    "BPM": "bpm",
    "VORP": "vorp",
}

In [None]:
# Translate the CSV headers through the mapping dictionaries. Headers that do
# not appear in a mapping are left unchanged by rename().
per_game_2023 = per_game_2023.rename(per_game_mappings, axis='columns')
per_game_2024 = per_game_2024.rename(per_game_mappings, axis='columns')
adv_2023 = adv_2023.rename(advanced_mappings, axis='columns')
adv_2024 = adv_2024.rename(advanced_mappings, axis='columns')

In [None]:
# Attach the advanced stats to the per-game rows of the same season.
# No `on=` is given, so pandas joins on every column name the two frames have
# in common; the left join keeps every per-game row even when no advanced row
# matches.
data_2023 = per_game_2023.merge(adv_2023, how='left')
data_2024 = per_game_2024.merge(adv_2024, how='left')

In [None]:
# Stamp every row with the season its frame belongs to.
data_2023 = data_2023.assign(season=2023)
data_2024 = data_2024.assign(season=2024)

In [None]:
# Reduce data_2023 to one row per player.
# How many rows does each player have?
player_counts = data_2023.groupby('player').size()

# Players that appear exactly once are kept untouched.
single_row_players = player_counts.loc[player_counts.eq(1)].index
single_row_data = data_2023.loc[data_2023['player'].isin(single_row_players)]

# Players that appear several times keep only the row whose team_id is 'TOT'.
# NOTE(review): a player with several rows but no 'TOT' row is dropped
# entirely -- confirm that this cannot happen in the input.
multi_row_players = player_counts.loc[player_counts.gt(1)].index
appears_more_than_once = data_2023['player'].isin(multi_row_players)
is_tot_row = data_2023['team_id'].eq('TOT')
multi_row_tot_rows = data_2023.loc[appears_more_than_once & is_tot_row]

# 'TOT' rows come first, followed by the single-row players.
data_2023 = pd.concat([multi_row_tot_rows, single_row_data])

In [None]:
# Reduce data_2024 to one row per player.
# How many rows does each player have?
player_counts = data_2024.groupby('player').size()

# Players that appear exactly once are kept untouched.
single_row_players = player_counts.loc[player_counts.eq(1)].index
single_row_data = data_2024.loc[data_2024['player'].isin(single_row_players)]

# Players that appear several times keep only the row whose team_id is 'TOT'.
# NOTE(review): a player with several rows but no 'TOT' row is dropped
# entirely -- confirm that this cannot happen in the input.
multi_row_players = player_counts.loc[player_counts.gt(1)].index
appears_more_than_once = data_2024['player'].isin(multi_row_players)
is_tot_row = data_2024['team_id'].eq('TOT')
multi_row_tot_rows = data_2024.loc[appears_more_than_once & is_tot_row]

# 'TOT' rows come first, followed by the single-row players.
data_2024 = pd.concat([multi_row_tot_rows, single_row_data])

In [None]:
# Stack the two seasons vertically and renumber the rows from 0.
data_2023_and_2024 = pd.concat(objs=(data_2023, data_2024), axis=0, ignore_index=True)

In [None]:
# Remove the 'Rk' column from the stacked frame.
data_2023_and_2024 = data_2023_and_2024.drop(columns='Rk')

In [None]:
# Replace any remaining missing value with 0.0 (for example, values in a
# column that exists in only one of the two stacked seasons).
data_2023_and_2024 = data_2023_and_2024.fillna(value=0.0)

In [None]:
# Combine the 2023/2024 rows with the historical table.
# No `on=` is given, so the outer join uses every column name the two frames
# share as its key and keeps unmatched rows from both sides. Columns that
# exist in only one frame are NaN for rows coming from the other frame; those
# NaN values are not filled afterwards.
nba_data = data_2023_and_2024.merge(nba_1982_to_2022, how='outer')

In [None]:
# Earliest season in which each 'player' value appears anywhere in nba_data.
first_season_by_player = nba_data.groupby('player')['season'].min()
min_season_dict = first_season_by_player.to_dict()


def calculate_experience(row):
    """Return the 1-based number of seasons since the row's player first appears.

    Row-wise helper kept for backward compatibility and ad-hoc use; the
    'experience' column below is computed with the equivalent vectorised
    expression.

    Parameters
    ----------
    row : mapping with 'season' and 'player' entries (e.g. one nba_data row).

    Returns
    -------
    row['season'] - (earliest season recorded for row['player']) + 1

    Raises
    ------
    KeyError
        If row['player'] is not a key of min_season_dict.
    """
    return row['season'] - min_season_dict[row['player']] + 1


# Vectorised form of calculate_experience: look up each row's first season
# with Series.map instead of calling a Python function once per row.
# A player value with no entry in the lookup yields NaN rather than a KeyError.
# NOTE(review): "experience" is measured from the earliest season present in
# this data set and is keyed on the 'player' column alone, so rows that share
# a 'player' value are treated as the same player -- confirm that is intended.
nba_data['experience'] = (
    nba_data['season'] - nba_data['player'].map(first_season_by_player) + 1
)

In [None]:
# Keep the rows whose season is 2000 or later (season 2000 itself is included,
# despite the "after" in the variable name).
complete_data_after_2000 = nba_data.loc[nba_data['season'].ge(2000)]

In [None]:
# Write the filtered frame to disk. index=True is the to_csv default and is
# spelled out here: the row index is written as the first, unnamed column.
complete_data_after_2000.to_csv("complete_data_after_2000.csv", index=True)