In [1]:
import pandas as pd
import numpy as np

In [4]:
# get all stars df
allstars = pd.read_csv('./final_data/allstars.csv')

# AS column to indentify if a player was an all star in respective season
allstars['AS'] = ''
allstars = allstars.rename(columns={'Player': 'PLAYER_NAME', 'Years' : 'SEASON'})
allstars['SEASON'] = allstars['SEASON'].apply(lambda x: str(x))
allstars        

Unnamed: 0,PLAYER_NAME,SEASON,AS
0,John Stockton,2000,
1,John Stockton,1996,
2,John Stockton,1997,
3,Reggie Miller,2000,
4,Reggie Miller,1996,
...,...,...,...
697,Anthony Edwards,2023,
698,Jaren Jackson Jr.,2023,
699,Tyrese Haliburton,2023,
700,De'Aaron Fox,2023,


In [5]:
# get final player stats df
stats = pd.read_csv('./final_data/final_player_stats.csv')

# convert season to the year that the all star game took place in the season (1996-97 --> 1997)
stats['SEASON'] = stats['SEASON'].str[:2] + stats['SEASON'].str[5:7]
stats['SEASON'] = stats['SEASON'].replace('1900', '2000')
stats

Unnamed: 0.1,Unnamed: 0,SEASON,PLAYER_ID,PLAYER_NAME,TEAM_ID,TEAM_ABBREVIATION,GP,MIN,PTS,REB,AST,STL,BLK,TOV,FG3M,TS_PCT,USG_PCT,PIE,PER,WS
0,0,1997,920,A.C. Green,1610612742,DAL,45,1027.570000,253,267,35,25,3,31,1,0.497385,0.119885,0.097000,13.987283,1.901209
1,1,1997,243,Aaron McKie,1610612765,DET,47,886.238333,208,107,96,40,17,51,24,0.412417,0.123583,0.102542,13.323805,1.495907
2,2,1997,1425,Aaron Williams,1610612763,VAN,4,38.856667,18,16,1,1,5,5,0,0.395750,0.117571,0.147500,25.123179,0.088643
3,3,1997,768,Acie Earl,1610612749,MIL,36,447.558333,154,83,18,12,25,33,0,0.261850,0.110455,0.005550,12.554787,0.392317
4,4,1997,228,Adam Keefe,1610612762,UTA,43,629.941667,153,155,26,22,8,31,0,0.421500,0.089367,0.067875,14.151856,1.359562
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12219,12219,2023,1628380,Zach Collins,1610612759,SAS,48,1015.523333,481,283,126,21,30,92,29,0.696929,0.188714,0.096500,17.892393,2.526414
12220,12220,2023,203897,Zach LaVine,1610612741,CHI,55,1981.761667,1319,269,224,54,11,145,150,0.556048,0.268333,0.099714,20.038261,6.650431
12221,12221,2023,1630192,Zeke Nnaji,1610612743,DEN,44,583.638333,223,102,11,15,19,20,13,0.437267,0.149632,0.097000,15.245232,1.618647
12222,12222,2023,1630533,Ziaire Williams,1610612763,MEM,28,486.075000,167,69,24,12,5,29,20,0.350667,0.121429,0.048167,7.382890,0.157676


In [6]:
# merge allstars with player stats to get only all stars, make AS == 1 for all rows
AS_stats = pd.merge(allstars, stats, on = ['PLAYER_NAME', 'SEASON'])
AS_stats['AS'] = 1
AS_stats

Unnamed: 0.1,PLAYER_NAME,SEASON,AS,Unnamed: 0,PLAYER_ID,TEAM_ID,TEAM_ABBREVIATION,GP,MIN,PTS,...,AST,STL,BLK,TOV,FG3M,TS_PCT,USG_PCT,PIE,PER,WS
0,John Stockton,2000,1,1456,304,1610612762,UTA,47,1343.430000,563,...,403,75,8,110,23,0.578560,0.191560,0.154720,27.023188,7.543398
1,John Stockton,1997,1,228,304,1610612762,UTA,47,1696.560000,661,...,488,94,6,144,53,0.658625,0.180875,0.164125,24.881897,9.737464
2,Reggie Miller,2000,1,1561,397,1610612754,IND,48,1792.980000,924,...,112,52,17,78,105,0.627917,0.190333,0.106333,21.931338,8.585097
3,Reggie Miller,1998,1,749,397,1610612754,IND,81,2795.630000,1578,...,171,78,11,128,164,0.616531,0.245656,0.126344,23.079726,14.954412
4,Hakeem Olajuwon,1997,1,184,165,1610612745,HOU,44,1609.386667,1068,...,129,63,95,161,4,0.536286,0.302767,0.146536,27.811006,6.888368
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
670,Anthony Edwards,2023,1,11759,1630162,1610612750,MIN,61,2210.935000,1518,...,274,100,38,205,162,0.567375,0.275313,0.122125,19.982443,6.796596
671,Jaren Jackson Jr.,2023,1,11942,1628991,1610612763,MEM,40,1088.261667,677,...,35,44,131,63,62,0.606385,0.224846,0.118615,24.884790,3.532671
672,Tyrese Haliburton,2023,1,12198,1630169,1610612754,IND,48,1604.820000,955,...,483,84,19,126,138,0.591700,0.241800,0.151450,26.089906,8.708077
673,De'Aaron Fox,2023,1,11833,1628368,1610612758,SAC,52,1769.668333,1291,...,322,55,18,138,80,0.578063,0.268937,0.129938,23.534423,8.125302


In [7]:
# join dataframes together, drop duplicates, replace NaN in AS column with 0
final_df = pd.concat([stats, AS_stats], ignore_index=True)
final_df = final_df.drop_duplicates(subset=['PLAYER_NAME', 'SEASON'], keep='last')
final_df['AS'] = final_df['AS'].fillna(0, inplace=False)
final_df

Unnamed: 0.1,Unnamed: 0,SEASON,PLAYER_ID,PLAYER_NAME,TEAM_ID,TEAM_ABBREVIATION,GP,MIN,PTS,REB,...,STL,BLK,TOV,FG3M,TS_PCT,USG_PCT,PIE,PER,WS,AS
0,0,1997,920,A.C. Green,1610612742,DAL,45,1027.570000,253,267,...,25,3,31,1,0.497385,0.119885,0.097000,13.987283,1.901209,0.0
1,1,1997,243,Aaron McKie,1610612765,DET,47,886.238333,208,107,...,40,17,51,24,0.412417,0.123583,0.102542,13.323805,1.495907,0.0
2,2,1997,1425,Aaron Williams,1610612763,VAN,4,38.856667,18,16,...,1,5,5,0,0.395750,0.117571,0.147500,25.123179,0.088643,0.0
3,3,1997,768,Acie Earl,1610612749,MIL,36,447.558333,154,83,...,12,25,33,0,0.261850,0.110455,0.005550,12.554787,0.392317,0.0
4,4,1997,228,Adam Keefe,1610612762,UTA,43,629.941667,153,155,...,22,8,31,0,0.421500,0.089367,0.067875,14.151856,1.359562,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12894,11759,2023,1630162,Anthony Edwards,1610612750,MIN,61,2210.935000,1518,360,...,100,38,205,162,0.567375,0.275313,0.122125,19.982443,6.796596,1.0
12895,11942,2023,1628991,Jaren Jackson Jr.,1610612763,MEM,40,1088.261667,677,267,...,44,131,63,62,0.606385,0.224846,0.118615,24.884790,3.532671,1.0
12896,12198,2023,1630169,Tyrese Haliburton,1610612754,IND,48,1604.820000,955,181,...,84,19,126,138,0.591700,0.241800,0.151450,26.089906,8.708077,1.0
12897,11833,2023,1628368,De'Aaron Fox,1610612758,SAC,52,1769.668333,1291,223,...,55,18,138,80,0.578063,0.268937,0.129938,23.534423,8.125302,1.0


In [9]:
# save as CSV
final_df.to_csv('./final_data/final_data.csv')