In [3]:
import pandas as pd

## College Data

Read in scraped college data

In [4]:
df = pd.read_csv("../data/interim/college_player_stats_00-24.csv")
df

Unnamed: 0,Rk,Player,PTS/G,Draft Team,Round,Pick,Draft Year,Draft College,Season,Team,...,PTS,FG%,2P%,3P%,FT%,TS%,eFG%,Pos,Class,-9999
0,1,Jimmer Fredette,28.9,MIL,1,10,2011,Brigham Young,2010-11,Brigham Young,...,28.9,0.452,0.491,0.396,0.894,0.594,0.533,G,SR,jimmer-fredette-1
1,2,Adam Morrison,28.1,CHA,1,3,2006,Gonzaga,2005-06,Gonzaga,...,28.1,0.496,0.523,0.428,0.772,0.605,0.556,F,JR,adam-morrison-1
2,3,Trae Young,27.4,DAL,1,5,2018,Oklahoma,2017-18,Oklahoma,...,27.4,0.422,0.493,0.360,0.861,0.585,0.518,G,FR,trae-young-1
3,4,J.J. Redick,26.8,ORL,1,11,2006,Duke,2005-06,Duke,...,26.8,0.470,0.521,0.421,0.863,0.630,0.578,G,SR,jj-redick-1
4,5,Doug McDermott,26.7,DEN,1,11,2014,Creighton,2013-14,Creighton,...,26.7,0.526,0.567,0.449,0.864,0.644,0.603,F,SR,doug-mcdermott-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1595,1596,Vernon Macklin,2.9,DET,2,52,2011,Florida,2006-07,Georgetown,...,2.9,0.741,0.741,,0.435,0.693,0.741,F,FR,vernon-macklin-1
1596,1597,Andy Rautins,2.9,NYK,2,38,2010,Syracuse,2005-06,Syracuse,...,2.9,0.377,0.714,0.326,0.500,0.519,0.519,G,FR,andy-rautins-1
1597,1598,Marcus Williams,2.9,NJN,1,22,2006,Connecticut,2003-04,Connecticut,...,2.9,0.370,0.483,0.176,0.692,0.441,0.402,G,FR,marcus-williams-1
1598,1599,Moritz Wagner,2.9,LAL,1,25,2018,Michigan,2015-16,Michigan,...,2.9,0.607,0.714,0.167,0.556,0.618,0.623,F,FR,moritz-wagner-1


In [5]:
df.columns

Index(['Rk', 'Player', 'PTS/G', 'Draft Team', 'Round', 'Pick', 'Draft Year',
       'Draft College', 'Season', 'Team', 'G', 'GS', 'MP', 'FG', 'FGA', '2P',
       '2PA', '3P', '3PA', 'FT', 'FTA', 'ORB', 'DRB', 'TRB', 'AST', 'STL',
       'BLK', 'TOV', 'PF', 'PTS', 'FG%', '2P%', '3P%', 'FT%', 'TS%', 'eFG%',
       'Pos', 'Class', '-9999'],
      dtype='object')

Remove unneeded columns

In [6]:
df = df.drop(['PTS/G', 'Draft Team', 'Round', 'Pick', 'Draft Year', 'Draft College','Rk', '-9999'], axis=1)
df.columns

Index(['Player', 'Season', 'Team', 'G', 'GS', 'MP', 'FG', 'FGA', '2P', '2PA',
       '3P', '3PA', 'FT', 'FTA', 'ORB', 'DRB', 'TRB', 'AST', 'STL', 'BLK',
       'TOV', 'PF', 'PTS', 'FG%', '2P%', '3P%', 'FT%', 'TS%', 'eFG%', 'Pos',
       'Class'],
      dtype='object')

Convert Position into 1 hot encodings

In [7]:
pos_dummies = pd.get_dummies(df['Pos']).astype(int)
pos_dummies

Unnamed: 0,C,F,G
0,0,0,1
1,0,1,0
2,0,0,1
3,0,0,1
4,0,1,0
...,...,...,...
1595,0,1,0
1596,0,0,1
1597,0,0,1
1598,0,1,0


In [8]:
df["Pos_G"] = pos_dummies["G"]
df["Pos_F"] = pos_dummies["F"]
df["Pos_C"] = pos_dummies["C"]
df = df.drop('Pos', axis=1)
df.columns

Index(['Player', 'Season', 'Team', 'G', 'GS', 'MP', 'FG', 'FGA', '2P', '2PA',
       '3P', '3PA', 'FT', 'FTA', 'ORB', 'DRB', 'TRB', 'AST', 'STL', 'BLK',
       'TOV', 'PF', 'PTS', 'FG%', '2P%', '3P%', 'FT%', 'TS%', 'eFG%', 'Class',
       'Pos_G', 'Pos_F', 'Pos_C'],
      dtype='object')

Features to be used for model

In [9]:
features = ['Player', 'Season', 'Team', '2P',
       '3P', 'FT','ORB', 'DRB', 'AST', 'STL', 'BLK',
       'TOV', 'PF', 'PTS', 'FG%', "Pos_G", "Pos_F", "Pos_C"]

In [10]:
df[features]

Unnamed: 0,Player,Season,Team,2P,3P,FT,ORB,DRB,AST,STL,BLK,TOV,PF,PTS,FG%,Pos_G,Pos_F,Pos_C
0,Jimmer Fredette,2010-11,Brigham Young,6.0,3.4,6.8,0.6,2.8,4.3,1.3,0.0,3.5,1.3,28.9,0.452,1,0,0
1,Adam Morrison,2005-06,Gonzaga,7.0,2.2,7.3,1.4,4.1,1.7,1.1,0.3,2.3,2.0,28.1,0.496,0,1,0
2,Trae Young,2017-18,Oklahoma,4.5,3.7,7.4,0.4,3.5,8.7,1.7,0.3,5.2,1.8,27.4,0.422,1,0,0
3,J.J. Redick,2005-06,Duke,4.5,3.9,6.1,0.2,1.8,2.6,1.4,0.1,2.5,1.5,26.8,0.470,1,0,0
4,Doug McDermott,2013-14,Creighton,6.7,2.7,5.1,1.6,5.3,1.6,0.2,0.1,1.8,1.9,26.7,0.526,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1595,Vernon Macklin,2006-07,Georgetown,1.3,0.0,0.3,0.7,0.7,0.5,0.0,0.1,0.5,1.0,2.9,0.741,0,1,0
1596,Andy Rautins,2005-06,Syracuse,0.3,0.8,0.2,0.3,0.6,0.7,0.3,0.1,0.2,1.2,2.9,0.377,1,0,0
1597,Marcus Williams,2003-04,Connecticut,0.9,0.2,0.6,0.4,0.9,4.3,1.0,0.1,2.4,1.8,2.9,0.370,1,0,0
1598,Moritz Wagner,2015-16,Michigan,1.2,0.1,0.3,0.7,0.9,0.1,0.2,0.2,0.5,1.6,2.9,0.607,0,1,0


In [11]:
temp = df[features].drop(['Season', 'Team'], axis=1)
temp

Unnamed: 0,Player,2P,3P,FT,ORB,DRB,AST,STL,BLK,TOV,PF,PTS,FG%,Pos_G,Pos_F,Pos_C
0,Jimmer Fredette,6.0,3.4,6.8,0.6,2.8,4.3,1.3,0.0,3.5,1.3,28.9,0.452,1,0,0
1,Adam Morrison,7.0,2.2,7.3,1.4,4.1,1.7,1.1,0.3,2.3,2.0,28.1,0.496,0,1,0
2,Trae Young,4.5,3.7,7.4,0.4,3.5,8.7,1.7,0.3,5.2,1.8,27.4,0.422,1,0,0
3,J.J. Redick,4.5,3.9,6.1,0.2,1.8,2.6,1.4,0.1,2.5,1.5,26.8,0.470,1,0,0
4,Doug McDermott,6.7,2.7,5.1,1.6,5.3,1.6,0.2,0.1,1.8,1.9,26.7,0.526,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1595,Vernon Macklin,1.3,0.0,0.3,0.7,0.7,0.5,0.0,0.1,0.5,1.0,2.9,0.741,0,1,0
1596,Andy Rautins,0.3,0.8,0.2,0.3,0.6,0.7,0.3,0.1,0.2,1.2,2.9,0.377,1,0,0
1597,Marcus Williams,0.9,0.2,0.6,0.4,0.9,4.3,1.0,0.1,2.4,1.8,2.9,0.370,1,0,0
1598,Moritz Wagner,1.2,0.1,0.3,0.7,0.9,0.1,0.2,0.2,0.5,1.6,2.9,0.607,0,1,0


Combine Seasons to get college averages across years for player stats

In [12]:
col_career_avgs = temp.groupby('Player').mean().reset_index()
col_career_avgs

Unnamed: 0,Player,2P,3P,FT,ORB,DRB,AST,STL,BLK,TOV,PF,PTS,FG%,Pos_G,Pos_F,Pos_C
0,A.J. Hammons,5.2,0.10,2.70,2.35,5.00,1.00,0.4,2.65,2.05,2.50,13.45,0.5660,0.0,0.0,1.0
1,A.J. Price,2.9,1.95,2.95,0.70,2.85,5.25,1.0,0.05,2.55,1.45,14.60,0.4225,1.0,0.0,0.0
2,AJ Griffin,1.9,1.80,1.10,0.80,3.20,1.00,0.5,0.60,0.60,1.10,10.40,0.4930,0.0,1.0,0.0
3,Aaron Brooks,3.8,2.30,3.30,0.90,3.40,4.30,1.4,0.20,2.50,2.50,17.70,0.4600,1.0,0.0,0.0
4,Aaron Gordon,4.6,0.40,2.00,2.70,5.30,2.00,0.9,1.00,1.40,2.40,12.40,0.4950,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
853,Zach Collins,3.2,0.30,2.80,1.70,4.20,0.40,0.5,1.80,1.50,2.70,10.00,0.6520,0.0,1.0,0.0
854,Zach LaVine,2.1,1.30,1.30,0.50,2.10,1.80,0.9,0.20,1.10,2.00,9.40,0.4410,1.0,0.0,0.0
855,Zach Randolph,4.2,0.00,2.40,3.10,3.60,1.00,0.7,0.70,1.60,1.80,10.80,0.5870,0.0,0.0,1.0
856,Zhaire Smith,3.7,0.50,2.50,2.20,2.80,1.80,1.1,1.10,1.10,1.80,11.30,0.5560,1.0,0.0,0.0


Reduce college data to players that we have NBA data on

In [13]:
temp = pd.read_csv("../data/processed/player_data_clean.csv")
p_in_db = list(temp['player'].unique())
col_career_avgs = col_career_avgs[col_career_avgs["Player"].isin(p_in_db)]
col_career_avgs

Unnamed: 0,Player,2P,3P,FT,ORB,DRB,AST,STL,BLK,TOV,PF,PTS,FG%,Pos_G,Pos_F,Pos_C
2,AJ Griffin,1.90,1.8,1.100000,0.800000,3.200000,1.0,0.5,0.600000,0.600000,1.100000,10.4,0.493000,0.0,1.0,0.0
3,Aaron Brooks,3.80,2.3,3.300000,0.900000,3.400000,4.3,1.4,0.200000,2.500000,2.500000,17.7,0.460000,1.0,0.0,0.0
4,Aaron Gordon,4.60,0.4,2.000000,2.700000,5.300000,2.0,0.9,1.000000,1.400000,2.400000,12.4,0.495000,0.0,1.0,0.0
5,Aaron Gray,4.20,0.0,2.366667,2.733333,4.833333,1.4,0.4,1.266667,1.733333,2.266667,10.7,0.555667,0.0,0.0,1.0
6,Aaron Holiday,3.25,2.1,3.600000,0.400000,2.850000,5.1,1.2,0.200000,3.100000,2.450000,16.3,0.473000,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
850,Xavier Henry,2.60,1.9,2.500000,1.200000,3.200000,1.5,1.5,0.500000,1.900000,1.800000,13.4,0.458000,1.0,0.0,0.0
853,Zach Collins,3.20,0.3,2.800000,1.700000,4.200000,0.4,0.5,1.800000,1.500000,2.700000,10.0,0.652000,0.0,1.0,0.0
854,Zach LaVine,2.10,1.3,1.300000,0.500000,2.100000,1.8,0.9,0.200000,1.100000,2.000000,9.4,0.441000,1.0,0.0,0.0
855,Zach Randolph,4.20,0.0,2.400000,3.100000,3.600000,1.0,0.7,0.700000,1.600000,1.800000,10.8,0.587000,0.0,0.0,1.0


## Prospects

Loading in list of draftable players

In [14]:
prospects = pd.read_csv("../data/interim/draft_prospects.csv", encoding='ISO-8859-1')
prospects

Unnamed: 0,NAME,POS,AGE,SCHOOL/CLUB,HEIGHT,WEIGHT,STATUS,COUNTRY
0,Melvin Ajinca,F,20,Saint-Quentin Basketball (France),45451,218,International,France
1,Trey Alexander,G,21,Creighton,45446,187,Junior,USA
2,Reece Beekman,G,22,Virginia,45444,196,Senior,USA
3,Adem Bona,C,21,UCLA,45451,243,Sophomore,Nigeria
4,Jalen Bridges,G,23,Baylor,45450,213,Senior,USA
...,...,...,...,...,...,...,...,...
73,Kel'el Ware,C,20,Indiana,36708,230,Sophomore,USA
74,Anton Watson,F,23,Gonzaga,45451,230,Senior,USA
75,Jaylen Wells,F,20,Washington State,45450,206,Junior,USA
76,Cody Williams,F,19,Colorado,45450,178,Freshman,USA


Select prospect stats from college data

In [15]:
prospect_stats = col_career_avgs[col_career_avgs['Player'].isin(prospects['NAME'])]

In [16]:
# prospect_stats.to_csv("../data/interim/prospect_stats_needsIntl.csv", index=False)

Manually loading data from internet on International students

In [17]:
prospect_stats = pd.read_csv("../data/interim/prospect_stats_w_Intl.csv")
prospect_stats

Unnamed: 0,Player,2P,3P,FT,ORB,DRB,AST,STL,BLK,TOV,PF,PTS,FG%,Pos_G,Pos_F,Pos_C
0,Adem Bona,3.2,0.0,1.4,2.2,3.0,0.7,0.6,1.7,1.3,3.3,7.7,0.675,0,1,0
1,Ajay Mitchell,5.1,0.6,4.5,0.3,2.4,5.1,1.3,0.3,2.2,2.0,16.3,0.506,1,0,0
2,Anton Watson,3.625,0.325,1.775,1.725,3.6,2.025,1.45,0.575,1.15,2.35,9.95,0.58875,0,1,0
3,Antonio Reeves,3.6,2.45,2.75,0.55,2.55,1.35,0.55,0.2,1.35,1.95,17.3,0.464,1,0,0
4,Baylor Scheierman,2.766667,2.633333,2.433333,0.7,7.633333,3.9,1.066667,0.133333,2.1,1.933333,15.833333,0.46,1,0,0
5,Blake Hinson,1.8,2.05,1.9,1.05,3.4,1.15,0.55,0.4,1.3,2.85,11.8,0.417,1,0,0
6,Cam Spencer,2.4,2.5,2.1,1.1,3.8,3.6,1.5,0.3,1.0,2.2,14.3,0.484,1,0,0
7,Cody Williams,3.8,0.7,2.3,0.7,2.3,1.6,0.6,0.7,2.0,2.0,11.9,0.552,0,1,0
8,DaRon Holmes II,5.6,1.0,6.2,1.7,6.8,2.6,0.9,2.1,2.2,2.0,20.4,0.544,0,1,0
9,Dalton Knecht,4.8,2.6,4.4,0.8,4.1,1.8,0.7,0.6,1.7,1.8,21.7,0.458,1,0,0


Remove prospects from college dataset

In [18]:
player_stats_wout_prospects = col_career_avgs[~col_career_avgs['Player'].isin(prospects['NAME'])]
player_stats_wout_prospects

Unnamed: 0,Player,2P,3P,FT,ORB,DRB,AST,STL,BLK,TOV,PF,PTS,FG%,Pos_G,Pos_F,Pos_C
2,AJ Griffin,1.90,1.8,1.100000,0.800000,3.200000,1.0,0.5,0.600000,0.600000,1.100000,10.4,0.493000,0.0,1.0,0.0
3,Aaron Brooks,3.80,2.3,3.300000,0.900000,3.400000,4.3,1.4,0.200000,2.500000,2.500000,17.7,0.460000,1.0,0.0,0.0
4,Aaron Gordon,4.60,0.4,2.000000,2.700000,5.300000,2.0,0.9,1.000000,1.400000,2.400000,12.4,0.495000,0.0,1.0,0.0
5,Aaron Gray,4.20,0.0,2.366667,2.733333,4.833333,1.4,0.4,1.266667,1.733333,2.266667,10.7,0.555667,0.0,0.0,1.0
6,Aaron Holiday,3.25,2.1,3.600000,0.400000,2.850000,5.1,1.2,0.200000,3.100000,2.450000,16.3,0.473000,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
850,Xavier Henry,2.60,1.9,2.500000,1.200000,3.200000,1.5,1.5,0.500000,1.900000,1.800000,13.4,0.458000,1.0,0.0,0.0
853,Zach Collins,3.20,0.3,2.800000,1.700000,4.200000,0.4,0.5,1.800000,1.500000,2.700000,10.0,0.652000,0.0,1.0,0.0
854,Zach LaVine,2.10,1.3,1.300000,0.500000,2.100000,1.8,0.9,0.200000,1.100000,2.000000,9.4,0.441000,1.0,0.0,0.0
855,Zach Randolph,4.20,0.0,2.400000,3.100000,3.600000,1.0,0.7,0.700000,1.600000,1.800000,10.8,0.587000,0.0,0.0,1.0


In [19]:
player_stats_wout_prospects = player_stats_wout_prospects.reset_index(drop=True)
player_stats_wout_prospects

Unnamed: 0,Player,2P,3P,FT,ORB,DRB,AST,STL,BLK,TOV,PF,PTS,FG%,Pos_G,Pos_F,Pos_C
0,AJ Griffin,1.90,1.8,1.100000,0.800000,3.200000,1.0,0.5,0.600000,0.600000,1.100000,10.4,0.493000,0.0,1.0,0.0
1,Aaron Brooks,3.80,2.3,3.300000,0.900000,3.400000,4.3,1.4,0.200000,2.500000,2.500000,17.7,0.460000,1.0,0.0,0.0
2,Aaron Gordon,4.60,0.4,2.000000,2.700000,5.300000,2.0,0.9,1.000000,1.400000,2.400000,12.4,0.495000,0.0,1.0,0.0
3,Aaron Gray,4.20,0.0,2.366667,2.733333,4.833333,1.4,0.4,1.266667,1.733333,2.266667,10.7,0.555667,0.0,0.0,1.0
4,Aaron Holiday,3.25,2.1,3.600000,0.400000,2.850000,5.1,1.2,0.200000,3.100000,2.450000,16.3,0.473000,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
566,Xavier Henry,2.60,1.9,2.500000,1.200000,3.200000,1.5,1.5,0.500000,1.900000,1.800000,13.4,0.458000,1.0,0.0,0.0
567,Zach Collins,3.20,0.3,2.800000,1.700000,4.200000,0.4,0.5,1.800000,1.500000,2.700000,10.0,0.652000,0.0,1.0,0.0
568,Zach LaVine,2.10,1.3,1.300000,0.500000,2.100000,1.8,0.9,0.200000,1.100000,2.000000,9.4,0.441000,1.0,0.0,0.0
569,Zach Randolph,4.20,0.0,2.400000,3.100000,3.600000,1.0,0.7,0.700000,1.600000,1.800000,10.8,0.587000,0.0,0.0,1.0


In [20]:
prospect_stats = prospect_stats.sort_values(['Player']).reset_index(drop=True)
prospect_stats.head()

Unnamed: 0,Player,2P,3P,FT,ORB,DRB,AST,STL,BLK,TOV,PF,PTS,FG%,Pos_G,Pos_F,Pos_C
0,AJ Johnson,0.8,0.3,0.3,0.3,1.0,0.7,0.2,0.1,0.6,0.8,2.8,0.355,1,0,0
1,Adem Bona,3.2,0.0,1.4,2.2,3.0,0.7,0.6,1.7,1.3,3.3,7.7,0.675,0,1,0
2,Ajay Mitchell,5.1,0.6,4.5,0.3,2.4,5.1,1.3,0.3,2.2,2.0,16.3,0.506,1,0,0
3,Alexandre Sarr,3.1,0.6,1.7,1.4,3.0,0.9,0.5,1.3,1.1,1.4,9.7,0.52,0,1,0
4,Anton Watson,3.625,0.325,1.775,1.725,3.6,2.025,1.45,0.575,1.15,2.35,9.95,0.58875,0,1,0


In [21]:
player_stats_wout_prospects.columns

Index(['Player', '2P', '3P', 'FT', 'ORB', 'DRB', 'AST', 'STL', 'BLK', 'TOV',
       'PF', 'PTS', 'FG%', 'Pos_G', 'Pos_F', 'Pos_C'],
      dtype='object')

Scaling Numerical data for NBA and prospect datasets separately

In [22]:
from sklearn.preprocessing import StandardScaler

num_features = ['2P', '3P', 'FT', 'ORB', 'DRB', 'AST', 'STL', 'BLK', 'TOV',
       'PF', 'PTS', 'FG%']

players_num = player_stats_wout_prospects[num_features]
prospects_num = prospect_stats[num_features]

scaler = StandardScaler()
scaler.fit(players_num)
players_scaled = scaler.transform(players_num)
prospects_scaled = scaler.transform(prospects_num)


players_scaled = pd.DataFrame(players_scaled, columns=num_features)
prospects_scaled = pd.DataFrame(prospects_scaled, columns=num_features)

Readding in position features since we didn't want them to be scaled.

In [23]:
players_scaled["Pos_G"] = list(player_stats_wout_prospects["Pos_G"])
players_scaled["Pos_F"] = list(player_stats_wout_prospects["Pos_F"])
players_scaled["Pos_C"] = list(player_stats_wout_prospects["Pos_C"])
players_scaled

Unnamed: 0,2P,3P,FT,ORB,DRB,AST,STL,BLK,TOV,PF,PTS,FG%,Pos_G,Pos_F,Pos_C
0,-1.338390,0.961822,-1.469176,-0.914384,-0.544627,-0.840002,-1.155990,-0.319348,-1.955124,-2.410587,-0.762677,0.020852,0.0,1.0,0.0
1,0.060985,1.583758,0.358036,-0.804885,-0.405676,1.247274,0.612678,-0.811510,0.747265,0.619594,1.107060,-0.471014,1.0,0.0,0.0
2,0.650195,-0.779598,-0.721680,1.166082,0.914360,-0.207494,-0.369915,0.172813,-0.817276,0.403152,-0.250420,0.050662,0.0,1.0,0.0
3,0.355590,-1.277147,-0.417145,1.202582,0.590141,-0.586999,-1.352509,0.500921,-0.343173,0.114564,-0.685839,0.954901,0.0,0.0,1.0
4,-0.344097,1.334983,0.607202,-1.352376,-0.787791,1.753280,0.219641,-0.811510,1.600651,0.511373,0.748480,-0.277248,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
566,-0.822831,1.086209,-0.306404,-0.476391,-0.544627,-0.523748,0.809197,-0.442389,-0.106121,-0.895497,0.005708,-0.500824,1.0,0.0,0.0
567,-0.380923,-0.903985,-0.057239,0.071100,0.150129,-1.219507,-1.155990,1.157136,-0.675045,1.052477,-0.865128,2.390752,0.0,1.0,0.0
568,-1.191088,0.339886,-1.303066,-1.242878,-1.308858,-0.333996,-0.369915,-0.811510,-1.243969,-0.462614,-1.018805,-0.754209,1.0,0.0,0.0
569,0.355590,-1.277147,-0.389460,1.604075,-0.266725,-0.840002,-0.762953,-0.196308,-0.532814,-0.895497,-0.660226,1.421925,0.0,0.0,1.0


In [24]:
prospects_scaled["Pos_G"] = list(prospect_stats["Pos_G"])
prospects_scaled["Pos_F"] = list(prospect_stats["Pos_F"])
prospects_scaled["Pos_C"] = list(prospect_stats["Pos_C"])
prospects_scaled.head()

Unnamed: 0,2P,3P,FT,ORB,DRB,AST,STL,BLK,TOV,PF,PTS,FG%,Pos_G,Pos_F,Pos_C
0,-2.148555,-0.903985,-2.133617,-1.461875,-2.073089,-1.029754,-1.745546,-0.93455,-1.955124,-3.059912,-2.709252,-2.036042,1,0,0
1,-0.380923,-1.277147,-1.220011,0.618591,-0.683578,-1.029754,-0.959471,1.034096,-0.959507,2.351126,-1.454224,2.733568,0,1,0
2,1.018452,-0.530824,1.354698,-1.461875,-1.100431,1.75328,0.41616,-0.688469,0.320572,-0.462614,0.74848,0.214618,1,0,0
3,-0.454574,-0.530824,-0.970845,-0.257394,-0.683578,-0.903253,-1.15599,0.541934,-1.243969,-1.761263,-0.941967,0.423288,0,1,0
4,-0.067905,-0.872888,-0.908554,0.098475,-0.266725,-0.191682,0.710938,-0.350108,-1.172853,0.294932,-0.877935,1.448009,0,1,0


Use KNN to find the closest NBA player comparisons for each of the prospects

In [25]:
from sklearn.neighbors import NearestNeighbors
import json

rookie_college_comparisons = {}
knn = NearestNeighbors(n_neighbors=4)
knn.fit(players_scaled)
for i in range(len(prospects_scaled)):
    prospect = prospects_scaled.iloc[i].to_numpy().reshape(1,-1)
    dists, idxs = knn.kneighbors(prospect)
    idxs = idxs[0].tolist()
    dists = dists[0].tolist()
    p_name = prospect_stats.iloc[i]["Player"]
    rookie_college_comparisons[p_name] = []
    for j in range(len(idxs)):
        comp_idx = idxs[j]
        comp_dist = dists[j]
        rookie_college_comparisons[p_name].append(player_stats_wout_prospects.iloc[comp_idx]["Player"])
with open('../data/interim/rookie_player_comps.json', 'w') as f:
    json.dump(rookie_college_comparisons, f)



Closest player comparison for each comparisons for each prospect

In [26]:
rookie_college_comparisons

{'AJ Johnson': ['Landry Fields',
  'Chandler Hutchison',
  'Immanuel Quickley',
  'Peyton Watson'],
 'Adem Bona': ['Zach Collins', 'Jeremy Evans', 'Quincy Acy', 'Daniel Gafford'],
 'Ajay Mitchell': ['Semaj Christon',
  "De'Aaron Fox",
  'Shai Gilgeous-Alexander',
  'Ray McCallum'],
 'Alexandre Sarr': ['Jon Leuer',
  'Cory Jefferson',
  'Nassir Little',
  'D.J. Wilson'],
 'Anton Watson': ['Derrick Brown',
  'Mitch McGary',
  'Kendall Brown',
  'James Augustine'],
 'Antonio Reeves': ['Luke Kennard',
  'Arron Afflalo',
  'Tim Hardaway Jr.',
  'Tyler Dorsey'],
 'Ariel Hukporti': ['Markieff Morris',
  'Festus Ezeli',
  'Daniel Oturu',
  'Cedric Simmons'],
 'Baylor Scheierman': ['Daniel Hamilton',
  'Allen Crabbe',
  'Denzel Valentine',
  'Jordan Nwora'],
 'Blake Hinson': ['Joe Harris',
  'Quentin Grimes',
  'Frank Jackson',
  'Hamidou Diallo'],
 'Bobi Klintman': ['Abdel Nader',
  'DaJuan Summers',
  'Eric Paschall',
  'Ryan Kelly'],
 'Cam Spencer': ['Landry Shamet',
  'Gary Harris',
  'Patr

## Convert College to NBA Numbers

In [27]:
player_clean = pd.read_csv("../data/processed/player_data_clean.csv")
player_clean = player_clean.drop(['Unnamed: 0'], axis=1)
player_clean

Unnamed: 0,seas_id,season,player_id,player,age,experience,tm,g,gs,mp,...,bpm,vorp,person_id,height,weight,pos_PG,pos_SG,pos_PF,pos_SF,pos_C
0,31137,2024,5026,A.J. Lawson,23,2,DAL,42,0.0,311,...,-4.6,-0.2,1630639,78,179.0,False,True,False,False,False
1,30459,2023,5026,A.J. Lawson,22,1,TOT,15,0.0,108,...,-4.4,-0.1,1630639,78,179.0,False,True,False,False,False
2,30461,2023,5026,A.J. Lawson,22,1,DAL,14,0.0,106,...,-4.6,-0.1,1630639,78,179.0,False,True,False,False,False
3,31138,2024,5027,AJ Griffin,20,2,ATL,20,0.0,171,...,-9.6,-0.3,1631100,78,220.0,False,False,False,True,False
4,30467,2023,5027,AJ Griffin,19,1,ATL,72,12.0,1401,...,-0.8,0.4,1631100,78,220.0,False,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11602,15597,1997,2546,Stacey King,30,8,TOT,11,0.0,103,...,-7.3,-0.1,351,83,250.0,False,False,False,False,True
11603,15605,1997,3027,Steve Hamer,23,1,BOS,35,3.0,268,...,-6.5,-0.3,984,84,245.0,False,False,False,False,True
11604,15633,1997,2692,Tracy Moore,31,5,HOU,27,1.0,237,...,-2.9,-0.1,929,76,200.0,False,True,False,False,False
11605,15652,1997,2277,Wayman Tisdale,32,12,PHO,53,15.0,778,...,-8.3,-1.2,47,81,260.0,False,False,True,False,False


In [28]:
player_clean = player_clean.drop(["seas_id", "season", "player_id", "age", "experience", "tm", "person_id"], axis = 1)
player_clean

Unnamed: 0,player,g,gs,mp,fg_per_100_poss,fga_per_100_poss,x3p_per_100_poss,x3pa_per_100_poss,x2p_per_100_poss,x2pa_per_100_poss,...,dbpm,bpm,vorp,height,weight,pos_PG,pos_SG,pos_PF,pos_SF,pos_C
0,A.J. Lawson,42,0.0,311,8.3,18.7,2.0,7.7,6.3,11.0,...,-1.5,-4.6,-0.2,78,179.0,False,True,False,False,False
1,A.J. Lawson,15,0.0,108,10.1,20.2,4.6,11.5,5.5,8.7,...,-2.9,-4.4,-0.1,78,179.0,False,True,False,False,False
2,A.J. Lawson,14,0.0,106,9.8,20.2,4.7,11.7,5.2,8.4,...,-3.1,-4.6,-0.1,78,179.0,False,True,False,False,False
3,AJ Griffin,20,0.0,171,5.0,17.4,2.8,10.9,2.2,6.5,...,-3.5,-9.6,-0.3,78,220.0,False,False,False,True,False
4,AJ Griffin,72,12.0,1401,8.4,18.1,3.4,8.8,5.0,9.3,...,-0.5,-0.8,0.4,78,220.0,False,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11602,Stacey King,11,0.0,103,5.6,11.2,0.0,0.0,5.6,11.2,...,-2.1,-7.3,-0.1,83,250.0,False,False,False,False,True
11603,Steve Hamer,35,3.0,268,5.6,10.7,0.0,0.4,5.6,10.3,...,-2.2,-6.5,-0.3,84,245.0,False,False,False,False,True
11604,Tracy Moore,27,1.0,237,7.3,18.8,2.4,9.5,4.9,9.3,...,-1.5,-2.9,-0.1,76,200.0,False,True,False,False,False
11605,Wayman Tisdale,53,15.0,778,10.5,24.6,0.0,0.0,10.5,24.6,...,-2.4,-8.3,-1.2,81,260.0,False,False,True,False,False


In [29]:
rookie_projections = pd.DataFrame(columns=player_clean.columns)
num_cols = player_clean.select_dtypes(include=['number']).columns.tolist()

for pspct in rookie_college_comparisons.keys():
    new_row = {'player': pspct}
    t2 = pd.DataFrame(columns=num_cols)
    for comp in rookie_college_comparisons[pspct]:
        temp = player_clean[player_clean['player'] == comp]
        temp = temp[num_cols]
        t2 = pd.concat([t2, temp], ignore_index=True)
    for c in t2.columns:
        new_row[c] = t2[c].mean()
    new_row.update({"pos_PG":None,"pos_SG":None,"pos_SF":None,"pos_PF":None,"pos_C":None})
    rookie_projections = pd.concat([rookie_projections, pd.DataFrame(new_row, index=[0])], ignore_index=True)
rookie_projections


  t2 = pd.concat([t2, temp], ignore_index=True)
  rookie_projections = pd.concat([rookie_projections, pd.DataFrame(new_row, index=[0])], ignore_index=True)
  t2 = pd.concat([t2, temp], ignore_index=True)
  t2 = pd.concat([t2, temp], ignore_index=True)
  t2 = pd.concat([t2, temp], ignore_index=True)
  t2 = pd.concat([t2, temp], ignore_index=True)
  t2 = pd.concat([t2, temp], ignore_index=True)
  t2 = pd.concat([t2, temp], ignore_index=True)
  t2 = pd.concat([t2, temp], ignore_index=True)
  t2 = pd.concat([t2, temp], ignore_index=True)
  t2 = pd.concat([t2, temp], ignore_index=True)
  t2 = pd.concat([t2, temp], ignore_index=True)
  t2 = pd.concat([t2, temp], ignore_index=True)
  t2 = pd.concat([t2, temp], ignore_index=True)
  t2 = pd.concat([t2, temp], ignore_index=True)
  t2 = pd.concat([t2, temp], ignore_index=True)
  t2 = pd.concat([t2, temp], ignore_index=True)
  t2 = pd.concat([t2, temp], ignore_index=True)
  t2 = pd.concat([t2, temp], ignore_index=True)
  t2 = pd.concat([t2, temp],

Unnamed: 0,player,g,gs,mp,fg_per_100_poss,fga_per_100_poss,x3p_per_100_poss,x3pa_per_100_poss,x2p_per_100_poss,x2pa_per_100_poss,...,dbpm,bpm,vorp,height,weight,pos_PG,pos_SG,pos_PF,pos_SF,pos_C
0,AJ Johnson,48.941176,18.294118,1123.117647,7.035294,16.088235,2.047059,5.635294,4.988235,10.452941,...,0.070588,-1.029412,0.523529,77.0,203.235294,,,,,
1,Adem Bona,47.9,13.4,811.266667,7.05,12.613333,0.783333,2.35,6.26,10.25,...,0.76,-0.073333,0.363333,81.1,232.2,,,,,
2,Ajay Mitchell,58.631579,46.526316,1703.578947,9.726316,21.2,1.757895,5.231579,7.947368,15.947368,...,-0.189474,0.568421,1.594737,75.947368,189.736842,,,,,
3,Alexandre Sarr,42.6,6.15,617.65,7.275,15.655,1.665,5.11,5.61,10.54,...,-0.35,-2.18,0.055,80.7,226.4,,,,,
4,Anton Watson,38.0,2.625,476.375,7.125,13.7375,0.1375,0.9625,6.9875,12.775,...,-0.6125,-3.0125,0.0375,80.625,235.25,,,,,
5,Antonio Reeves,57.615385,29.948718,1499.205128,7.687179,17.907692,3.341026,8.74359,4.353846,9.158974,...,-1.064103,-1.471795,0.276923,76.897436,204.333333,,,,,
6,Ariel Hukporti,45.321429,18.678571,928.107143,7.382143,16.871429,1.653571,4.735714,5.721429,12.132143,...,-0.242857,-2.45,0.05,81.178571,245.535714,,,,,
7,Baylor Scheierman,44.608696,9.0,875.26087,7.056522,17.065217,3.373913,9.095652,3.665217,7.973913,...,-0.647826,-2.56087,0.004348,78.217391,217.869565,,,,,
8,Blake Hinson,51.666667,19.791667,1130.5,7.420833,16.383333,2.9125,7.695833,4.520833,8.679167,...,-0.470833,-2.208333,0.145833,77.041667,210.583333,,,,,
9,Bobi Klintman,40.266667,7.0,640.4,6.486667,15.573333,1.84,5.34,4.633333,10.24,...,-0.36,-3.313333,-0.106667,79.4,235.333333,,,,,


In [30]:
rookie_projections.to_csv('../data/interim/rookie_projection_stats.csv', index=False)

## NBA Players for All Star Comparisons

In [31]:
nba_df = pd.read_csv('../data/interim/all_player_stats_00-24.csv')
nba_df = nba_df.drop(['Rk', 'MP/G', '-9999'], axis=1)
temp = nba_df["Pos"].str.get_dummies(sep='-')
nba_df["Pos_G"] = list(temp["G"])
nba_df["Pos_F"] = list(temp["F"])
nba_df["Pos_C"] = list(temp["C"])
nba_df = nba_df.drop(["Pos"], axis=1)
nba_df

Unnamed: 0,Player,WS,Season,Age,Team,G,GS,AS,MP,FG,...,PTS,FG%,2P%,3P%,FT%,TS%,eFG%,Pos_G,Pos_F,Pos_C
0,LeBron James,20.3,2008-09,24,CLE,81,81,1,37.7,9.7,...,28.4,0.489,0.535,0.344,0.780,0.591,53.007,1,1,0
1,LeBron James,19.3,2012-13,28,MIA,76,76,1,37.9,10.1,...,26.8,0.565,0.602,0.406,0.753,0.640,60.303,1,1,0
2,Kevin Durant,19.2,2013-14,25,OKC,81,81,1,38.5,10.5,...,32.0,0.503,0.549,0.391,0.873,0.635,55.983,1,1,0
3,Kevin Durant,18.9,2012-13,24,OKC,81,81,1,38.5,9.0,...,28.1,0.510,0.539,0.416,0.905,0.647,55.862,1,1,0
4,LeBron James,18.5,2009-10,25,CLE,76,76,1,39.0,10.1,...,29.7,0.503,0.560,0.333,0.767,0.604,54.483,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,Stephon Marbury,-0.3,2008-09,31,BOS,23,4,0,18.0,1.7,...,3.8,0.342,0.372,0.240,0.462,0.377,36.937,1,0,0
9996,Art Long,-0.3,2002-03,30,PHITOR,26,0,0,8.1,1.1,...,2.3,0.373,0.361,0.667,0.200,0.378,38.667,0,1,1
9997,Jamaal Tinsley,-0.3,2009-10,31,MEM,38,1,0,15.5,1.4,...,3.5,0.371,0.446,0.179,0.815,0.438,39.643,1,0,0
9998,Eddy Curry,-0.3,2009-10,27,NYK,7,0,0,8.9,1.1,...,3.7,0.381,0.381,,0.588,0.456,38.095,0,0,1


In [32]:
num_features = ['2P', '3P', 'FT', 'ORB', 'DRB', 'AST', 'STL', 'BLK', 'TOV',
       'PF', 'PTS', 'FG%']

nba_num = nba_df[num_features]

nba_scaler = StandardScaler()
nba_scaler.fit(nba_num)
nba_scaled = nba_scaler.transform(nba_num)
nba_scaled = pd.DataFrame(nba_scaled, columns=num_features)
nba_scaled["Pos_G"] = nba_df["Pos_G"]
nba_scaled["Pos_F"] = nba_df["Pos_F"]
nba_scaled["Pos_C"] = nba_df["Pos_C"]
nba_scaled

Unnamed: 0,2P,3P,FT,ORB,DRB,AST,STL,BLK,TOV,PF,PTS,FG%,Pos_G,Pos_F,Pos_C
0,2.858490,1.107670,3.978301,0.346113,1.926263,2.804234,2.412826,1.376541,2.243211,-0.360287,3.222512,0.534411,1,1,0
1,3.176958,0.847106,2.575201,0.346113,2.212675,2.858788,2.412826,0.950954,2.243211,-0.786226,2.952487,1.564896,1,1,0
2,2.858490,2.149925,4.960471,-0.393550,2.155393,1.876825,1.446401,0.525366,2.888223,0.207631,3.830067,0.724238,1,1,0
3,2.433867,1.237952,4.750006,-0.516827,2.499087,1.385844,1.688007,1.802129,2.888223,-0.218308,3.171882,0.819151,1,1,0
4,3.017724,1.237952,4.329076,-0.146996,1.983546,3.567983,2.171220,1.163748,2.759221,-0.502267,3.441907,0.724238,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,-0.697731,-0.585995,-0.932549,-1.133213,-1.052421,0.676649,-0.728057,-0.751396,0.437176,-0.644247,-0.929118,-1.458763,1,0,0
9996,-0.910042,-0.846559,-1.072859,-0.270273,-0.823291,-1.014509,-1.211269,-0.113015,-0.594843,-0.928206,-1.182266,-1.038434,0,1,1
9997,-0.803886,-0.716277,-0.722084,-0.886658,-0.880573,0.403881,0.479975,-0.751396,0.566179,-0.218308,-0.979747,-1.065552,1,0,0
9998,-0.856964,-0.976841,-0.160844,-0.516827,-0.937856,-1.123616,-1.694482,-0.751396,0.824184,-0.076328,-0.945994,-0.929962,0,0,1


##  - Unfinished -

# All Star Comparisons

In [33]:
all_star_stats = pd.read_csv("../data/interim/allstar_stats_00-24.csv")
all_star_stats = all_star_stats.drop(['Rk', '-9999'], axis=1)
all_star_stats

Unnamed: 0,Player,WS,Season,Age,Team,G,GS,AS,MP,FG,...,TOV,PF,PTS,FG%,2P%,3P%,FT%,TS%,eFG%,Pos
0,LeBron James,20.3,2008-09,24,CLE,81,81,1,37.7,9.7,...,3.0,1.7,28.4,0.489,0.535,0.344,0.780,0.591,53.007,F-G
1,LeBron James,19.3,2012-13,28,MIA,76,76,1,37.9,10.1,...,3.0,1.4,26.8,0.565,0.602,0.406,0.753,0.640,60.303,F-G
2,Kevin Durant,19.2,2013-14,25,OKC,81,81,1,38.5,10.5,...,3.5,2.1,32.0,0.503,0.549,0.391,0.873,0.635,55.983,F-G
3,Kevin Durant,18.9,2012-13,24,OKC,81,81,1,38.5,9.0,...,3.5,1.8,28.1,0.510,0.539,0.416,0.905,0.647,55.862,F-G
4,LeBron James,18.5,2009-10,25,CLE,76,76,1,39.0,10.1,...,3.4,1.6,29.7,0.503,0.560,0.333,0.767,0.604,54.483,F-G
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
595,Deron Williams,4.1,2011-12,27,NJN,55,55,1,36.3,7.1,...,4.0,2.2,21.0,0.407,0.446,0.336,0.843,0.527,46.670,G
596,Tracy McGrady,4.1,2005-06,26,HOU,47,47,1,37.1,8.7,...,2.6,1.9,24.4,0.406,0.434,0.312,0.747,0.494,44.164,G-F
597,Julius Randle,3.8,2023-24,29,NYK,46,46,1,35.4,8.6,...,3.5,2.7,24.0,0.472,0.539,0.311,0.781,0.569,51.794,F-C
598,Anthony Edwards,3.8,2022-23,21,MIN,79,79,1,36.0,8.9,...,3.3,2.4,24.6,0.459,0.513,0.369,0.756,0.564,52.790,G-F


In [34]:
num_features = ['2P', '3P', 'FT', 'ORB', 'DRB', 'AST', 'STL', 'BLK', 'TOV',
       'PF', 'PTS', 'FG%']

all_stars_num = all_star_stats[num_features]

AS_scaler = StandardScaler()
AS_scaler.fit(all_stars_num)
AS_scaled = scaler.transform(all_stars_num)

AS_scaled = pd.DataFrame(AS_scaled, columns=num_features)
AS_scaled

Unnamed: 0,2P,3P,FT,ORB,DRB,AST,STL,BLK,TOV,PF,PTS,FG%
0,3.227992,0.713048,3.680240,-0.366893,1.609116,3.081546,1.202234,0.295853,1.458420,-1.111938,3.847633,-0.038768
1,3.669900,0.464273,2.019138,-0.366893,1.956493,3.144797,1.202234,0.049773,1.458420,-1.761263,3.437827,1.094014
2,3.227992,1.708145,4.843012,-1.023882,1.887018,2.006283,0.416160,-0.196308,2.169574,-0.246172,4.769695,0.169903
3,2.638781,0.837435,4.593846,-1.133380,2.303871,1.437026,0.612678,0.541934,2.169574,-0.895497,3.770794,0.274238
4,3.448946,0.837435,4.095516,-0.804885,1.678591,3.967057,1.005716,0.172813,2.027343,-1.328380,4.180600,0.169903
...,...,...,...,...,...,...,...,...,...,...,...,...
595,0.944801,1.334983,1.520808,-1.352376,-0.753053,4.030308,0.219641,-0.565429,2.880729,-0.029731,1.952283,-1.260980
596,2.565130,0.713048,2.102193,-0.695387,1.122787,1.563527,0.416160,0.049773,0.889496,-0.679055,2.823119,-1.275885
597,2.344176,0.837435,1.853028,0.618591,2.095445,1.690029,-1.155990,-0.688469,2.169574,1.052477,2.720668,-0.292153
598,1.902268,2.081306,0.939422,-1.133380,0.844884,1.310524,1.005716,-0.196308,1.885112,0.403152,2.874345,-0.485919


In [35]:
temp = all_star_stats["Pos"].str.get_dummies(sep='-')
all_star_stats["Pos_G"] = list(temp["G"])
all_star_stats["Pos_F"] = list(temp["F"])
all_star_stats["Pos_C"] = list(temp["C"])
all_star_stats = all_star_stats.drop(["Pos"], axis=1)
AS_scaled["Pos_G"] = all_star_stats["Pos_G"]
AS_scaled["Pos_F"] = all_star_stats["Pos_F"]
AS_scaled["Pos_C"] = all_star_stats["Pos_C"]
AS_scaled

Unnamed: 0,2P,3P,FT,ORB,DRB,AST,STL,BLK,TOV,PF,PTS,FG%,Pos_G,Pos_F,Pos_C
0,3.227992,0.713048,3.680240,-0.366893,1.609116,3.081546,1.202234,0.295853,1.458420,-1.111938,3.847633,-0.038768,1,1,0
1,3.669900,0.464273,2.019138,-0.366893,1.956493,3.144797,1.202234,0.049773,1.458420,-1.761263,3.437827,1.094014,1,1,0
2,3.227992,1.708145,4.843012,-1.023882,1.887018,2.006283,0.416160,-0.196308,2.169574,-0.246172,4.769695,0.169903,1,1,0
3,2.638781,0.837435,4.593846,-1.133380,2.303871,1.437026,0.612678,0.541934,2.169574,-0.895497,3.770794,0.274238,1,1,0
4,3.448946,0.837435,4.095516,-0.804885,1.678591,3.967057,1.005716,0.172813,2.027343,-1.328380,4.180600,0.169903,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
595,0.944801,1.334983,1.520808,-1.352376,-0.753053,4.030308,0.219641,-0.565429,2.880729,-0.029731,1.952283,-1.260980,1,0,0
596,2.565130,0.713048,2.102193,-0.695387,1.122787,1.563527,0.416160,0.049773,0.889496,-0.679055,2.823119,-1.275885,1,1,0
597,2.344176,0.837435,1.853028,0.618591,2.095445,1.690029,-1.155990,-0.688469,2.169574,1.052477,2.720668,-0.292153,0,1,1
598,1.902268,2.081306,0.939422,-1.133380,0.844884,1.310524,1.005716,-0.196308,1.885112,0.403152,2.874345,-0.485919,1,1,0


In [36]:
def find_AS_comparison(player_row_scaled, comp_df):
    knn = NearestNeighbors(n_neighbors=3)
    knn.fit(comp_df)
    dists, idxs = knn.kneighbors(player_row_scaled)
    idxs = idxs[0].tolist()
    return idxs

In [37]:
for i, name in enumerate(prospect_stats['Player']):
    print(f"{i}: {name}")

0: AJ Johnson
1: Adem Bona
2: Ajay Mitchell
3: Alexandre Sarr
4: Anton Watson
5: Antonio Reeves
6: Ariel Hukporti
7: Baylor Scheierman
8: Blake Hinson
9: Bobi Klintman
10: Cam Spencer
11: Cody Williams
12: DaRon Holmes II
13: Dalton Knecht
14: Devin Carter
15: Donovan Clingan
16: Enrique Freeman
17: Grant Nelson
18: Harrison Ingram
19: Isaac Jones
20: Jalen Bridges
21: Jamal Shead
22: Jared McCain
23: Jaylen Wells
24: Johnny Furphy
25: Justin Edwards
26: KJ Simpson
27: Keshad Johnson
28: Kevin McCullar Jr.
29: Kyle Filipowski
30: Melvin Ajinca
31: N'Faly Dante
32: Nikola Djurisic
33: Nikola Topic
34: Oso Ighodaro
35: PJ Hall
36: Pacome Dadiet
37: Pelle Larsson
38: Reece Beekman
39: Reed Sheppard
40: Rob Dillingham
41: Ryan Dunn
42: Stephon Castle
43: Terrence Shannon Jr.
44: Tidjane Salaun
45: Trey Alexander
46: Tristen Newton
47: Tyler Kolek
48: Tyler Smith
49: Tyon Grant-Foster
50: Yves Missi
51: Zaccharie Risacher
52: Zach Edey
53: Zyon Pullin


In [38]:
# Update Based on Player Index above
player_index = 52

player_in = prospects_scaled.iloc[player_index].to_numpy().reshape(1,-1)
comps = find_AS_comparison(player_in, AS_scaled)
print(f"All Star Comparison for {prospect_stats.iloc[player_index, 0]}:")
for i in comps:
    print(f"\t{all_star_stats.iloc[int(i),0]}")


All Star Comparison for Zach Edey:
	Brook Lopez
	Pau Gasol
	Jarrett Allen




In [44]:
AS_scaled.index = all_star_stats["Player"]
AS_scaled.to_csv("../data/processed/all_stars_scaled.csv", index=True)

In [53]:
rookie_stats = prospect_stats.round(2)
rookie_stats['Pos'] = rookie_stats[['Pos_G', 'Pos_F', 'Pos_C']].idxmax(axis=1)
rookie_stats["Pos"] = rookie_stats["Pos"].replace({'Pos_G': 'G', 'Pos_F': 'F', 'Pos_C': 'C'})
rookie_stats = rookie_stats.drop(['Pos_G', 'Pos_F', 'Pos_C'], axis=1)
cols = list(rookie_stats.columns)
cols.insert(1, cols.pop())
rookie_stats = rookie_stats[cols]
rookie_stats.to_csv("../data/processed/rookie_stats_raw.csv", index=False)

In [57]:
prospects_scaled.index = rookie_stats["Player"]
prospects_scaled.to_csv('../data/processed/rookie_stats_scaled.csv')