In [24]:
import numpy as np
import pandas as pd

ncaa = pd.read_csv("ncaa_players.csv")
prospects = pd.read_csv("prospects_raw.csv")

In [25]:
prospects["HEIGHT"] = prospects["HEIGHT"].str.split(" ").str[0].str[0].astype(int)*12 + prospects["HEIGHT"].str.split(" ").str[1].str[:-2].astype(float)

In [26]:
prospects.loc[prospects.WINGSPAN.notna(), "WINGSPAN"] = prospects.loc[prospects.WINGSPAN.notna(), "WINGSPAN"].str.split(" ").str[0].str[0].astype(int)*12 + prospects.loc[prospects.WINGSPAN.notna(), "WINGSPAN"].str.split(" ").str[1].str[:-2].astype(float)

In [27]:
prospects.loc[prospects["STANDING REACH"].notna(), "STANDING REACH"] = prospects.loc[prospects["STANDING REACH"].notna(), "STANDING REACH"].str.split(" ").str[0].str[0].astype(int)*12 + prospects.loc[prospects["STANDING REACH"].notna(), "STANDING REACH"].str.split(" ").str[1].str[:-2].astype(float)

In [28]:
def lin_fill_na(df, fill_column_name, feature_column_list):
    test = df[df[fill_column_name].isnull()]
    train = df[df[fill_column_name].notnull()]
    
    train_x = train[feature_column_list]
    train_y = train[fill_column_name]
    
    from sklearn.linear_model import LinearRegression
    model = LinearRegression()
    
    model.fit(train_x, train_y)
    
    test_x = test[feature_column_list]
    test_y_pred = model.predict(test_x)
    
    df.loc[df[fill_column_name].isnull(), fill_column_name] = np.round(test_y_pred, 1)
    return df

In [29]:
prospects = prospects.rename({"MAX VERTICAL": "VERTICAL LEAP"}, axis=1)

In [30]:
prospects["ncaa_year"] = 2023

In [31]:
prospects = lin_fill_na(prospects, "WEIGHT", ["HEIGHT"])
prospects = lin_fill_na(prospects, "WINGSPAN", ["HEIGHT", "WEIGHT"])
prospects = lin_fill_na(prospects, "STANDING REACH", ["HEIGHT", "WINGSPAN"])
prospects = lin_fill_na(prospects, "STANDING VERTICAL", ["HEIGHT", "WEIGHT"])
prospects = lin_fill_na(prospects, "VERTICAL LEAP", ["HEIGHT", "WEIGHT"])
prospects = lin_fill_na(prospects, "LANE AGILITY", ["HEIGHT", "WEIGHT"])
prospects = lin_fill_na(prospects, "THREE QUARTER SPRINT", ["HEIGHT", "WEIGHT"])

In [33]:
prospects_2 = ncaa.merge(prospects, how="inner", on=["ncaa_player_name", "ncaa_year"])

In [35]:
prospects_2 = prospects_2.sort_values("consensus_tier").reset_index(drop=True)

In [37]:
pf = prospects_2.loc[:,["ncaa_player_name", "rookie_Age", "ncaa_team", "Full Role", "scouting_report", "projected_range"]]

In [38]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
scaled_shooting = pd.DataFrame(scaler.fit_transform(prospects_2.loc[:,["ncaa_rim_fg%", "ncaa_mid_fg%", "ncaa_3p%"]]), columns = ["ncaa_rim_fg%", "ncaa_mid_fg%", "ncaa_3p%"])
pf = pd.concat([pf, scaled_shooting], axis=1)

------

# Creating Single Number Representation of Physical Ability

In [39]:
physicals = prospects_2[["HEIGHT", "WINGSPAN", "STANDING REACH", 
                       "VERTICAL LEAP", "LANE AGILITY", "THREE QUARTER SPRINT"
                      ]]

from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

scaled_phys = scaler.fit_transform(physicals)

from sklearn.decomposition import PCA

physical_pca = PCA(n_components=2)

physical_pca.fit(scaled_phys)

pf["length_pc1"] = physical_pca.transform(scaled_phys)[:,0]
pf["explosive_pc2"] = physical_pca.transform(scaled_phys)[:,1]

--------

# Predicting Bust and Star Probability, then Inputting Results

In [40]:
import joblib

import os
import sys

current = os.path.dirname(os.path.realpath("nba_prospects"))
parent = os.path.dirname(current)

bust = joblib.load(parent + "/prospect_modeling/prospect_models/bust_model.sav")

bust_X_prospects = prospects_2.loc[:,["rookie_Age", "consensus_tier", "ncaa_conf", "ncaa_usg", "ncaa_efg%", 
                                    "ncaa_ts%", "ncaa_oreb%", "ncaa_dreb%", "ncaa_ast%", 
                                    "ncaa_to%", "ncaa_ft%", "ncaa_2p%", "ncaa_3p%", 
                                    "ncaa_blk%", "ncaa_stl%", "ncaa_ftr", "ncaa_foul_rate",
                                    "ncaa_recruit_rank", "ncaa_ast_tov_ratio", "ncaa_rim_fg%",
                                    "ncaa_mid_fg%", "ncaa_dunk_fg%", "ncaa_ogbpm",
                                    "ncaa_dgbpm", "ncaa_ppp",  "ncaa_rim_prop",
                                    "ncaa_mid_prop", "ncaa_3pt_prop", "ncaa_ast_fg_ratio", 
                                    "GMM_Primary_Cluster", "GMM_Secondary_Cluster", 
                                    "HEIGHT", "WEIGHT", "WINGSPAN", "STANDING VERTICAL", 
                                    "VERTICAL LEAP", "LANE AGILITY", "THREE QUARTER SPRINT"
                                   ]
                                ]

bust_X_prospects["WINGSPAN DIFFERENTIAL"] = bust_X_prospects["WINGSPAN"] - bust_X_prospects["HEIGHT"]

bust_X_prospects = bust_X_prospects.drop(["GMM_Primary_Cluster", "GMM_Secondary_Cluster"], axis=1)

bust_X_prospects.loc[bust_X_prospects["ncaa_conf"].isin(["SEC", "B10", "BE", "ACC", "P12", "P10", "B12"]), "ncaa_conf"] = 3
bust_X_prospects.loc[bust_X_prospects["ncaa_conf"].isin(["WCC", "Amer", "A10", "MWC"]), "ncaa_conf"] = 2
bust_X_prospects.loc[bust_X_prospects["ncaa_conf"].isin(["CAA", "SB", "MVC", "CUSA", "MAC", "SC", "WAC"]), "ncaa_conf"] = 1
bust_X_prospects.loc[bust_X_prospects["ncaa_conf"].isin(["Sum", "MEAC", "OVC", "BSky", "Horz", "ASun", "MAAC", "BW", "NEC", "Pat", "Ivy", "BSth"]), "ncaa_conf"] = 0

pf["P(Bust)"] = bust.predict_proba(bust_X_prospects)[:,1]

In [41]:
star = joblib.load(parent + "/prospect_modeling/prospect_models/star_model.sav")

star_X_prospects = prospects_2.loc[:,["rookie_Age", "consensus_tier", "ncaa_conf", "ncaa_usg", "ncaa_efg%", 
                                    "ncaa_ts%", "ncaa_oreb%", "ncaa_dreb%", "ncaa_ast%", 
                                    "ncaa_to%", "ncaa_ft%", "ncaa_2p%", "ncaa_3p%", 
                                    "ncaa_blk%", "ncaa_stl%", "ncaa_ftr", "ncaa_foul_rate",
                                    "ncaa_recruit_rank", "ncaa_ast_tov_ratio", "ncaa_rim_fg%",
                                    "ncaa_mid_fg%", "ncaa_dunk_fg%", "ncaa_ogbpm",
                                    "ncaa_dgbpm", "ncaa_ppp",  "ncaa_rim_prop",
                                    "ncaa_mid_prop", "ncaa_3pt_prop", "ncaa_ast_fg_ratio", 
                                    "GMM_Primary_Cluster", "GMM_Secondary_Cluster", 
                                    "HEIGHT", "WEIGHT", "WINGSPAN", "STANDING VERTICAL", 
                                    "VERTICAL LEAP", "LANE AGILITY", "THREE QUARTER SPRINT"
                                   ]
                                ]

star_X_prospects.loc[star_X_prospects["ncaa_conf"].isin(["SEC", "B10", "BE", "ACC", "P12", "P10", "B12"]), "ncaa_conf"] = 3
star_X_prospects.loc[star_X_prospects["ncaa_conf"].isin(["WCC", "Amer", "A10", "MWC"]), "ncaa_conf"] = 2
star_X_prospects.loc[star_X_prospects["ncaa_conf"].isin(["CAA", "SB", "MVC", "CUSA", "MAC", "SC", "WAC"]), "ncaa_conf"] = 1
star_X_prospects.loc[star_X_prospects["ncaa_conf"].isin(["Sum", "MEAC", "OVC", "BSky", "Horz", "ASun", "MAAC", "BW", "NEC", "Pat", "Ivy", "BSth"]), "ncaa_conf"] = 0

star_X_prospects["WINGSPAN DIFFERENTIAL"] = star_X_prospects["WINGSPAN"] - star_X_prospects["HEIGHT"]
star_X_prospects["TWITCH"] = star_X_prospects["VERTICAL LEAP"] / star_X_prospects["LANE AGILITY"]

star_X_prospects["3pt_value"] = star_X_prospects["ncaa_3p%"] * star_X_prospects["ncaa_3pt_prop"]
star_X_prospects["ncaa_mid_value"] = star_X_prospects["ncaa_mid_fg%"] * star_X_prospects["ncaa_mid_prop"]
star_X_prospects["ncaa_rim_value"] = star_X_prospects["ncaa_rim_fg%"] * star_X_prospects["ncaa_rim_prop"]

# better at younger age -> stardom
star_X_prospects["age_rim"] = star_X_prospects["ncaa_rim_fg%"] / star_X_prospects["rookie_Age"]
star_X_prospects["age_rr"] = star_X_prospects["ncaa_recruit_rank"] / star_X_prospects["rookie_Age"]
star_X_prospects["age_ftr"] = star_X_prospects["ncaa_ftr"] / star_X_prospects["rookie_Age"]
star_X_prospects["age_ppp"] = star_X_prospects["ncaa_ppp"] / star_X_prospects["rookie_Age"]

# higher usage at younger age indicates star?
star_X_prospects["age_usg"] = star_X_prospects["ncaa_usg"] / star_X_prospects["rookie_Age"]

# drop irrelevant features to success (based on expertise)
star_X_prospects = star_X_prospects.drop(["ncaa_efg%", "ncaa_ts%", "ncaa_rim_prop", "ncaa_mid_prop", 
                        "ncaa_3pt_prop", "GMM_Secondary_Cluster", "WEIGHT", 
                        "STANDING VERTICAL", "THREE QUARTER SPRINT"], axis=1)

# ID if they had a supporting or main role in college
star_X_prospects["GMM_Primary_Cluster"] = np.where(star_X_prospects["GMM_Primary_Cluster"].isin(["Skilled Forward", "Lead Guard"]), 1, 0)

pf["P(Star)"] = star.predict_proba(star_X_prospects)[:,1]

-------

# Predicting Career Stats

In [42]:
X_col_drop = ["ncaa_team", "ncaa_gp", "ncaa_minutes_%", "ncaa_ortg", "ncaa_efg%", "ncaa_ts%",
              "ncaa_ftm", "ncaa_fta", "ncaa_2pm", "ncaa_2pa", "ncaa_3pm", "ncaa_3pa", 
              "ncaa_yr", "ncaa_ht", "ncaa_porpag", "ncaa_adjoe", "ncaa_year", 
              "ncaa_rim_makes", "ncaa_rim_att", "ncaa_mid_range_makes", "ncaa_mid_range_att",
              "ncaa_dunks_made", "ncaa_dunks_att", "ncaa_draft_pick", "ncaa_drtg", 
              "ncaa_adrtg", "ncaa_dporpag", "ncaa_stops", "ncaa_bpm", "ncaa_obpm", 
              "ncaa_dbpm", "ncaa_gbpm", "ncaa_min_per_game", "ncaa_oreb", "ncaa_dreb", 
              "ncaa_treb", "ncaa_ast", "ncaa_stl", "ncaa_blk", "ncaa_pts", "ncaa_fga",
              "ncaa_tov", "GMM_Primary_Cluster", "GMM_Secondary_Cluster", "Full Role",
              "scouting_report", "projected_range"]

prospect_stats = prospects_2.drop(["ncaa_player_name"] + X_col_drop, axis=1)

prospect_stats.loc[prospect_stats["ncaa_conf"].isin(["SEC", "B10", "BE", "ACC", "P12", "P10", "B12"]), "ncaa_conf"] = 3
prospect_stats.loc[prospect_stats["ncaa_conf"].isin(["WCC", "Amer", "A10", "MWC"]), "ncaa_conf"] = 2
prospect_stats.loc[prospect_stats["ncaa_conf"].isin(["CAA", "SB", "MVC", "CUSA", "MAC", "SC", "WAC"]), "ncaa_conf"] = 1
prospect_stats.loc[prospect_stats["ncaa_conf"].isin(["Sum", "MEAC", "OVC", "BSky", "Horz", "ASun", "MAAC", "BW", "NEC", "Pat", "Ivy", "BSth", "AE"]), "ncaa_conf"] = 0

prospect_stats["WINGSPAN DIFFERENTIAL"] = prospect_stats["WINGSPAN"] - prospect_stats["HEIGHT"]

prospect_stats.insert(0, "prospect_age", prospect_stats["rookie_Age"])
prospect_stats = prospect_stats.drop("rookie_Age", axis=1)

rf_fifty = joblib.load(parent + "/prospect_modeling/prospect_models/peak_models/rf_peak_pred.sav")
mean_preds = rf_fifty.predict(prospect_stats)

mean_columns = ["mean_AST_2_TOV", "mean_AST_2_FG", "mean_AST%", "mean_TOV%", "mean_USG%", "mean_PPP", "mean_0-3_prop", "mean_3-10_prop", "mean_10-16_prop", "mean_16-3P_prop", "mean_corner_%3PA", "mean_2P_%astd", "mean_3P_%astd", "mean_0-3_fg%", "mean_3-10_fg%", "mean_10-16_fg%", "mean_16-3P_fg%", "mean_3P_fg%", "mean_corner_3P%", "mean_STL%", "mean_BLK%", "mean_ORB%", "mean_DRB%", "mean_3P_prop_not_corner"]
mean_outcomes = pd.DataFrame(rf_fifty.predict(prospect_stats), columns=mean_columns)

pf = pd.concat([pf, mean_outcomes], axis=1)

gbr_floor = joblib.load(parent + "/prospect_modeling/prospect_models/peak_models/gbr_floor.sav")
gbr_ceiling = joblib.load(parent + "/prospect_modeling/prospect_models/peak_models/gbr_ceiling.sav")

floor_columns = ["floor_AST_2_TOV", "floor_AST_2_FG", "floor_AST%", "floor_TOV%", "floor_USG%", "floor_PPP", "floor_0-3_prop", "floor_3-10_prop", "floor_10-16_prop", "floor_16-3P_prop", "floor_corner_%3PA", "floor_2P_%astd", "floor_3P_%astd", "floor_0-3_fg%", "floor_3-10_fg%", "floor_10-16_fg%", "floor_16-3P_fg%", "floor_3P_fg%", "floor_corner_3P%", "floor_STL%", "floor_BLK%", "floor_ORB%", "floor_DRB%", "floor_3P_prop_not_corner"]
ceiling_columns = ["ceiling_AST_2_TOV", "ceiling_AST_2_FG", "ceiling_AST%", "ceiling_TOV%", "ceiling_USG%", "ceiling_PPP", "ceiling_0-3_prop", "ceiling_3-10_prop", "ceiling_10-16_prop", "ceiling_16-3P_prop", "ceiling_corner_%3PA", "ceiling_2P_%astd", "ceiling_3P_%astd", "ceiling_0-3_fg%", "ceiling_3-10_fg%", "ceiling_10-16_fg%", "ceiling_16-3P_fg%", "ceiling_3P_fg%", "ceiling_corner_3P%", "ceiling_STL%", "ceiling_BLK%", "ceiling_ORB%", "ceiling_DRB%", "ceiling_3P_prop_not_corner"]

floor_outcomes = pd.DataFrame(gbr_floor.predict(mean_preds), columns=floor_columns)
ceiling_outcomes = pd.DataFrame(gbr_ceiling.predict(mean_preds), columns=ceiling_columns)

pf = pd.concat([pf, floor_outcomes], axis=1)
pf = pd.concat([pf, ceiling_outcomes], axis=1)

Feature names must be in the same order as they were in fit.

Feature names must be in the same order as they were in fit.



--------

# Finding Roles for Each Player 

In [43]:
mean_cluster_inputs = ["mean_AST_2_FG", "mean_PPP", "mean_ORB%", "mean_DRB%", "mean_AST%",
                           "mean_STL%", "mean_BLK%", "mean_TOV%", "mean_USG%", "mean_2P_%astd",
                           "mean_3P_%astd", "mean_corner_%3PA", "mean_0-3_prop", "mean_3-10_prop",
                           "mean_10-16_prop", "mean_16-3P_prop", "mean_3P_prop_not_corner"]

mean_cluster_attributes = pf.loc[:,mean_cluster_inputs]
mean_cluster_attributes["mean_close_prop"] = mean_cluster_attributes["mean_0-3_prop"] + mean_cluster_attributes["mean_3-10_prop"]
mean_cluster_attributes["mean_mid_prop"] = mean_cluster_attributes["mean_10-16_prop"] + mean_cluster_attributes["mean_16-3P_prop"]

mean_cluster_attributes = mean_cluster_attributes.drop(["mean_0-3_prop", "mean_3-10_prop",
                           "mean_10-16_prop", "mean_16-3P_prop"], axis=1)

ceiling_cluster_inputs = ["ceiling_AST_2_FG", "ceiling_PPP", "ceiling_ORB%", "ceiling_DRB%", "ceiling_AST%",
                           "ceiling_STL%", "ceiling_BLK%", "ceiling_TOV%", "ceiling_USG%", "ceiling_2P_%astd",
                           "ceiling_3P_%astd", "ceiling_corner_%3PA", "ceiling_0-3_prop", "ceiling_3-10_prop",
                           "ceiling_10-16_prop", "ceiling_16-3P_prop", "ceiling_3P_prop_not_corner"]

ceiling_cluster_attributes = pf.loc[:,ceiling_cluster_inputs]
ceiling_cluster_attributes["ceiling_close_prop"] = ceiling_cluster_attributes["ceiling_0-3_prop"] + ceiling_cluster_attributes["ceiling_3-10_prop"]
ceiling_cluster_attributes["ceiling_mid_prop"] = ceiling_cluster_attributes["ceiling_10-16_prop"] + ceiling_cluster_attributes["ceiling_16-3P_prop"]

ceiling_cluster_attributes = ceiling_cluster_attributes.drop(["ceiling_0-3_prop", "ceiling_3-10_prop",
                           "ceiling_10-16_prop", "ceiling_16-3P_prop"], axis=1)

floor_cluster_inputs = ["floor_AST_2_FG", "floor_PPP", "floor_ORB%", "floor_DRB%", "floor_AST%",
                           "floor_STL%", "floor_BLK%", "floor_TOV%", "floor_USG%", "floor_2P_%astd",
                           "floor_3P_%astd", "floor_corner_%3PA", "floor_0-3_prop", "floor_3-10_prop",
                           "floor_10-16_prop", "floor_16-3P_prop", "floor_3P_prop_not_corner"]

floor_cluster_attributes = pf.loc[:,floor_cluster_inputs]
floor_cluster_attributes["floor_close_prop"] = floor_cluster_attributes["floor_0-3_prop"] + floor_cluster_attributes["floor_3-10_prop"]
floor_cluster_attributes["floor_mid_prop"] = floor_cluster_attributes["floor_10-16_prop"] + floor_cluster_attributes["floor_16-3P_prop"]

floor_cluster_attributes = floor_cluster_attributes.drop(["floor_0-3_prop", "floor_3-10_prop",
                           "floor_10-16_prop", "floor_16-3P_prop"], axis=1)

mean_cluster_attributes = mean_cluster_attributes.loc[:,["mean_AST_2_FG", "mean_PPP", "mean_ORB%", "mean_DRB%", "mean_AST%",
                           "mean_STL%", "mean_BLK%", "mean_TOV%", "mean_USG%", "mean_2P_%astd",
                           "mean_3P_%astd", "mean_corner_%3PA", "mean_close_prop", "mean_mid_prop", "mean_3P_prop_not_corner"]]

ceiling_cluster_attributes = ceiling_cluster_attributes.loc[:,["ceiling_AST_2_FG", "ceiling_PPP", "ceiling_ORB%", "ceiling_DRB%", "ceiling_AST%",
                           "ceiling_STL%", "ceiling_BLK%", "ceiling_TOV%", "ceiling_USG%", "ceiling_2P_%astd",
                           "ceiling_3P_%astd", "ceiling_corner_%3PA", "ceiling_close_prop", "ceiling_mid_prop", "ceiling_3P_prop_not_corner"]]

floor_cluster_attributes = floor_cluster_attributes.loc[:,["floor_AST_2_FG", "floor_PPP", "floor_ORB%", "floor_DRB%", "floor_AST%",
                           "floor_STL%", "floor_BLK%", "floor_TOV%", "floor_USG%", "floor_2P_%astd",
                           "floor_3P_%astd", "floor_corner_%3PA", "floor_close_prop", "floor_mid_prop", "floor_3P_prop_not_corner"]]

In [44]:
role_scaler = joblib.load(parent + "/prospect_modeling/prospect_models/nba_role_models/nba_role_scaler.sav")
role_pca = joblib.load(parent + "/prospect_modeling/prospect_models/nba_role_models/nba_role_pca.sav")

floor_c = role_scaler.fit_transform(floor_cluster_attributes)
mean_c = role_scaler.fit_transform(mean_cluster_attributes)
ceiling_c = role_scaler.fit_transform(ceiling_cluster_attributes)

floor_pca = role_pca.transform(floor_c)
mean_pca = role_pca.transform(mean_c)
ceiling_pca = role_pca.transform(ceiling_c)

role_cluster = joblib.load(parent + "/prospect_modeling/prospect_models/nba_role_models/nba_role_cluster.sav")

mean_clusters = pd.DataFrame(np.round(role_cluster.predict_proba(mean_pca), 3), 
                          columns=["P(1)", "P(2)", "P(3)", "P(4)", "P(5)", "P(6)"])

arank = mean_clusters.apply(np.argsort, axis=1)
brank = mean_clusters.apply(np.sort, axis=1)

mean_clusters["Primary Cluster"] = (arank.values[:, ::-1][:,:2][:,0]+1)
mean_clusters["P(Primary Cluster)"] = brank.str[-1]
mean_clusters["Secondary Cluster"] = (arank.values[:, ::-1][:,:2][:,1]+1)
mean_clusters["P(Secondary Cluster)"] = brank.str[-2]

mean_clusters.loc[mean_clusters["P(Secondary Cluster)"] < .2, "Secondary Cluster"] = 0

pf = pf.assign(Mean_Primary_Cluster =  mean_clusters["Primary Cluster"])
pf = pf.assign(Mean_Primary_Cluster_Prob =  mean_clusters["P(Primary Cluster)"])

pf = pf.assign(Mean_Secondary_Cluster =  mean_clusters["Secondary Cluster"])
pf = pf.assign(Mean_Secondary_Cluster_Prob =  mean_clusters["P(Secondary Cluster)"])

pf["Mean_Primary_Cluster"] = pf["Mean_Primary_Cluster"].replace({1: "Floor Spacer",
                                                                     2: "Pass-First Playmaker",
                                                                     3: "Rim Runner",
                                                                     4: "Skilled Big",
                                                                     5: "Interior Offensive Hub",
                                                                     6: "Scoring Guard"
                                                                     })

pf["Mean_Secondary_Cluster"] = pf["Mean_Secondary_Cluster"].replace({0: "None",
                                                                         1: "Floor Spacer",
                                                                         2: "Pass-First Playmaker",
                                                                         3: "Rim Runner",
                                                                         4: "Skilled Big",
                                                                         5: "Interior Offensive Hub",
                                                                         6: "Scoring Guard"
                                                                         })

pf["Mean Full Role"] = pf["Mean_Primary_Cluster"] + " " + pf["Mean_Secondary_Cluster"]

pf["Mean Full Role"] = pf["Mean Full Role"].replace({"Pass-First Playmaker None": "Pass-First Playmaker",
                                                 "Scoring Guard None": "Scoring Guard",
                                                 "Skilled Big None": "Skilled Big",
                                                 "Rim Runner None": "Rim Runner",
                                                 "Floor Spacer None": "Floor Spacer",
                                                 "Interior Offensive Hub None": "Interior Offensive Hub",
                                                 "Pass-First Playmaker Interior Offensive Hub": "Floor General",
                                                 "Interior Offensive Hub Pass-First Playmaker": "Floor General",
                                                 "Scoring Guard Interior Offensive Hub": "Perimeter Offensive Hub",
                                                 "Interior Offensive Hub Scoring Guard": "Perimeter Offensive Hub",
                                                 "Scoring Guard Floor Spacer": "Shooting Guard",
                                                 "Floor Spacer Scoring Guard": "Shooting Guard",
                                                 "Skilled Big Rim Runner": "Limited Big",
                                                 "Rim Runner Skilled Big": "Limited Big",
                                                 "Scoring Guard Pass-First Playmaker": "Combo Guard",
                                                 "Pass-First Playmaker Scoring Guard": "Combo Guard",
                                                 "Skilled Big Floor Spacer": "Stretch Four",
                                                 "Floor Spacer Skilled Big": "Stretch Four",
                                                 "Skilled Big Interior Offensive Hub": "Versatile Big",
                                                 "Interior Offensive Hub Skilled Big": "Versatile Big",
                                                 "Skilled Big Pass-First Playmaker": "Glue Guy",
                                                 "Pass-First Playmaker Skilled Big": "Glue Guy",
                                                 "Interior Offensive Hub Floor Spacer": "Catch-and-Score",
                                                 "Floor Spacer Interior Offensive Hub": "Catch-and-Score",
                                                 "Pass-First Playmaker Floor Spacer": "Glue Guy",
                                                 "Floor Spacer Pass-First Playmaker": "Glue Guy",
                                                 "Scoring Guard Skilled Big": "Catch-and-Score",
                                                 "Skilled Big Scoring Guard": "Catch-and-Score",
                                                 "Rim Runner Pass-First Playmaker": "Versatile Big",
                                                 "Pass-First Playmaker Rim Runner": "Versatile Big",
                                                 "Interior Offensive Hub Rim Runner": "Versatile Big",
                                                 "Rim Runner Interior Offensive Hub": "Versatile Big",
                                                 "Rim Runner Floor Spacer": "Limited Big",
                                                 "Floor Spacer Rim Runner": "Limited Big"
                                                })

floor_clusters = pd.DataFrame(np.round(role_cluster.predict_proba(floor_pca), 3), 
                          columns=["P(1)", "P(2)", "P(3)", "P(4)", "P(5)", "P(6)"])

arank = floor_clusters.apply(np.argsort, axis=1)
brank = floor_clusters.apply(np.sort, axis=1)

floor_clusters["Primary Cluster"] = (arank.values[:, ::-1][:,:2][:,0]+1)
floor_clusters["P(Primary Cluster)"] = brank.str[-1]
floor_clusters["Secondary Cluster"] = (arank.values[:, ::-1][:,:2][:,1]+1)
floor_clusters["P(Secondary Cluster)"] = brank.str[-2]

floor_clusters.loc[floor_clusters["P(Secondary Cluster)"] < .2, "Secondary Cluster"] = 0

pf = pf.assign(floor_Primary_Cluster =  floor_clusters["Primary Cluster"])
pf = pf.assign(floor_Primary_Cluster_Prob =  floor_clusters["P(Primary Cluster)"])

pf = pf.assign(floor_Secondary_Cluster =  floor_clusters["Secondary Cluster"])
pf = pf.assign(floor_Secondary_Cluster_Prob =  floor_clusters["P(Secondary Cluster)"])

pf["floor_Primary_Cluster"] = pf["floor_Primary_Cluster"].replace({1: "Floor Spacer",
                                                                     2: "Pass-First Playmaker",
                                                                     3: "Rim Runner",
                                                                     4: "Skilled Big",
                                                                     5: "Interior Offensive Hub",
                                                                     6: "Scoring Guard"
                                                                     })

pf["floor_Secondary_Cluster"] = pf["floor_Secondary_Cluster"].replace({0: "None",
                                                                         1: "Floor Spacer",
                                                                         2: "Pass-First Playmaker",
                                                                         3: "Rim Runner",
                                                                         4: "Skilled Big",
                                                                         5: "Interior Offensive Hub",
                                                                         6: "Scoring Guard"
                                                                         })

pf["floor Full Role"] = pf["floor_Primary_Cluster"] + " " + pf["floor_Secondary_Cluster"]

pf["floor Full Role"] = pf["floor Full Role"].replace({"Pass-First Playmaker None": "Pass-First Playmaker",
                                                 "Scoring Guard None": "Scoring Guard",
                                                 "Skilled Big None": "Skilled Big",
                                                 "Rim Runner None": "Rim Runner",
                                                 "Floor Spacer None": "Floor Spacer",
                                                 "Interior Offensive Hub None": "Interior Offensive Hub",
                                                 "Pass-First Playmaker Interior Offensive Hub": "Floor General",
                                                 "Interior Offensive Hub Pass-First Playmaker": "Floor General",
                                                 "Scoring Guard Interior Offensive Hub": "Perimeter Offensive Hub",
                                                 "Interior Offensive Hub Scoring Guard": "Perimeter Offensive Hub",
                                                 "Scoring Guard Floor Spacer": "Shooting Guard",
                                                 "Floor Spacer Scoring Guard": "Shooting Guard",
                                                 "Skilled Big Rim Runner": "Limited Big",
                                                 "Rim Runner Skilled Big": "Limited Big",
                                                 "Scoring Guard Pass-First Playmaker": "Combo Guard",
                                                 "Pass-First Playmaker Scoring Guard": "Combo Guard",
                                                 "Skilled Big Floor Spacer": "Stretch Four",
                                                 "Floor Spacer Skilled Big": "Stretch Four",
                                                 "Skilled Big Interior Offensive Hub": "Versatile Big",
                                                 "Interior Offensive Hub Skilled Big": "Versatile Big",
                                                 "Skilled Big Pass-First Playmaker": "Glue Guy",
                                                 "Pass-First Playmaker Skilled Big": "Glue Guy",
                                                 "Interior Offensive Hub Floor Spacer": "Catch-and-Score",
                                                 "Floor Spacer Interior Offensive Hub": "Catch-and-Score",
                                                 "Pass-First Playmaker Floor Spacer": "Glue Guy",
                                                 "Floor Spacer Pass-First Playmaker": "Glue Guy",
                                                 "Scoring Guard Skilled Big": "Catch-and-Score",
                                                 "Skilled Big Scoring Guard": "Catch-and-Score",
                                                 "Rim Runner Pass-First Playmaker": "Versatile Big",
                                                 "Pass-First Playmaker Rim Runner": "Versatile Big",
                                                 "Interior Offensive Hub Rim Runner": "Versatile Big",
                                                 "Rim Runner Interior Offensive Hub": "Versatile Big",
                                                 "Rim Runner Floor Spacer": "Limited Big",
                                                 "Floor Spacer Rim Runner": "Limited Big"
                                                })

ceiling_clusters = pd.DataFrame(np.round(role_cluster.predict_proba(ceiling_pca), 3), 
                          columns=["P(1)", "P(2)", "P(3)", "P(4)", "P(5)", "P(6)"])

arank = ceiling_clusters.apply(np.argsort, axis=1)
brank = ceiling_clusters.apply(np.sort, axis=1)

ceiling_clusters["Primary Cluster"] = (arank.values[:, ::-1][:,:2][:,0]+1)
ceiling_clusters["P(Primary Cluster)"] = brank.str[-1]
ceiling_clusters["Secondary Cluster"] = (arank.values[:, ::-1][:,:2][:,1]+1)
ceiling_clusters["P(Secondary Cluster)"] = brank.str[-2]

ceiling_clusters.loc[ceiling_clusters["P(Secondary Cluster)"] < .2, "Secondary Cluster"] = 0

pf = pf.assign(ceiling_Primary_Cluster =  ceiling_clusters["Primary Cluster"])
pf = pf.assign(ceiling_Primary_Cluster_Prob =  ceiling_clusters["P(Primary Cluster)"])

pf = pf.assign(ceiling_Secondary_Cluster =  ceiling_clusters["Secondary Cluster"])
pf = pf.assign(ceiling_Secondary_Cluster_Prob =  ceiling_clusters["P(Secondary Cluster)"])

pf["ceiling_Primary_Cluster"] = pf["ceiling_Primary_Cluster"].replace({1: "Floor Spacer",
                                                                     2: "Pass-First Playmaker",
                                                                     3: "Rim Runner",
                                                                     4: "Skilled Big",
                                                                     5: "Interior Offensive Hub",
                                                                     6: "Scoring Guard"
                                                                     })

pf["ceiling_Secondary_Cluster"] = pf["ceiling_Secondary_Cluster"].replace({0: "None",
                                                                         1: "Floor Spacer",
                                                                         2: "Pass-First Playmaker",
                                                                         3: "Rim Runner",
                                                                         4: "Skilled Big",
                                                                         5: "Interior Offensive Hub",
                                                                         6: "Scoring Guard"
                                                                         })

pf["ceiling Full Role"] = pf["ceiling_Primary_Cluster"] + " " + pf["ceiling_Secondary_Cluster"]

pf["ceiling Full Role"] = pf["ceiling Full Role"].replace({"Pass-First Playmaker None": "Pass-First Playmaker",
                                                 "Scoring Guard None": "Scoring Guard",
                                                 "Skilled Big None": "Skilled Big",
                                                 "Rim Runner None": "Rim Runner",
                                                 "Floor Spacer None": "Floor Spacer",
                                                 "Interior Offensive Hub None": "Interior Offensive Hub",
                                                 "Pass-First Playmaker Interior Offensive Hub": "Floor General",
                                                 "Interior Offensive Hub Pass-First Playmaker": "Floor General",
                                                 "Scoring Guard Interior Offensive Hub": "Perimeter Offensive Hub",
                                                 "Interior Offensive Hub Scoring Guard": "Perimeter Offensive Hub",
                                                 "Scoring Guard Floor Spacer": "Shooting Guard",
                                                 "Floor Spacer Scoring Guard": "Shooting Guard",
                                                 "Skilled Big Rim Runner": "Limited Big",
                                                 "Rim Runner Skilled Big": "Limited Big",
                                                 "Scoring Guard Pass-First Playmaker": "Combo Guard",
                                                 "Pass-First Playmaker Scoring Guard": "Combo Guard",
                                                 "Skilled Big Floor Spacer": "Stretch Four",
                                                 "Floor Spacer Skilled Big": "Stretch Four",
                                                 "Skilled Big Interior Offensive Hub": "Versatile Big",
                                                 "Interior Offensive Hub Skilled Big": "Versatile Big",
                                                 "Skilled Big Pass-First Playmaker": "Glue Guy",
                                                 "Pass-First Playmaker Skilled Big": "Glue Guy",
                                                 "Interior Offensive Hub Floor Spacer": "Catch-and-Score",
                                                 "Floor Spacer Interior Offensive Hub": "Catch-and-Score",
                                                 "Pass-First Playmaker Floor Spacer": "Glue Guy",
                                                 "Floor Spacer Pass-First Playmaker": "Glue Guy",
                                                 "Scoring Guard Skilled Big": "Catch-and-Score",
                                                 "Skilled Big Scoring Guard": "Catch-and-Score",
                                                 "Rim Runner Pass-First Playmaker": "Versatile Big",
                                                 "Pass-First Playmaker Rim Runner": "Versatile Big",
                                                 "Interior Offensive Hub Rim Runner": "Versatile Big",
                                                 "Rim Runner Interior Offensive Hub": "Versatile Big",
                                                 "Rim Runner Floor Spacer": "Limited Big",
                                                 "Floor Spacer Rim Runner": "Limited Big"
                                                })



---------

# Player Comps

In [45]:
peaks = pd.read_csv("ncaa_to_peak.csv")

knn_pca_attributes = ["rookie_Age", "ncaa_usg", "ncaa_oreb%", "ncaa_dreb%", "ncaa_ast%", "ncaa_to%",
                      "ncaa_3p%", "ncaa_blk%", "ncaa_stl%", "ncaa_ast_tov_ratio", "ncaa_rim_fg%", "ncaa_mid_fg%",
                      "ncaa_dgbpm", "ncaa_ppp", "ncaa_rim_prop", "ncaa_mid_prop", "ncaa_3pt_prop", "STANDING REACH",
                      "VERTICAL LEAP", "LANE AGILITY", "consensus_tier"
                     ]

knn_X = prospects_2.loc[:,knn_pca_attributes]
knn_X = knn_X.rename({"rookie_Age": "prospect_age"},axis=1)

knn_scaler = joblib.load(parent + "/prospect_modeling/prospect_models/knn_models/knn_scaler.sav")
knn_pca = joblib.load(parent + "/prospect_modeling/prospect_models/knn_models/knn_pca.sav")

knn_X = knn_scaler.transform(knn_X)

knn_pca_X = knn_pca.transform(knn_X)

knn_neighbors = joblib.load(parent + "/prospect_modeling/prospect_models/knn_models/knn_players.sav")

knn_comps = knn_neighbors.kneighbors(knn_pca_X)[1]

floor_comps = pd.DataFrame(columns=["floor_comp", "floor_comp_id", "floor_comp_pts", "floor_comp_reb", "floor_comp_ast", "floor_comp_dbpm"])
med_comps = pd.DataFrame(columns=["med_comp", "med_comp_id", "med_comp_pts", "med_comp_reb", "med_comp_ast", "med_comp_dbpm"])
ceiling_comps = pd.DataFrame(columns=["ceiling_comp", "ceiling_comp_id", "ceiling_comp_pts", "ceiling_comp_reb", "ceiling_comp_ast", "ceiling_comp_dbpm"])

for i in range(pf.shape[0]):
    outcomes = peaks.iloc[knn_comps[i]].sort_values("peak_WS")
    outcome_columns = ["ncaa_player_name", "peak_Player-additional", "peak_PTS", "peak_TRB", "peak_AST", "peak_DBPM"]
    
    floor_outcome = outcomes[outcome_columns].iloc[1].tolist()
    med_outcome = outcomes[outcome_columns].iloc[3].tolist()
    ceiling_outcome = outcomes[outcome_columns].iloc[5].tolist()
    
    floor_comps.loc[len(floor_comps), :] = floor_outcome
    med_comps.loc[len(med_comps), :] = med_outcome
    ceiling_comps.loc[len(ceiling_comps), :] = ceiling_outcome
    
pf = pd.concat([pf, floor_comps], axis=1)
pf = pd.concat([pf, med_comps], axis=1)
pf = pd.concat([pf, ceiling_comps], axis=1)

-------

In [46]:
pf.to_csv("prospects.csv", index=False)