In [1]:
import os

import numpy as np
import pandas as pd
import plotly.express as px
from scipy.spatial import distance
from sklearn import preprocessing as pre


<h3> First we retrieve the data </h3>


In [2]:
# Load the historical lottery picks and the current rookie class.
# Missing cells (NaN) and cells holding a single space are both turned into 0.
past_df, current_df = (
    pd.read_csv(csv_path).fillna(0.000).replace(' ',0)
    for csv_path in (
        'csv_files/LotteryPickStatsHistory.csv',
        'csv_files/CurrentRookies.csv',
    )
)

# Show which stat columns are available
past_df.columns



Index(['Player', 'School', 'Year', 'Pick', 'G', 'GS', 'MP', 'FG', 'FGA', 'FG%',
       '2P', '2PA', '2P%', '3P', '3PA', '3P%', 'FT', 'FTA', 'FT%', 'ORB',
       'DRB', 'TRB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS', 'SOS', 'PER',
       'TS%', 'eFG%', '3PAr', 'FTr', 'PProd', 'ORB%', 'DRB%', 'TRB%', 'AST%',
       'STL%', 'BLK%', 'TOV%', 'USG%', 'OWS', 'DWS', 'WS', 'WS/40', 'OBPM',
       'DBPM', 'BPM'],
      dtype='object')

In [3]:
def make_per_36(df, stat_columns=('PTS', 'AST', 'TRB', 'STL', 'BLK', 'PF')):
    """Scale counting stats to a per-36-minutes basis, in place.

    Each column in ``stat_columns`` is multiplied by ``36 / MP`` for every
    row. Rows whose ``MP`` is 0 are left unchanged, because no rate can be
    derived from zero minutes.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an ``MP`` column and every column in ``stat_columns``.
        The frame is modified in place.
    stat_columns : sequence of str, optional
        Columns to rescale. Defaults to the six stats the function has
        always adjusted.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in, after adjustment.
    """
    stat_columns = list(stat_columns)
    minutes = df['MP'].to_numpy(dtype=float)

    # Swap zero minutes for 36 so those rows get a multiplier of exactly 1
    # (unchanged) and no division by zero ever happens.
    safe_minutes = np.where(minutes == 0, 36.0, minutes)
    multiplier = 36.0 / safe_minutes

    # One positional, whole-column multiply: independent of index labels and
    # valid for frames of any length, including empty or very short ones.
    df[stat_columns] = df[stat_columns].to_numpy(dtype=float) * multiplier[:, np.newaxis]

    print(f"{int((minutes != 0).sum())} players stats adjusted")

    return df

In [4]:
# past_df = make_per_36(past_df)
# current_df = make_per_36(current_df)

<h3> Normalize values </h3>

In [5]:
# Stats used to compare players; the same columns are taken from both tables
comparison_columns = [
    'Pick', 'PTS', 'AST', 'TRB', 'STL', 'BLK', 'TS%',
    '3PAr', '3P%', 'FTr', 'FT%', 'TOV%', 'PF', 'SOS',
]

normalized_past_df = past_df[comparison_columns]
normalized_current_df = current_df[comparison_columns]

# Stack past picks first, then current rookies, under one fresh 0..n-1 index
normalized_df = pd.concat(
    [normalized_past_df, normalized_current_df],
    ignore_index=True,
)


In [6]:
# Normalize the data: MinMaxScaler (default settings) rescales each column
# to the [0, 1] range, fitted on past and current players together.
x = normalized_df.values #returns a numpy array
min_max_scaler = pre.MinMaxScaler()
x_scaled = min_max_scaler.fit_transform(x)

# Keep the stat names as column labels instead of falling back to 0..13,
# so the scaled table stays readable.
normalized_df = pd.DataFrame(x_scaled, columns=normalized_df.columns)

In [7]:
# Split the scaled table back into past picks and current rookies.
# Past players were stacked first, so the boundary is the row count of
# past_df; deriving it here keeps the split right if the CSV changes.
past_row_count = len(past_df)

normalized_past_df = normalized_df.iloc[:past_row_count]
normalized_current_df = normalized_df.iloc[past_row_count:]


normalized_past_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,0.000000,0.357798,0.238095,0.758621,0.333333,0.385965,0.479452,0.066343,0.291,0.532333,0.607812,0.540351,0.731707,0.787121
1,0.076923,0.500000,0.892857,0.181034,0.757576,0.087719,0.311644,0.456311,0.369,0.132794,0.607812,0.592982,0.853659,0.681213
2,0.153846,1.000000,0.392857,0.094828,0.454545,0.017544,0.386986,0.533981,0.372,0.139723,0.887500,0.442105,0.512195,0.697179
3,0.230769,0.651376,0.309524,0.293103,0.393939,0.087719,0.393836,0.817152,0.422,0.050808,0.753125,0.389474,0.512195,0.783928
4,0.307692,0.220183,0.321429,0.077586,0.515152,0.052632,0.376712,0.339806,0.374,0.217090,0.718750,0.519298,0.414634,0.866418
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
407,0.230769,0.091743,0.083333,0.181034,0.242424,0.175439,0.291096,0.391586,0.320,0.213626,0.848437,0.600000,0.390244,0.730176
408,0.307692,0.261468,0.202381,0.215517,0.212121,0.157895,0.407534,0.466019,0.286,0.437644,0.589062,0.533333,0.658537,0.716871
409,0.384615,0.412844,0.095238,0.577586,0.303030,0.473684,0.606164,0.022654,0.250,0.383372,0.664062,0.480702,0.658537,0.707823
410,0.538462,0.587156,0.226190,0.482759,0.242424,0.210526,0.739726,0.343042,0.390,0.221709,0.635938,0.456140,0.390244,0.519957


<h3> Get Similarities </h3>

In [30]:
# For every current rookie, rank all past lottery picks by similarity.
# player_distances maps rookie name -> (euclidean_ranking, cosine_ranking).
# Each ranking is a list of [past_player_name, score] pairs:
#   * euclidean score is 1 / distance, sorted high -> low (higher = closer)
#   * cosine score is the cosine distance, sorted low -> high (lower = closer)
past_names = past_df['Player']
current_names = current_df['Player']

# Pair names and feature rows by position on plain arrays, so the lookup
# does not depend on index labels or on hand-maintained counters.
past_vectors = normalized_past_df.to_numpy()
current_vectors = normalized_current_df.to_numpy()

# zip() would silently truncate on a length mismatch, so fail loudly instead
assert len(past_names) == len(past_vectors), "past names/features are misaligned"
assert len(current_names) == len(current_vectors), "current names/features are misaligned"

player_distances = dict()

for curr_name, curr_vector in zip(current_names, current_vectors):

    cos_dist = []
    e_dist = []

    for past_name, past_vector in zip(past_names, past_vectors):
        gap = distance.euclidean(curr_vector, past_vector)
        # Identical stat lines have zero distance: treat them as infinitely
        # similar explicitly instead of triggering a divide-by-zero warning.
        e_dist.append([past_name, 1 / gap if gap else np.inf])
        cos_dist.append([past_name, distance.cosine(curr_vector, past_vector)])

    e_most_simlar = sorted(e_dist, key=lambda pair: pair[1], reverse=True)
    cos_most_simlar = sorted(cos_dist, key=lambda pair: pair[1])

    player_distances[curr_name] = (e_most_simlar, cos_most_simlar)



In [31]:
# Print the ten best Euclidean matches for each current player
for rookie_name, (euclidean_ranking, _cosine_ranking) in player_distances.items():
    top_ten = euclidean_ranking[:10]
    print(f"{rookie_name}: \n {top_ten}")


Cade Cunningham: 
 [['O.J. Mayo', 3.3912845539217926], ["D'Angelo Russell", 2.7405785003667837], ['Ben Gordon', 2.713363334222214], ['Jayson Tatum', 2.6195487715654298], ['Chauncey Billups', 2.6002713299851328], ['Anthon Edwards', 2.439505712208161], ['James Harden', 2.332746160384692], ['Bradley Beal', 2.21909616210765], ['Andrew Wiggins', 2.193725877964551], ['Jim Jackson', 2.105282422660119]]
Jalen Green: 
 [['O.J. Mayo', 2.9575207914287995], ['Buddy Hield', 2.7304919824950318], ['Ben Gordon', 2.6882788260244634], ['Dennis Scott', 2.651602969249742], ['Mike Dunleavy', 2.5613123935116753], ['Ray Allen', 2.405242959113175], ["D'Angelo Russell", 2.346168426236998], ['Stephon Marbury', 2.239950494985339], ['Richard Hamilton', 2.176732020118525], ['Brandon Knight', 2.158621249722975]]
Evan Mobley: 
 [['Chris Bosh', 2.7654205711378057], ['Wendell Carter', 2.3603703564690632], ['Tim Duncan', 2.251362171658879], ['Kevin Love', 2.2289786923305313], ['Jerry Stackhouse', 2.1797266654081313], [

In [41]:
# Draw a bar chart of each rookie's ten closest Euclidean matches and save
# it as a PNG under images/.
# Make sure the output folder exists before anything is written to it, so
# the cell also works on a fresh checkout.
os.makedirs("images", exist_ok=True)

for player in player_distances.keys():

    # Index 0 of the tuple is the Euclidean ranking, as [name, similarity] pairs
    top_matches = player_distances[player][0][0:10]

    fig = px.bar(top_matches, x=0, y=1,title=f"{player} Draft Comparisons", labels={
                         "0": "Player",
                         "1": "Similarity"    
                     },)
    fig.show()

    # File name is the player's name with spaces removed
    fig.write_image(f"images/{player.replace(' ','')}Graph.png")


In [22]:
# Show the single closest Euclidean match for the last rookie in
# player_distances. The key is taken from the dict itself instead of relying
# on a `player` loop variable left over from another cell.
last_player = list(player_distances)[-1]
player_distances[last_player][0][0]

['O.J. Mayo', 0.29487351594945554]