In [None]:
import pandas as pd
from scipy import spatial
from operator import itemgetter
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Load the two input tables from CSV files located relative to the notebook's
# working directory. Both frames are read by the preparation cell further down.
df_historical = pd.read_csv('historical-drafts.csv')
df_current = pd.read_csv('current-draft.csv')

In [None]:
# Show the first rows of the historical table as a quick sanity check.
df_historical.head()

In [None]:
# Display the column labels available in the historical table.
df_historical.columns.values

In [None]:
def df_prep(df, features, counting_features):
    """Build a min-max scaled feature frame and the matching player names.

    Steps:
      1. Drop rows that are missing any of ``features``.
      2. Drop rows whose ``'MP'`` value is missing or not positive, because a
         per-40 rate cannot be computed for them.
      3. Rescale every column in ``counting_features`` to ``value * 40 / MP``.
      4. Min-max scale the ``features`` columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Input table. Must contain the columns ``'Player'`` and ``'MP'`` plus
        everything listed in ``features`` and ``counting_features``.
        The caller's frame is not modified.
    features : list of str
        Columns returned in the scaled frame.
    counting_features : list of str
        Columns converted to per-40 rates before scaling.

    Returns
    -------
    (pandas.DataFrame, pandas.Series)
        The scaled features (fresh 0..n-1 index, same column order as
        ``features``) and the ``'Player'`` values of the rows that were kept,
        in the same row order. The names keep their original index labels.

    Notes
    -----
    A new ``MinMaxScaler`` is fitted on every call, so frames produced by
    separate calls are each scaled to their own min/max.
    NOTE(review): confirm that independent scaling is intended when the
    outputs of two calls are compared against each other.
    """
    # .copy() so the column assignment below never touches the caller's frame
    # (and does not trigger SettingWithCopyWarning).
    prepared = df.dropna(subset=features)
    prepared = prepared[prepared['MP'] > 0].copy()  # NaN > 0 is False, so NaN MP is dropped too

    # The original expression computed this value and discarded it. axis=0 is
    # required so each row is divided by that row's MP; the default would try
    # to align the MP index with the column labels instead.
    prepared[counting_features] = (
        prepared[counting_features].mul(40).div(prepared['MP'], axis=0)
    )

    # Taken after all row filtering so names[i] corresponds to df_scaled.iloc[i].
    names = prepared['Player']
    feature_frame = prepared[features]

    scaler = MinMaxScaler()
    df_scaled = pd.DataFrame(
        scaler.fit_transform(feature_frame), columns=feature_frame.columns
    )
    return df_scaled, names

In [None]:
# Columns handed to df_prep as its counting_features argument.
counting_features = ['PTS', 'AST', 'TRB', 'STL', 'BLK', 'TOV', 'PF']

# Full comparison feature set: games played, the counting columns above,
# then the remaining rate/value columns. Built by concatenation so the
# counting columns are listed only once.
features = ['G'] + counting_features + ['TS%', '3PAr', 'FTr', 'WS']

# Prepare each table: a scaled feature frame plus the matching player names.
df_historical_sim, historical_names = df_prep(
    df_historical, features, counting_features
)
df_current_sim, current_names = df_prep(
    df_current, features, counting_features
)

In [None]:
def list_sort(list_1, reverse_bool):
    """Pair each score with a historical player name and sort by score.

    ``list_1`` is zipped positionally with the module-level
    ``historical_names``; the shorter of the two determines the length.

    Parameters
    ----------
    list_1 : iterable
        Scores, in the same order as ``historical_names``.
    reverse_bool : bool
        ``True`` sorts from largest to smallest score, ``False`` from
        smallest to largest.

    Returns
    -------
    list of [score, name]
        Two-element lists ordered by score. The sort is stable, so ties keep
        their original relative order.
    """
    by_score = itemgetter(0)

    pairs = []
    for score, player in zip(list_1, historical_names):
        pairs.append([score, player])

    pairs.sort(key=by_score, reverse=reverse_bool)
    return pairs

In [None]:
def similarity(name):
    """Print the five historical players most similar to ``name``.

    The player's row in the module-level ``df_current_sim`` is compared with
    every row of ``df_historical_sim`` using two measures:

    * cosine similarity (``1 - cosine distance``), ranked high to low
    * Euclidean distance, ranked low to high

    Ranking and name pairing are delegated to ``list_sort``. The top five of
    each ranking are printed as ``score name`` lines.

    Parameters
    ----------
    name : str
        A value present in ``current_names``; the first match is used.

    Raises
    ------
    ValueError
        If ``name`` is not in ``current_names``.
    """
    player_idx = list(current_names).index(name)

    # SciPy's distance functions accept only 1-D vectors; selecting the row
    # with a scalar position (not a list) yields a flat array rather than a
    # 1 x n frame, which current SciPy rejects.
    target = df_current_sim.iloc[player_idx].to_numpy()

    cos = []
    euclid = []
    for row in df_historical_sim.to_numpy():
        cos.append(1 - spatial.distance.cosine(target, row))
        euclid.append(spatial.distance.euclidean(target, row))

    # Rank once after all scores are collected. Sorting inside the loop
    # re-sorted a growing list on every iteration and left these names
    # undefined when there were no historical rows.
    cos_list = list_sort(cos, True)
    euclid_list = list_sort(euclid, False)

    print(name + "\n")
    print("Cosine:")
    for score, player in cos_list[0:5]:
        print(score, player)

    print("\nEuclid:")
    for score, player in euclid_list[0:5]:
        print(score, player)

    print("-" * 20)

In [None]:
# Print the closest historical matches for every name in current_names.
for i in current_names:
    similarity(i)

In [None]:
def similarity_plot(name):
    """Save a bar chart of the five historical players most similar to ``name``.

    Scores are computed the same way as in ``similarity``: the player's row in
    the module-level ``df_current_sim`` is compared with every row of
    ``df_historical_sim`` by cosine similarity (ranked high to low) and by
    Euclidean distance (ranked low to high), with ``list_sort`` doing the
    ranking and name pairing.

    The chart shows the top five of each ranking as paired bars, writes the
    matched player's name vertically on each bar, and is saved as
    ``<name lower-cased, spaces replaced by '-'>.png`` in the working
    directory.

    Parameters
    ----------
    name : str
        A value present in ``current_names``; the first match is used.

    Raises
    ------
    ValueError
        If ``name`` is not in ``current_names``.
    """
    player_idx = list(current_names).index(name)

    # SciPy's distance functions accept only 1-D vectors; selecting the row
    # with a scalar position (not a list) yields a flat array rather than a
    # 1 x n frame, which current SciPy rejects.
    target = df_current_sim.iloc[player_idx].to_numpy()

    cos = []
    euclid = []
    for row in df_historical_sim.to_numpy():
        cos.append(1 - spatial.distance.cosine(target, row))
        euclid.append(spatial.distance.euclidean(target, row))

    # Rank once after all scores are collected. Sorting inside the loop
    # re-sorted a growing list on every iteration and left these names
    # undefined when there were no historical rows.
    cos_list = list_sort(cos, True)
    euclid_list = list_sort(euclid, False)

    top_cos = cos_list[0:5]
    top_euclid = euclid_list[0:5]

    plt.style.use('fivethirtyeight')
    fig, ax = plt.subplots()

    y_cos = [score for score, _ in top_cos]
    y_euclid = [score for score, _ in top_euclid]
    positions = np.arange(len(y_cos))

    # Two bars per rank, offset left/right of the shared tick position.
    cos_bars = ax.bar(positions - .22, y_cos, width = .4, color = 'C0',
                      edgecolor = 'white', linewidth = 1.5, label = 'Cosine')
    euclid_bars = ax.bar(positions + .22, y_euclid, width = .4, color = 'C1',
                         edgecolor = 'white', linewidth = 1.5, label = 'Euclid')

    ax.xaxis.set_visible(False)

    # Label every bar with the player it represents. Bars and labels are
    # paired per series so the mapping does not depend on axes patch order.
    labelled_bars = list(zip(cos_bars, (player for _, player in top_cos)))
    labelled_bars += list(zip(euclid_bars, (player for _, player in top_euclid)))
    for rect, label in labelled_bars:
        ax.text(rect.get_x() + rect.get_width() / 1.75, .04, label,
                ha='center', va='bottom', rotation = 'vertical', color = 'black')

    fig.suptitle("%s similarity" % name, size = 18, weight = 'bold', y = 1.005)
    ax.set_title('Most similar players on left, least similar on right', size = 14, fontname = 'Rockwell')

    ax.legend(loc = 'best', ncol = 2, prop={'size': 11, "family": "Rockwell"})

    # Footer: a rule line and the source attribution below the axes.
    fig.text(x = 0, y = 0.01,
        s = '____________________________________________________________',
        fontsize = 14, color = 'grey', horizontalalignment='left')

    fig.text(x = 0, y = -.05,
        s = 'https://dribbleanalytics.blog                     ',
        fontsize = 14, fontname = 'Rockwell', color = 'grey', horizontalalignment='left')

    fig.savefig('%s.png' % name.replace(' ', '-').lower(), dpi = 400, bbox_inches = 'tight')

In [None]:
# Render and save one similarity chart for every name in current_names.
for i in current_names:
    similarity_plot(i)