In [1]:
import pandas as pd
import numpy as np
import math
from sklearn.cluster import KMeans
# Load the 2014 NBA player statistics. A '-' in the CSV marks a missing
# value; every missing value is then replaced by 0 so that all stat columns
# are fully populated.
players = pd.read_csv(
    './data/nba-player-2014.csv',
    na_values='-',
).fillna(0)

In [2]:
# Columns that must not take part in the distance computation: 'fullname' is
# an identifier, and 'Group' is the cluster label that a later cell writes
# back onto `players` (excluding it keeps this cell idempotent on re-run).
distance_excluded_columns = ['fullname', 'Group']

# Keep the original column order. A set difference would yield an arbitrary
# order that can change from one kernel session to the next.
distance_columns = [
    column for column in players.columns
    if column not in distance_excluded_columns
]
distance_stat = players[distance_columns]

# Z-score each column so that every statistic contributes on the same scale.
normalized_stat = (distance_stat - distance_stat.mean()) / distance_stat.std()

In [3]:
# Cluster the players into 5 groups on the normalized statistics.
# random_state fixes the centroid initialisation so the assignment is
# reproducible after a kernel restart; n_init is pinned explicitly because
# its default differs between scikit-learn releases.
kmeans = KMeans(n_clusters=5, n_init=10, random_state=0).fit(normalized_stat)
kmeans.labels_

array([3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 2, 3, 3, 0, 3, 3, 2, 3, 3, 0, 3, 3, 3, 2, 2, 3, 0, 2,
       0, 0, 0, 3, 2, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 2,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 0, 0,
       0, 2, 0, 2, 2, 0, 0, 0, 2, 0, 2, 0, 0, 0, 2, 0, 0, 2, 0, 0, 0, 0,
       2, 0, 2, 2, 0, 2, 0, 2, 2, 4, 0, 2, 0, 0, 0, 0, 0, 0, 4, 4, 0, 2,
       0, 2, 2, 2, 0, 0, 0, 0, 2, 0, 2, 4, 0, 4, 4, 4, 0, 2, 4, 4, 4, 2,
       1, 1, 2, 2, 1, 4, 1, 4, 4, 1, 4, 4, 4, 4, 1, 4, 4, 4, 4, 4, 4, 4,
       4, 1, 1, 4, 4, 4, 4, 4, 4, 4, 4, 1, 1, 4, 1, 1, 4, 4, 4, 4, 4, 1,
       4, 1, 4, 4, 4, 4, 4, 4, 1, 1, 1, 1, 4, 4, 4, 1, 4, 4, 1, 4, 4, 4,
       4, 1, 4, 1, 4, 4, 4, 4, 4, 4, 4, 1, 4, 1, 1, 1, 4, 4, 1, 4, 4, 1,
       4, 4, 4, 1, 1, 1, 4, 4, 1, 4, 1, 1, 4, 4, 4, 1, 1, 4, 4, 1, 1, 1,
       1, 1, 1])

In [4]:
# Attach each player's k-means cluster label to the main table as a new
# 'Group' column (labels_ is in the same row order as normalized_stat).
players['Group'] = kmeans.labels_

In [5]:
# Number of players per cluster. size() counts rows directly; counting
# non-null cells per column and taking the maximum only approximates this.
players.groupby(kmeans.labels_).size()

0    71
1    42
2    37
3    38
4    79
dtype: int64

In [6]:
def calculate_distance(s1, s2):
    """Return the Euclidean distance from every row of ``s1`` to ``s2``.

    ``s2`` is subtracted from ``s1``, the differences are squared and summed
    along axis 1, and the square root of each row total is returned.
    """
    deltas = s1 - s2
    squared_totals = np.power(deltas, 2).sum(axis=1)
    return np.sqrt(squared_totals)

# Find LeBron James's row and build a boolean mask selecting every player
# that k-means placed in the same cluster as him.
james = players.loc[players['fullname'] == 'James, LeBron']
james_group = players['Group'] == james['Group'].iloc[0]

# Distance, in normalized-stat space, from each cluster-mate to James.
distances = calculate_distance(
    normalized_stat.loc[james_group],
    normalized_stat.loc[james.index[0]],
)
distances.name = 'Distance'

# Show the cluster with the distance appended, nearest players first.
pd.concat(
    [players.loc[james_group], distances],
    axis=1,
).sort_values(by='Distance')

Unnamed: 0,FTA/G,MPG,FT%,PPG,GP,FGM/G,3FGA/G,3FG%,HIGH,PPS,3FGM/G,FTM/G,FG%,PTS/48,GS,fullname,PTS,FGA/G,Group,Distance
2,7.7,36.1,0.71,25.3,69,9.0,4.9,0.354,42,1.36,1.7,5.4,0.488,33.6,69,"James, LeBron",1743,18.5,3,0.0
11,5.8,35.4,0.858,21.1,68,7.5,3.2,0.359,40,1.28,1.2,5.0,0.455,28.5,67,"Gay, Rudy",1432,16.4,3,2.928869
15,6.1,34.4,0.812,19.3,76,6.4,4.3,0.364,33,1.35,1.6,4.9,0.445,26.8,76,"Hayward, Gordon",1463,14.3,3,3.423804
10,6.0,31.8,0.768,21.5,62,8.2,1.6,0.284,42,1.23,0.5,4.6,0.47,32.4,62,"Wade, Dwyane",1331,17.5,3,3.485929
8,4.9,36.4,0.863,21.7,75,7.7,5.0,0.415,57,1.32,2.1,4.2,0.468,28.6,75,"Irving, Kyrie",1628,16.5,3,3.555443
14,7.1,38.7,0.834,20.0,65,6.5,3.0,0.378,35,1.43,1.1,5.9,0.462,24.8,65,"Butler, Jimmy",1301,14.0,3,3.649827
6,5.1,35.4,0.845,23.4,71,9.3,1.5,0.352,39,1.17,0.5,4.3,0.466,31.7,71,"Aldridge, LaMarcus",1661,19.9,3,3.667809
7,6.4,35.2,0.728,21.9,67,8.6,0.4,0.4,45,1.28,0.1,4.6,0.502,29.9,67,"Griffin, Blake",1469,17.1,3,3.685176
0,9.8,34.4,0.835,28.1,67,9.4,4.3,0.299,54,1.28,1.3,8.1,0.426,39.3,67,"Westbrook, Russell",1886,22.0,3,3.722021
12,4.9,35.7,0.864,21.0,82,7.2,7.0,0.343,43,1.26,2.4,4.2,0.434,28.2,82,"Lillard, Damian",1720,16.6,3,3.967624
