In [1]:
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt

In [2]:
# Read the player stats CSV; a '-' cell is parsed as missing and then replaced by 0.
players = pd.read_csv('./data/nba-player-2014.csv', na_values='-').fillna(0)
players.head(50)

Unnamed: 0,FTA/G,MPG,FT%,PPG,GP,FGM/G,3FGA/G,3FG%,HIGH,PPS,3FGM/G,FTM/G,FG%,PTS/48,GS,fullname,PTS,FGA/G
0,9.8,34.4,0.835,28.1,67,9.4,4.3,0.299,54,1.28,1.3,8.1,0.426,39.3,67,"Westbrook, Russell",1886,22.0
1,10.2,36.8,0.868,27.4,81,8.0,6.9,0.375,51,1.51,2.6,8.8,0.44,35.7,81,"Harden, James",2217,18.1
2,7.7,36.1,0.71,25.3,69,9.0,4.9,0.354,42,1.36,1.7,5.4,0.488,33.6,69,"James, LeBron",1743,18.5
3,6.8,36.1,0.805,24.4,68,9.4,0.2,0.083,43,1.38,0.0,5.5,0.535,32.4,68,"Davis, Anthony",1656,17.6
4,9.2,34.1,0.782,24.1,59,8.4,0.1,0.25,39,1.33,0.0,7.2,0.467,33.9,59,"Cousins, DeMarcus",1421,18.1
5,4.2,32.7,0.914,23.8,80,8.2,8.1,0.443,51,1.42,3.6,3.9,0.487,34.9,80,"Curry, Stephen",1900,16.8
6,5.1,35.4,0.845,23.4,71,9.3,1.5,0.352,39,1.17,0.5,4.3,0.466,31.7,71,"Aldridge, LaMarcus",1661,19.9
7,6.4,35.2,0.728,21.9,67,8.6,0.4,0.4,45,1.28,0.1,4.6,0.502,29.9,67,"Griffin, Blake",1469,17.1
8,4.9,36.4,0.863,21.7,75,7.7,5.0,0.415,57,1.32,2.1,4.2,0.468,28.6,75,"Irving, Kyrie",1628,16.5
9,3.3,31.9,0.879,21.7,77,7.8,7.1,0.439,52,1.28,3.1,2.9,0.463,32.6,77,"Thompson, Klay",1668,16.9


In [3]:
# Pick out the row(s) whose fullname matches LeBron James via a boolean mask.
is_james = players['fullname'] == 'James, LeBron'
james = players.loc[is_james]
james

Unnamed: 0,FTA/G,MPG,FT%,PPG,GP,FGM/G,3FGA/G,3FG%,HIGH,PPS,3FGM/G,FTM/G,FG%,PTS/48,GS,fullname,PTS,FGA/G
2,7.7,36.1,0.71,25.3,69,9.0,4.9,0.354,42,1.36,1.7,5.4,0.488,33.6,69,"James, LeBron",1743,18.5


In [4]:
# Check each column's dtype; only `fullname` is non-numeric (object).
players.dtypes

FTA/G       float64
MPG         float64
FT%         float64
PPG         float64
GP            int64
FGM/G       float64
3FGA/G      float64
3FG%        float64
HIGH          int64
PPS         float64
3FGM/G      float64
FTM/G       float64
FG%         float64
PTS/48      float64
GS            int64
fullname     object
PTS           int64
FGA/G       float64
dtype: object

In [5]:
# Columns that cannot take part in a numeric distance (the player name is text).
distance_excluded_columns = ['fullname']
distance_stat = players.drop(columns=distance_excluded_columns)

# Re-select LeBron from the numeric-only frame; the mask is built from `players`
# because `fullname` no longer exists in `distance_stat` (both share the same index).
james = distance_stat.loc[players['fullname'] == 'James, LeBron']
james

Unnamed: 0,FTA/G,MPG,FT%,PPG,GP,FGM/G,3FGA/G,3FG%,HIGH,PPS,3FGM/G,FTM/G,FG%,PTS/48,GS,PTS,FGA/G
2,7.7,36.1,0.71,25.3,69,9.0,4.9,0.354,42,1.36,1.7,5.4,0.488,33.6,69,1743,18.5


In [6]:
def _single_value(value):
    """Unwrap the single-element Series that a one-row DataFrame yields per column."""
    if isinstance(value, pd.Series):
        if len(value) != 1:
            # Same exception type the implicit float(Series) conversion used to raise.
            raise TypeError(
                "expected a single row, got %d values per column" % len(value))
        return value.iloc[0]
    return value


def euclidean_distance(row1, row2, columns):
    """
    Euclidean distance between two rows, computed over ``columns``.

    Parameters
    ----------
    row1, row2 : mapping, pandas.Series or one-row pandas.DataFrame
        Anything indexable by the names in ``columns``. A one-row DataFrame
        is accepted: each ``row[k]`` is then a length-1 Series, which is
        unwrapped explicitly instead of relying on the implicit
        ``float(Series)`` conversion inside ``math.sqrt`` (deprecated in
        recent pandas releases).
    columns : iterable
        Keys to include in the distance.

    Returns
    -------
    float
        ``sqrt(sum((row1[k] - row2[k]) ** 2 for k in columns))``;
        ``0.0`` when ``columns`` is empty.

    Raises
    ------
    TypeError
        If a row is a DataFrame with more than one row.
    """
    squared_sum = 0.0
    for k in columns:
        delta = _single_value(row1[k]) - _single_value(row2[k])
        squared_sum += delta ** 2
    return math.sqrt(squared_sum)
# Distance on the raw (unscaled) stats between the first player row and LeBron James.
# Large-magnitude columns dominate here: the PTS gap alone is 1886 - 1743 = 143.
euclidean_distance(players.iloc[0], james, james.columns)

143.7667516987151

In [7]:
# Z-score every numeric column (subtract the column mean, divide by the column
# standard deviation) so that no single stat dominates the distance.
column_means = distance_stat.mean()
column_stds = distance_stat.std()
normalized_stat = distance_stat.sub(column_means).div(column_stds)

# LeBron's row in normalised units; the mask still comes from `players`.
james = normalized_stat.loc[players['fullname'] == 'James, LeBron']
james

Unnamed: 0,FTA/G,MPG,FT%,PPG,GP,FGM/G,3FGA/G,3FG%,HIGH,PPS,3FGM/G,FTM/G,FG%,PTS/48,GS,PTS,FGA/G
2,3.24496,1.667945,-0.274915,2.9322,-0.405478,2.76327,1.357214,0.511876,1.898643,1.177417,1.200016,2.676823,0.567836,2.613151,1.02999,2.609888,2.487154


In [8]:
# Distance between the first player row and LeBron James on the normalised stats.
euclidean_distance(normalized_stat.iloc[0], james, james.columns)

3.7220213073253072

In [9]:
# Row-wise distance from every player to LeBron: each row is passed as the first
# argument, and `james` / `james.columns` are forwarded through `args`.
distances = normalized_stat.apply(
    euclidean_distance,
    axis=1,
    args=(james, james.columns),
)
type(distances)

pandas.core.series.Series

In [10]:
# One-column frame of distances, ordered from nearest to farthest.
distance_frame = distances.to_frame(name='dist').sort_values(by='dist')

# Position 0 is the closest row overall, so positions 1..10 are the ten nearest others.
distance_frame.iloc[1:11].index

Int64Index([11, 15, 10, 8, 14, 6, 7, 0, 12, 3], dtype='int64')

In [11]:
# Show the closest row (LeBron himself) followed by his nearest neighbours.
# `distance_frame.index` holds row *labels* of `players`, so the lookup must be
# label-based (.loc); positional .iloc only gave the right rows because the
# index happens to be the default 0..n-1 range.
players.loc[distance_frame.iloc[0:11].index]

Unnamed: 0,FTA/G,MPG,FT%,PPG,GP,FGM/G,3FGA/G,3FG%,HIGH,PPS,3FGM/G,FTM/G,FG%,PTS/48,GS,fullname,PTS,FGA/G
2,7.7,36.1,0.71,25.3,69,9.0,4.9,0.354,42,1.36,1.7,5.4,0.488,33.6,69,"James, LeBron",1743,18.5
11,5.8,35.4,0.858,21.1,68,7.5,3.2,0.359,40,1.28,1.2,5.0,0.455,28.5,67,"Gay, Rudy",1432,16.4
15,6.1,34.4,0.812,19.3,76,6.4,4.3,0.364,33,1.35,1.6,4.9,0.445,26.8,76,"Hayward, Gordon",1463,14.3
10,6.0,31.8,0.768,21.5,62,8.2,1.6,0.284,42,1.23,0.5,4.6,0.47,32.4,62,"Wade, Dwyane",1331,17.5
8,4.9,36.4,0.863,21.7,75,7.7,5.0,0.415,57,1.32,2.1,4.2,0.468,28.6,75,"Irving, Kyrie",1628,16.5
14,7.1,38.7,0.834,20.0,65,6.5,3.0,0.378,35,1.43,1.1,5.9,0.462,24.8,65,"Butler, Jimmy",1301,14.0
6,5.1,35.4,0.845,23.4,71,9.3,1.5,0.352,39,1.17,0.5,4.3,0.466,31.7,71,"Aldridge, LaMarcus",1661,19.9
7,6.4,35.2,0.728,21.9,67,8.6,0.4,0.4,45,1.28,0.1,4.6,0.502,29.9,67,"Griffin, Blake",1469,17.1
0,9.8,34.4,0.835,28.1,67,9.4,4.3,0.299,54,1.28,1.3,8.1,0.426,39.3,67,"Westbrook, Russell",1886,22.0
12,4.9,35.7,0.864,21.0,82,7.2,7.0,0.343,43,1.26,2.4,4.2,0.434,28.2,82,"Lillard, Damian",1720,16.6
