# NBA ROLE PLAYER KNN ANALYSIS

### IMPORTING DATA & LIBRARIES

In [1]:
# nba data
from nba_api.stats.endpoints import leagueleaders

# packages
import pandas as pd
import altair as alt
import matplotlib.pyplot as plt
import seaborn as sn

# scikit
from sklearn.cluster import DBSCAN, KMeans
from sklearn.feature_extraction.text import CountVectorizer

In [2]:
# Pull data for the top 1000 scorers by PTS column
top_1000 = leagueleaders.LeagueLeaders(
    season='2022-23',
    season_type_all_star='Regular Season',
    stat_category_abbreviation='PTS'
).get_data_frames()[0][:1000]
top_1000

Unnamed: 0,PLAYER_ID,RANK,PLAYER,TEAM_ID,TEAM,GP,MIN,FGM,FGA,FG_PCT,...,REB,AST,STL,BLK,TOV,PF,PTS,EFF,AST_TOV,STL_TOV
0,1628369,1,Jayson Tatum,1610612738,BOS,74,2732,727,1559,0.466,...,649,342,78,51,213,160,2225,2209,1.61,0.37
1,203954,2,Joel Embiid,1610612755,PHI,66,2284,728,1328,0.548,...,670,274,66,112,226,205,2183,2369,1.21,0.29
2,1629029,3,Luka Doncic,1610612742,DAL,66,2391,719,1449,0.496,...,569,529,90,33,236,166,2138,2214,2.24,0.38
3,1628983,4,Shai Gilgeous-Alexander,1610612760,OKC,68,2416,704,1381,0.510,...,329,371,112,65,192,192,2135,2073,1.93,0.58
4,203507,5,Giannis Antetokounmpo,1610612749,MIL,63,2024,707,1278,0.553,...,742,359,52,51,246,197,1959,2072,1.46,0.21
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
534,1631214,535,Alondes Williams,1610612751,BKN,1,5,0,0,0.000,...,1,0,0,0,2,1,0,-1,0.00,0.00
535,1629126,535,Deonte Burton,1610612758,SAC,2,7,0,2,0.000,...,0,0,0,0,0,0,0,-2,0.00,0.00
536,1628402,535,Frank Jackson,1610612762,UTA,1,5,0,3,0.000,...,2,1,0,0,0,0,0,0,0.00,0.00
537,1630701,535,Michael Foster Jr.,1610612755,PHI,1,1,0,0,0.000,...,0,0,0,0,0,0,0,0,0.00,0.00


In [3]:
top_1000.columns

Index(['PLAYER_ID', 'RANK', 'PLAYER', 'TEAM_ID', 'TEAM', 'GP', 'MIN', 'FGM',
       'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT',
       'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS', 'EFF',
       'AST_TOV', 'STL_TOV'],
      dtype='object')

### EDA

In [4]:
# make columns lowercase
top_1000.columns = map(str.lower, top_1000.columns)
top_1000.columns

Index(['player_id', 'rank', 'player', 'team_id', 'team', 'gp', 'min', 'fgm',
       'fga', 'fg_pct', 'fg3m', 'fg3a', 'fg3_pct', 'ftm', 'fta', 'ft_pct',
       'oreb', 'dreb', 'reb', 'ast', 'stl', 'blk', 'tov', 'pf', 'pts', 'eff',
       'ast_tov', 'stl_tov'],
      dtype='object')

In [5]:
# create per game averages
top_1000[['min', 'fgm',
       'fga', 'fg_pct', 'fg3m', 'fg3a', 'fg3_pct', 'ftm', 'fta', 'ft_pct',
       'oreb', 'dreb', 'reb', 'ast', 'stl', 'blk', 'tov', 'pf', 'pts', 'eff',
       'ast_tov', 'stl_tov']] = top_1000[['min', 'fgm',
       'fga', 'fg_pct', 'fg3m', 'fg3a', 'fg3_pct', 'ftm', 'fta', 'ft_pct',
       'oreb', 'dreb', 'reb', 'ast', 'stl', 'blk', 'tov', 'pf', 'pts', 'eff',
       'ast_tov', 'stl_tov']].div(top_1000.gp, axis=0)
top_1000

Unnamed: 0,player_id,rank,player,team_id,team,gp,min,fgm,fga,fg_pct,...,reb,ast,stl,blk,tov,pf,pts,eff,ast_tov,stl_tov
0,1628369,1,Jayson Tatum,1610612738,BOS,74,36.918919,9.824324,21.067568,0.006297,...,8.770270,4.621622,1.054054,0.689189,2.878378,2.162162,30.067568,29.851351,0.021757,0.005000
1,203954,2,Joel Embiid,1610612755,PHI,66,34.606061,11.030303,20.121212,0.008303,...,10.151515,4.151515,1.000000,1.696970,3.424242,3.106061,33.075758,35.893939,0.018333,0.004394
2,1629029,3,Luka Doncic,1610612742,DAL,66,36.227273,10.893939,21.954545,0.007515,...,8.621212,8.015152,1.363636,0.500000,3.575758,2.515152,32.393939,33.545455,0.033939,0.005758
3,1628983,4,Shai Gilgeous-Alexander,1610612760,OKC,68,35.529412,10.352941,20.308824,0.007500,...,4.838235,5.455882,1.647059,0.955882,2.823529,2.823529,31.397059,30.485294,0.028382,0.008529
4,203507,5,Giannis Antetokounmpo,1610612749,MIL,63,32.126984,11.222222,20.285714,0.008778,...,11.777778,5.698413,0.825397,0.809524,3.904762,3.126984,31.095238,32.888889,0.023175,0.003333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
534,1631214,535,Alondes Williams,1610612751,BKN,1,5.000000,0.000000,0.000000,0.000000,...,1.000000,0.000000,0.000000,0.000000,2.000000,1.000000,0.000000,-1.000000,0.000000,0.000000
535,1629126,535,Deonte Burton,1610612758,SAC,2,3.500000,0.000000,1.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-1.000000,0.000000,0.000000
536,1628402,535,Frank Jackson,1610612762,UTA,1,5.000000,0.000000,3.000000,0.000000,...,2.000000,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
537,1630701,535,Michael Foster Jr.,1610612755,PHI,1,1.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [6]:
# rename columns
top_1000_avg = top_1000.rename(columns={'min':'mpg', 'fgm':'fgm_pg',
       'fga':'fga_pg', 'fg_pct':'fg_pct_pg', 'fg3m':'fg3m_pg', 'fg3a':'fg3a_pg', 'fg3_pct':'fg3_pct_pg', 'ftm':'ftm_pg', 'fta':'fta_pg', 'ft_pct':'ft_pct_pg',
       'oreb':'oreb_pg', 'dreb':'dreb_pg', 'reb':'rpg', 'ast':'apg', 'stl':'spg', 'blk':'bpg', 'tov':'tov_pg', 'pf':'pf_pg', 'pts':'ppg', 'eff':'eff_pg',
       'ast_tov':'ast_tov_pg', 'stl_tov':'stl:tov_pg'})
top_1000_avg

Unnamed: 0,player_id,rank,player,team_id,team,gp,mpg,fgm_pg,fga_pg,fg_pct_pg,...,rpg,apg,spg,bpg,tov_pg,pf_pg,ppg,eff_pg,ast_tov_pg,stl:tov_pg
0,1628369,1,Jayson Tatum,1610612738,BOS,74,36.918919,9.824324,21.067568,0.006297,...,8.770270,4.621622,1.054054,0.689189,2.878378,2.162162,30.067568,29.851351,0.021757,0.005000
1,203954,2,Joel Embiid,1610612755,PHI,66,34.606061,11.030303,20.121212,0.008303,...,10.151515,4.151515,1.000000,1.696970,3.424242,3.106061,33.075758,35.893939,0.018333,0.004394
2,1629029,3,Luka Doncic,1610612742,DAL,66,36.227273,10.893939,21.954545,0.007515,...,8.621212,8.015152,1.363636,0.500000,3.575758,2.515152,32.393939,33.545455,0.033939,0.005758
3,1628983,4,Shai Gilgeous-Alexander,1610612760,OKC,68,35.529412,10.352941,20.308824,0.007500,...,4.838235,5.455882,1.647059,0.955882,2.823529,2.823529,31.397059,30.485294,0.028382,0.008529
4,203507,5,Giannis Antetokounmpo,1610612749,MIL,63,32.126984,11.222222,20.285714,0.008778,...,11.777778,5.698413,0.825397,0.809524,3.904762,3.126984,31.095238,32.888889,0.023175,0.003333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
534,1631214,535,Alondes Williams,1610612751,BKN,1,5.000000,0.000000,0.000000,0.000000,...,1.000000,0.000000,0.000000,0.000000,2.000000,1.000000,0.000000,-1.000000,0.000000,0.000000
535,1629126,535,Deonte Burton,1610612758,SAC,2,3.500000,0.000000,1.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-1.000000,0.000000,0.000000
536,1628402,535,Frank Jackson,1610612762,UTA,1,5.000000,0.000000,3.000000,0.000000,...,2.000000,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
537,1630701,535,Michael Foster Jr.,1610612755,PHI,1,1.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [7]:
# remove irrelevant cols
top_1000_avg = top_1000_avg.drop(columns = ['player_id','rank','team_id','team'])
top_1000_avg

Unnamed: 0,player,gp,mpg,fgm_pg,fga_pg,fg_pct_pg,fg3m_pg,fg3a_pg,fg3_pct_pg,ftm_pg,...,rpg,apg,spg,bpg,tov_pg,pf_pg,ppg,eff_pg,ast_tov_pg,stl:tov_pg
0,Jayson Tatum,74,36.918919,9.824324,21.067568,0.006297,3.243243,9.270270,0.004730,7.175676,...,8.770270,4.621622,1.054054,0.689189,2.878378,2.162162,30.067568,29.851351,0.021757,0.005000
1,Joel Embiid,66,34.606061,11.030303,20.121212,0.008303,1.000000,3.030303,0.005000,10.015152,...,10.151515,4.151515,1.000000,1.696970,3.424242,3.106061,33.075758,35.893939,0.018333,0.004394
2,Luka Doncic,66,36.227273,10.893939,21.954545,0.007515,2.803030,8.196970,0.005182,7.803030,...,8.621212,8.015152,1.363636,0.500000,3.575758,2.515152,32.393939,33.545455,0.033939,0.005758
3,Shai Gilgeous-Alexander,68,35.529412,10.352941,20.308824,0.007500,0.852941,2.470588,0.005074,9.838235,...,4.838235,5.455882,1.647059,0.955882,2.823529,2.823529,31.397059,30.485294,0.028382,0.008529
4,Giannis Antetokounmpo,63,32.126984,11.222222,20.285714,0.008778,0.746032,2.714286,0.004365,7.904762,...,11.777778,5.698413,0.825397,0.809524,3.904762,3.126984,31.095238,32.888889,0.023175,0.003333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
534,Alondes Williams,1,5.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,1.000000,0.000000,0.000000,0.000000,2.000000,1.000000,0.000000,-1.000000,0.000000,0.000000
535,Deonte Burton,2,3.500000,0.000000,1.000000,0.000000,0.000000,0.500000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-1.000000,0.000000,0.000000
536,Frank Jackson,1,5.000000,0.000000,3.000000,0.000000,0.000000,1.000000,0.000000,0.000000,...,2.000000,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
537,Michael Foster Jr.,1,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [8]:
# filter for 'role players' -> define role players as players playing 10<mpg<34 & avging <23 ppg (as per 6moty) 
role_players_2022 = top_1000_avg[(top_1000_avg['mpg'] < 30) & (top_1000_avg['mpg'] > 10) & (top_1000_avg['ppg'] < 23) & (top_1000_avg['gp'] > 15)]
role_players_2022

Unnamed: 0,player,gp,mpg,fgm_pg,fga_pg,fg_pct_pg,fg3m_pg,fg3a_pg,fg3_pct_pg,ftm_pg,...,rpg,apg,spg,bpg,tov_pg,pf_pg,ppg,eff_pg,ast_tov_pg,stl:tov_pg
18,Jordan Poole,82,29.975610,6.707317,15.585366,0.005244,2.609756,7.768293,0.004098,4.402439,...,2.743902,4.500000,0.768293,0.256098,3.073171,2.609756,20.426829,16.085366,0.017805,0.003049
49,Bennedict Mathurin,78,28.487179,5.294872,12.192308,0.005564,1.282051,3.974359,0.004141,4.820513,...,4.076923,1.487179,0.615385,0.166667,1.948718,2.076923,16.692308,13.192308,0.009744,0.004103
64,Immanuel Quickley,81,28.938272,5.172840,11.555556,0.005531,2.074074,5.604938,0.004568,2.506173,...,4.160494,3.444444,0.987654,0.172840,1.234568,2.049383,14.925926,15.518519,0.034444,0.009877
68,Jaren Jackson Jr.,63,28.380952,6.603175,13.047619,0.008032,1.587302,4.476190,0.005635,3.825397,...,6.761905,0.952381,1.031746,3.000000,1.698413,3.603175,18.619048,21.190476,0.008889,0.009683
71,Russell Westbrook,73,29.123288,5.917808,13.575342,0.005973,1.219178,3.917808,0.004260,2.821918,...,5.794521,7.547945,1.041096,0.452055,3.493151,2.219178,15.876712,18.082192,0.029589,0.004110
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
431,Serge Ibaka,16,11.562500,1.625000,3.375000,0.030062,0.375000,1.125000,0.020813,0.500000,...,2.750000,0.250000,0.125000,0.437500,0.687500,1.437500,4.125000,4.937500,0.022500,0.011250
433,Jeff Dowtin Jr.,25,10.360000,1.000000,2.280000,0.017560,0.200000,0.640000,0.012520,0.240000,...,0.920000,1.240000,0.360000,0.120000,0.200000,0.520000,2.440000,3.480000,0.248000,0.072000
440,Trent Forrest,23,12.043478,1.086957,2.608696,0.018130,0.000000,0.086957,0.000000,0.086957,...,1.608696,1.652174,0.304348,0.086957,0.652174,0.739130,2.260870,3.695652,0.110000,0.020435
451,John Butler Jr.,19,11.631579,0.894737,2.789474,0.016895,0.421053,1.842105,0.012053,0.157895,...,0.894737,0.578947,0.368421,0.473684,0.052632,1.157895,2.368421,2.684211,0.578947,0.368421


In [9]:
# reset index
role_players_2022

Unnamed: 0,player,gp,mpg,fgm_pg,fga_pg,fg_pct_pg,fg3m_pg,fg3a_pg,fg3_pct_pg,ftm_pg,...,rpg,apg,spg,bpg,tov_pg,pf_pg,ppg,eff_pg,ast_tov_pg,stl:tov_pg
18,Jordan Poole,82,29.975610,6.707317,15.585366,0.005244,2.609756,7.768293,0.004098,4.402439,...,2.743902,4.500000,0.768293,0.256098,3.073171,2.609756,20.426829,16.085366,0.017805,0.003049
49,Bennedict Mathurin,78,28.487179,5.294872,12.192308,0.005564,1.282051,3.974359,0.004141,4.820513,...,4.076923,1.487179,0.615385,0.166667,1.948718,2.076923,16.692308,13.192308,0.009744,0.004103
64,Immanuel Quickley,81,28.938272,5.172840,11.555556,0.005531,2.074074,5.604938,0.004568,2.506173,...,4.160494,3.444444,0.987654,0.172840,1.234568,2.049383,14.925926,15.518519,0.034444,0.009877
68,Jaren Jackson Jr.,63,28.380952,6.603175,13.047619,0.008032,1.587302,4.476190,0.005635,3.825397,...,6.761905,0.952381,1.031746,3.000000,1.698413,3.603175,18.619048,21.190476,0.008889,0.009683
71,Russell Westbrook,73,29.123288,5.917808,13.575342,0.005973,1.219178,3.917808,0.004260,2.821918,...,5.794521,7.547945,1.041096,0.452055,3.493151,2.219178,15.876712,18.082192,0.029589,0.004110
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
431,Serge Ibaka,16,11.562500,1.625000,3.375000,0.030062,0.375000,1.125000,0.020813,0.500000,...,2.750000,0.250000,0.125000,0.437500,0.687500,1.437500,4.125000,4.937500,0.022500,0.011250
433,Jeff Dowtin Jr.,25,10.360000,1.000000,2.280000,0.017560,0.200000,0.640000,0.012520,0.240000,...,0.920000,1.240000,0.360000,0.120000,0.200000,0.520000,2.440000,3.480000,0.248000,0.072000
440,Trent Forrest,23,12.043478,1.086957,2.608696,0.018130,0.000000,0.086957,0.000000,0.086957,...,1.608696,1.652174,0.304348,0.086957,0.652174,0.739130,2.260870,3.695652,0.110000,0.020435
451,John Butler Jr.,19,11.631579,0.894737,2.789474,0.016895,0.421053,1.842105,0.012053,0.157895,...,0.894737,0.578947,0.368421,0.473684,0.052632,1.157895,2.368421,2.684211,0.578947,0.368421


### Finding optimal K

In [None]:
K=range(2,15)
wss = []

### KNN

In [10]:
kmeans_rp = KMeans(random_state=22, n_clusters=6)
kmeans_fit = kmeans_rp.fit(role_players_2022.loc[:, 'mpg':'stl:tov_pg'])

kmeans_fit_labels = kmeans_fit.labels_ 
kmeans_fit_labels



array([2, 2, 2, 2, 2, 2, 4, 2, 2, 4, 2, 2, 5, 5, 5, 2, 5, 2, 4, 5, 4, 5,
       5, 4, 2, 2, 4, 5, 2, 2, 5, 5, 5, 5, 5, 4, 5, 4, 1, 5, 2, 1, 5, 4,
       1, 5, 5, 5, 5, 4, 5, 5, 1, 1, 5, 5, 1, 1, 1, 1, 5, 1, 4, 5, 1, 5,
       5, 5, 4, 5, 5, 1, 1, 1, 1, 5, 5, 5, 1, 2, 1, 5, 1, 4, 1, 5, 5, 1,
       1, 1, 5, 1, 4, 5, 4, 1, 1, 1, 1, 1, 4, 4, 1, 0, 1, 1, 1, 5, 1, 1,
       1, 0, 1, 1, 5, 1, 1, 1, 0, 0, 1, 1, 1, 4, 2, 0, 0, 1, 1, 1, 0, 1,
       5, 1, 0, 1, 1, 0, 0, 0, 1, 5, 1, 1, 4, 0, 1, 1, 1, 0, 1, 0, 5, 0,
       1, 0, 0, 1, 0, 5, 1, 5, 0, 0, 0, 1, 3, 0, 1, 0, 0, 1, 0, 1, 0, 4,
       0, 0, 1, 1, 0, 0, 0, 0, 3, 3, 3, 1, 0, 0, 3, 0, 1, 0, 1, 0, 0, 1,
       4, 2, 1, 0, 3, 1, 4, 0, 3, 1, 3, 3, 0, 3, 1, 0, 0, 3, 0, 3, 3, 0,
       0, 3, 0, 3, 0, 0, 0, 0, 3, 0, 3, 0, 0, 3, 0, 3, 0, 1, 3, 3, 0, 3,
       0, 3, 0, 0, 3, 5, 3, 3, 0, 0, 3, 0, 0, 3, 3, 0, 0, 3, 3, 0, 3, 3,
       0, 0, 1, 0, 0, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 0, 3, 1, 3,
       3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3]