In [None]:
import pandas as pd
import numpy as np
pd.set_option('display.max_columns', None)
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn import metrics
from sklearn.metrics import silhouette_score

In [2]:
# Load the RAPTOR-by-team CSV (path is relative to the notebook's working directory).
all_data = pd.read_csv('../data/historical_RAPTOR_by_team.csv')

In [6]:
# Show the column names of the loaded table.
all_data.columns

Index(['player_name', 'player_id', 'season', 'season_type', 'team', 'poss',
       'mp', 'raptor_offense', 'raptor_defense', 'raptor_total', 'war_total',
       'war_reg_season', 'war_playoffs', 'predator_offense',
       'predator_defense', 'predator_total', 'pace_impact'],
      dtype='object')

In [33]:
def split_season(df, min_minutes=300):
    """Split ``df`` into regular-season and playoff frames.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``season_type`` column (``'RS'`` / ``'PO'``) and an
        ``mp`` column.
    min_minutes : int or float, default 300
        Regular-season rows are kept only when ``mp`` is strictly greater
        than this value. Playoff rows are not filtered.

    Returns
    -------
    (reg_season, playoffs) : tuple of pandas.DataFrame
        Independent copies, so callers can add columns (e.g. cluster labels)
        without triggering ``SettingWithCopyWarning`` or touching ``df``.
    """
    is_regular = df['season_type'] == 'RS'
    is_playoff = df['season_type'] == 'PO'

    # .copy() detaches the results from ``df``; a later column assignment on a
    # plain boolean-filtered slice would otherwise warn.
    reg_season = df[is_regular & (df['mp'] > min_minutes)].copy()
    playoffs = df[is_playoff].copy()
    return reg_season, playoffs

In [34]:
# Split the full table into regular-season and playoff frames.
reg_season, playoffs = split_season(all_data)

In [35]:
def dropColumns(df):
    """Return a copy of ``df`` without the identifier / aggregate columns.

    Every column named below must be present in ``df`` (a missing one raises
    ``KeyError``). Any other column is passed through unchanged; ``df`` itself
    is not modified.
    """
    non_feature_cols = [
        'player_name', 'player_id', 'season', 'season_type', 'team',
        'poss', 'mp',
        'raptor_total', 'war_total', 'war_playoffs',
        'predator_total', 'predator_offense', 'predator_defense',
    ]
    return df.drop(labels=non_feature_cols, axis="columns")

In [36]:
# Keep only the columns that dropColumns leaves in place (the clustering inputs).
reg_season_stats_df = dropColumns(reg_season)

In [37]:
# Display the reduced feature table.
reg_season_stats_df

Unnamed: 0,raptor_offense,raptor_defense,war_reg_season,pace_impact
3,-2.480005,-0.123125,0.070322,0.466668
6,-1.845659,-2.038480,-0.669619,0.444865
8,-2.870680,-0.987076,-0.273787,1.665034
11,4.267101,3.075579,15.456216,-0.513828
13,3.631396,3.605263,11.478972,-0.577012
...,...,...,...,...
27364,-2.073401,-0.690914,-0.007877,-0.331493
27366,-3.358178,-3.040589,-1.141160,0.644128
27367,-2.903709,2.688832,0.531968,0.311576
27369,-3.142927,1.774239,0.369725,0.120889


In [38]:
# Display the filtered regular-season table with all of its columns.
reg_season

Unnamed: 0,player_name,player_id,season,season_type,team,poss,mp,raptor_offense,raptor_defense,raptor_total,war_total,war_reg_season,war_playoffs,predator_offense,predator_defense,predator_total,pace_impact
3,Alaa Abdelnaby,abdelal01,1992,RS,POR,1948,934,-2.480005,-0.123125,-2.603129,0.070322,0.070322,0.0,-2.416858,-0.634025,-3.050883,0.466668
6,Alaa Abdelnaby,abdelal01,1993,RS,BOS,2304,1152,-1.845659,-2.038480,-3.884140,-0.669619,-0.669619,0.0,-1.804417,-1.907295,-3.711713,0.444865
8,Alaa Abdelnaby,abdelal01,1995,RS,SAC,926,476,-2.870680,-0.987076,-3.857756,-0.273787,-0.273787,0.0,-2.839132,-0.608234,-3.447366,1.665034
11,Kareem Abdul-Jabbar,abdulka01,1977,RS,LAL,6654,3016,4.267101,3.075579,7.342679,15.456216,15.456216,0.0,4.482404,2.556956,7.039361,-0.513828
13,Kareem Abdul-Jabbar,abdulka01,1978,RS,LAL,5064,2265,3.631396,3.605263,7.236659,11.478972,11.478972,0.0,4.130288,2.971010,7.101298,-0.577012
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27364,Ante Zizic,zizican01,2019,RS,CLE,2240,1082,-2.073401,-0.690914,-2.764315,-0.007877,-0.007877,0.0,-2.392563,-2.024149,-4.416713,-0.331493
27366,Ivica Zubac,zubaciv01,2017,RS,LAL,1305,609,-3.358178,-3.040589,-6.398767,-1.141160,-1.141160,0.0,-3.533604,-2.932103,-6.465707,0.644128
27367,Ivica Zubac,zubaciv01,2018,RS,LAL,871,410,-2.903709,2.688832,-0.214877,0.531968,0.531968,0.0,-2.616331,0.752875,-1.863456,0.311576
27369,Ivica Zubac,zubaciv01,2019,RS,LAC,1141,524,-3.142927,1.774239,-1.368688,0.369725,0.369725,0.0,-3.105152,0.994436,-2.110716,0.120889


In [10]:
def scale_df(df):
    """Standardize ``df`` with a freshly fitted scikit-learn ``StandardScaler``.

    The scaler is fitted on ``df`` and applied to it in one step; the fitted
    scaler object is not kept.

    Returns whatever ``StandardScaler.fit_transform`` returns for ``df``.
    """
    return StandardScaler().fit_transform(df)

In [39]:
# Standardize the regular-season feature table.
reg_season_scale_df = scale_df(reg_season_stats_df)

In [40]:
# Display the unscaled feature table again (presumably for comparison with the scaled array).
reg_season_stats_df

Unnamed: 0,raptor_offense,raptor_defense,war_reg_season,pace_impact
3,-2.480005,-0.123125,0.070322,0.466668
6,-1.845659,-2.038480,-0.669619,0.444865
8,-2.870680,-0.987076,-0.273787,1.665034
11,4.267101,3.075579,15.456216,-0.513828
13,3.631396,3.605263,11.478972,-0.577012
...,...,...,...,...
27364,-2.073401,-0.690914,-0.007877,-0.331493
27366,-3.358178,-3.040589,-1.141160,0.644128
27367,-2.903709,2.688832,0.531968,0.311576
27369,-3.142927,1.774239,0.369725,0.120889


In [41]:
# Display the standardized feature array returned by scale_df.
reg_season_scale_df

array([[-0.79931692,  0.03245956, -0.65679582,  0.68066241],
       [-0.52718347, -1.20464183, -0.88395258,  0.64044233],
       [-0.96691614, -0.5255545 , -0.7624349 ,  2.89130461],
       ...,
       [-0.98108591,  1.84866307, -0.51507363,  0.39456204],
       [-1.08370982,  1.25793994, -0.56488115,  0.04279832],
       [-0.35769147,  1.1015225 , -0.449591  , -0.21969283]])

In [42]:
silhouette = []

# Sweep k = 2..20 and record the silhouette score of each KMeans fit.
# silhouette[j] therefore belongs to n_clusters == j + 2.
for n_clusters in range(2, 21):
    # n_init is pinned explicitly: newer scikit-learn releases changed the
    # default, which would otherwise alter the scores between environments.
    kmeans = KMeans(n_clusters = n_clusters, n_init = 10, random_state = 99)
    cluster_labels = kmeans.fit_predict(reg_season_scale_df)

    score = silhouette_score(reg_season_scale_df, cluster_labels)
    silhouette.append(score)

    print("For n_clusters = {}, silhouette score is {}".format(n_clusters, score))

For n_clusters = 2, silhouette score is 0.3133639957428435
For n_clusters = 3, silhouette score is 0.22576861117515062
For n_clusters = 4, silhouette score is 0.2341727139950163
For n_clusters = 5, silhouette score is 0.22582238765363372
For n_clusters = 6, silhouette score is 0.2229203449658741
For n_clusters = 7, silhouette score is 0.22055913725811863
For n_clusters = 8, silhouette score is 0.20866373001025657
For n_clusters = 9, silhouette score is 0.2098903286977371
For n_clusters = 10, silhouette score is 0.20709858784245788
For n_clusters = 11, silhouette score is 0.2089558191637835
For n_clusters = 12, silhouette score is 0.20678662177707446
For n_clusters = 13, silhouette score is 0.20965212534611433
For n_clusters = 14, silhouette score is 0.21163965379436242
For n_clusters = 15, silhouette score is 0.2036088803440211
For n_clusters = 16, silhouette score is 0.2012999164158483
For n_clusters = 17, silhouette score is 0.20371909335138447
For n_clusters = 18, silhouette score i

In [43]:
silhouette_diff = []

# Compare each silhouette score with its predecessor. The first pair is
# (k=2, k=3), so the k reported for a pair is its position + 3.
for position, (previous_score, current_score) in enumerate(zip(silhouette, silhouette[1:])):
    # Relative reduction of the "distance to a perfect score" (1 - silhouette).
    improvement = 1 - ((1 - current_score) / (1 - previous_score))
    silhouette_diff.append(improvement)
    print("For n_cluster = {}, percent improvement = {}".format(position + 3, improvement))

For n_cluster = 3, percent improvement = -0.12757179062064883
For n_cluster = 4, percent improvement = 0.010854768924599578
For n_cluster = 5, percent improvement = -0.01090366782952179
For n_cluster = 6, percent improvement = -0.0037485489653519277
For n_cluster = 7, percent improvement = -0.0030385658567420037
For n_cluster = 8, percent improvement = -0.01526146217946156
For n_cluster = 9, percent improvement = 0.001550034712166748
For n_cluster = 10, percent improvement = -0.0035333586673833572
For n_cluster = 11, percent improvement = 0.0023423231348169704
For n_cluster = 12, percent improvement = -0.002742194986398827
For n_cluster = 13, percent improvement = 0.003612525516727527
For n_cluster = 14, percent improvement = 0.0025147514303350826
For n_cluster = 15, percent improvement = -0.010186678577877917
For n_cluster = 16, percent improvement = -0.002899283870932967
For n_cluster = 17, percent improvement = 0.003028892803767058
For n_cluster = 18, percent improvement = -0.003334

In [44]:
# Final 6-cluster model on the standardized regular-season features.
# random_state / n_init are fixed so the cluster ids inspected in the cells
# below stay the same across kernel restarts (same seed as the sweep above).
kmeans = KMeans(n_clusters = 6, n_init = 10, random_state = 99)

# fit_predict fits once and returns the label of every training row; the
# original cell ran an identical fit/predict pair twice.
y_kmeans = kmeans.fit_predict(reg_season_scale_df)

In [45]:
# Attach the cluster label of each row. assign() builds a new frame, which
# avoids SettingWithCopyWarning (reg_season is a filtered slice of all_data)
# and keeps this cell safe to re-run.
reg_season = reg_season.assign(cluster=y_kmeans)

In [46]:
# Display the regular-season table, now including the 'cluster' column.
reg_season

Unnamed: 0,player_name,player_id,season,season_type,team,poss,mp,raptor_offense,raptor_defense,raptor_total,war_total,war_reg_season,war_playoffs,predator_offense,predator_defense,predator_total,pace_impact,cluster
3,Alaa Abdelnaby,abdelal01,1992,RS,POR,1948,934,-2.480005,-0.123125,-2.603129,0.070322,0.070322,0.0,-2.416858,-0.634025,-3.050883,0.466668,0
6,Alaa Abdelnaby,abdelal01,1993,RS,BOS,2304,1152,-1.845659,-2.038480,-3.884140,-0.669619,-0.669619,0.0,-1.804417,-1.907295,-3.711713,0.444865,0
8,Alaa Abdelnaby,abdelal01,1995,RS,SAC,926,476,-2.870680,-0.987076,-3.857756,-0.273787,-0.273787,0.0,-2.839132,-0.608234,-3.447366,1.665034,5
11,Kareem Abdul-Jabbar,abdulka01,1977,RS,LAL,6654,3016,4.267101,3.075579,7.342679,15.456216,15.456216,0.0,4.482404,2.556956,7.039361,-0.513828,1
13,Kareem Abdul-Jabbar,abdulka01,1978,RS,LAL,5064,2265,3.631396,3.605263,7.236659,11.478972,11.478972,0.0,4.130288,2.971010,7.101298,-0.577012,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27364,Ante Zizic,zizican01,2019,RS,CLE,2240,1082,-2.073401,-0.690914,-2.764315,-0.007877,-0.007877,0.0,-2.392563,-2.024149,-4.416713,-0.331493,4
27366,Ivica Zubac,zubaciv01,2017,RS,LAL,1305,609,-3.358178,-3.040589,-6.398767,-1.141160,-1.141160,0.0,-3.533604,-2.932103,-6.465707,0.644128,0
27367,Ivica Zubac,zubaciv01,2018,RS,LAL,871,410,-2.903709,2.688832,-0.214877,0.531968,0.531968,0.0,-2.616331,0.752875,-1.863456,0.311576,3
27369,Ivica Zubac,zubaciv01,2019,RS,LAC,1141,524,-3.142927,1.774239,-1.368688,0.369725,0.369725,0.0,-3.105152,0.994436,-2.110716,0.120889,3


In [79]:
# Cluster 0: 2019 rows ordered by offensive RAPTOR, highest first.
x = reg_season.loc[reg_season['cluster'] == 0]
x.loc[x['season'] == 2019].sort_values("raptor_offense", ascending=False)

Unnamed: 0,player_name,player_id,season,season_type,team,poss,mp,raptor_offense,raptor_defense,raptor_total,war_total,war_reg_season,war_playoffs,predator_offense,predator_defense,predator_total,pace_impact,cluster
23159,Lance Stephenson,stephla01,2019,RS,LAL,2490,1123,-1.110667,-2.107931,-3.218598,-0.270195,-0.270195,0.0,-1.695466,-2.559323,-4.254789,0.648699,0
13254,DeAndre Jordan,jordade01,2019,RS,NYK,1023,493,-1.253551,-1.824208,-3.077759,-0.082933,-0.082933,0.0,-1.876531,-2.087659,-3.96419,0.60861,0
23738,Garrett Temple,templga01,2019,RS,LAC,1139,510,-1.77808,0.333815,-1.444264,0.341136,0.341136,0.0,-1.544739,1.147244,-0.397495,0.414266,0
6362,Cheick Diallo,diallch01,2019,RS,NOP,2001,896,-1.8694,-0.468058,-2.337458,0.189074,0.189074,0.0,-1.21891,-0.751869,-1.970779,0.262004,0
19885,Bobby Portis,portibo01,2019,RS,WAS,1645,768,-1.869837,-2.509392,-4.379229,-0.640922,-0.640922,0.0,-1.31536,-1.503802,-2.819162,0.404715,0
11857,Frank Jackson,jacksfr01,2019,RS,NOP,2616,1169,-1.946424,-2.75981,-4.706234,-1.175333,-1.175333,0.0,-1.85239,-3.027728,-4.880118,0.750137,0
6363,Hamidou Diallo,diallha01,2019,RS,OKC,1141,526,-2.005642,-1.622405,-3.628048,-0.236464,-0.236464,0.0,-3.463481,-0.842753,-4.306234,0.357688,0
15756,Frank Mason,masonfr01,2019,RS,SAC,975,435,-2.125625,-3.616832,-5.742456,-0.669249,-0.669249,0.0,-1.778299,-3.95516,-5.733459,0.784441,0
9472,JaMychal Green,greenja01,2019,RS,MEM,1849,900,-2.131357,-1.12187,-3.253227,-0.232398,-0.232398,0.0,-2.020236,-0.409866,-2.430102,0.584849,0
4715,Ian Clark,clarkia01,2019,RS,NOP,2152,973,-2.23551,-3.038161,-5.273672,-1.25791,-1.25791,0.0,-2.246745,-3.302577,-5.549321,0.414659,0


In [81]:
# Allow up to 500 rows to be rendered for this and later displays.
pd.options.display.max_rows = 500
# Cluster 1: 2019 rows ordered by total RAPTOR, highest first.
x = reg_season.loc[reg_season['cluster'] == 1]
x.loc[x['season'] == 2019].sort_values("raptor_total", ascending=False)

Unnamed: 0,player_name,player_id,season,season_type,team,poss,mp,raptor_offense,raptor_defense,raptor_total,war_total,war_reg_season,war_playoffs,predator_offense,predator_defense,predator_total,pace_impact,cluster
10012,James Harden,hardeja01,2019,RS,HOU,5857,2867,9.815956,1.121363,10.93732,20.057946,20.057946,0.0,10.864945,1.47762,12.342565,0.187883,1
8719,Paul George,georgpa01,2019,RS,OKC,6176,2841,5.316363,3.787824,9.104186,17.381799,17.381799,0.0,5.347247,4.519897,9.867143,1.18278,1
5621,Stephen Curry,curryst01,2019,RS,GSW,5059,2331,7.48267,0.672822,8.155492,13.263794,13.263794,0.0,8.48609,0.864722,9.350812,2.311394,1
12902,Nikola Jokic,jokicni01,2019,RS,DEN,5133,2504,5.642959,1.762307,7.405266,13.031302,13.031302,0.0,6.05608,2.421618,8.477699,0.451961,1
5843,Anthony Davis,davisan02,2019,RS,NOP,3998,1850,4.105122,3.275085,7.380207,9.609615,9.609615,0.0,4.307411,3.020674,7.328085,0.511793,1
18249,Jusuf Nurkic,nurkiju01,2019,RS,POR,4177,1974,2.061289,5.178597,7.239886,10.121163,10.121163,0.0,2.07262,4.527041,6.599661,0.607768,1
767,Giannis Antetokounmpo,antetgi01,2019,RS,MIL,5171,2358,4.184342,2.567988,6.75233,11.572533,11.572533,0.0,4.257098,2.49931,6.756409,1.254684,1
11740,Kyrie Irving,irvinky01,2019,RS,BOS,4705,2214,5.529101,1.084506,6.613607,10.706792,10.706792,0.0,6.118081,1.586238,7.70432,1.250887,1
14716,Damian Lillard,lillada01,2019,RS,POR,5982,2838,6.778516,-0.381945,6.396571,13.340532,13.340532,0.0,6.937646,-0.156818,6.780827,0.744721,1
4988,Mike Conley,conlemi01,2019,RS,MEM,4787,2342,4.605585,1.701337,6.306922,10.755639,10.755639,0.0,3.512719,2.155744,5.668463,-0.62504,1


In [70]:
# Cluster 2: 2019 rows ordered by offensive RAPTOR, highest first.
x = reg_season.loc[reg_season['cluster'] == 2]
x.loc[x['season'] == 2019].sort_values("raptor_offense", ascending=False)

Unnamed: 0,player_name,player_id,season,season_type,team,poss,mp,raptor_offense,raptor_defense,raptor_total,war_total,war_reg_season,war_playoffs,predator_offense,predator_defense,predator_total,pace_impact,cluster
19847,Otto Porter Jr.,porteot01,2019,RS,CHI,1044,492,3.909218,0.442016,4.351234,1.788693,1.788693,0.0,2.419034,0.297887,2.716921,0.35177,2
4917,John Collins,collijo01,2019,RS,ATL,4001,1829,3.248466,-0.516129,2.732337,5.127654,5.127654,0.0,3.462149,-1.044357,2.417792,0.234645,2
16086,CJ McCollum,mccolcj01,2019,RS,POR,5008,2375,3.006827,-0.073844,2.932982,6.872914,6.872914,0.0,2.968283,0.20258,3.170863,-0.196802,2
17972,Raul Neto,netora01,2019,RS,UTA,1032,474,2.888312,0.075572,2.963884,1.382914,1.382914,0.0,1.828303,0.27412,2.102423,0.080975,2
21276,Derrick Rose,rosede01,2019,RS,MIN,3005,1392,2.738555,-1.233261,1.505295,3.014726,3.014726,0.0,1.38432,-1.530388,-0.146068,-0.24879,2
3078,Malcolm Brogdon,brogdma01,2019,RS,MIL,4005,1832,2.533459,0.913138,3.446597,5.770711,5.770711,0.0,2.148576,0.936823,3.085398,-0.372404,2
3625,Trey Burke,burketr01,2019,RS,DAL,914,436,2.452149,-1.834642,0.617507,0.742566,0.742566,0.0,2.085272,-1.745233,0.340039,-0.888005,2
3738,Jimmy Butler,butleji01,2019,RS,MIN,768,361,2.399658,0.664477,3.064135,1.06694,1.06694,0.0,2.977276,1.806215,4.783491,-0.373085,2
2048,Davis Bertans,bertada01,2019,RS,SAS,3458,1632,2.273988,1.216634,3.490623,5.208802,5.208802,0.0,1.988265,0.644357,2.632622,0.24653,2
19935,Dwight Powell,poweldw01,2019,RS,DAL,3495,1662,2.233488,-0.370409,1.863079,3.913731,3.913731,0.0,1.834385,0.056685,1.89107,0.053667,2


In [73]:
# Cluster 3: 2019 rows ordered by offensive RAPTOR, highest first.
x = reg_season.loc[reg_season['cluster'] == 3]
x.loc[x['season'] == 2019].sort_values("raptor_offense", ascending=False)

Unnamed: 0,player_name,player_id,season,season_type,team,poss,mp,raptor_offense,raptor_defense,raptor_total,war_total,war_reg_season,war_playoffs,predator_offense,predator_defense,predator_total,pace_impact,cluster
16999,Nikola Mirotic,mirotni01,2019,RS,MIL,710,320,1.68503,1.702021,3.387051,1.00634,1.00634,0.0,2.645808,1.35151,3.997318,0.445546,3
4285,Alex Caruso,carusal01,2019,RS,LAL,1135,531,1.649427,3.495627,5.145054,2.151722,2.151722,0.0,0.750971,2.594953,3.345923,0.61105,3
18413,Victor Oladipo,oladivi01,2019,RS,IND,2348,1147,1.281615,2.471571,3.753186,3.830315,3.830315,0.0,1.399072,3.088803,4.487876,0.660227,3
27160,Delon Wright,wrighde01,2019,RS,MEM,1691,802,0.986496,2.342682,3.329177,2.468851,2.468851,0.0,-0.365843,3.000755,2.634912,-0.763256,3
27325,Cody Zeller,zelleco01,2019,RS,CHA,2606,1243,0.464302,0.551004,1.015305,2.395152,2.395152,0.0,0.277617,0.640045,0.917661,0.310521,3
14917,Kevon Looney,looneke01,2019,RS,GSW,3186,1481,0.409195,2.719189,3.128384,4.405895,4.405895,0.0,0.008721,2.732276,2.740997,-0.822525,3
7296,Wayne Ellington,ellinwa01,2019,RS,DET,1555,764,0.406288,0.875259,1.281547,1.572004,1.572004,0.0,0.917121,0.579669,1.49679,0.034758,3
14591,Caris LeVert,leverca01,2019,RS,BRK,2259,1063,0.387778,1.609149,1.996928,2.575266,2.575266,0.0,0.075562,1.178927,1.254489,0.031862,3
18254,David Nwaba,nwabada01,2019,RS,CLE,2019,984,0.341767,1.20201,1.543777,2.141243,2.141243,0.0,-0.400268,0.770245,0.369977,-0.68039,3
25885,Derrick White,whitede01,2019,RS,SAS,3555,1728,0.308425,2.98297,3.291396,5.291939,5.291939,0.0,0.15065,3.083011,3.233661,-0.656491,3


In [76]:
# Cluster 4: 2019 rows ordered by offensive RAPTOR, highest first.
x = reg_season.loc[reg_season['cluster'] == 4]
x.loc[x['season'] == 2019].sort_values("raptor_offense", ascending=False)

Unnamed: 0,player_name,player_id,season,season_type,team,poss,mp,raptor_offense,raptor_defense,raptor_total,war_total,war_reg_season,war_playoffs,predator_offense,predator_defense,predator_total,pace_impact,cluster
22087,Landry Shamet,shamela01,2019,RS,LAC,1499,694,1.294003,-1.160547,0.133456,1.023231,1.023231,0.0,0.768923,-0.8982,-0.129277,0.225664,4
11992,Reggie Jackson,jacksre01,2019,RS,DET,4710,2289,1.256652,-2.190004,-0.933352,2.107069,2.107069,0.0,0.91413,-1.705114,-0.790984,-0.696543,4
6600,Tyler Dorsey,dorsety01,2019,RS,MEM,948,447,0.917079,-3.993364,-3.076285,-0.074239,-0.074239,0.0,-0.810847,-2.997574,-3.808421,-0.237716,4
25411,T.J. Warren,warretj01,2019,RS,PHO,2886,1360,0.894261,-1.211226,-0.316965,1.684935,1.684935,0.0,0.686257,-0.906602,-0.220345,-0.197989,4
16536,Jordan McRae,mcraejo01,2019,RS,WAS,742,333,0.875,-0.638692,0.236308,0.506183,0.506183,0.0,0.112968,-1.390026,-1.277059,-0.237115,4
22088,Landry Shamet,shamela01,2019,RS,PHI,2391,1108,0.753803,-0.89734,-0.143537,1.469884,1.469884,0.0,-0.009441,-0.733755,-0.743196,-0.24579,4
6275,Matthew Dellavedova,dellama01,2019,RS,CLE,1480,715,0.693742,-1.545364,-0.851621,0.694061,0.694061,0.0,0.148976,-2.472317,-2.323341,0.227439,4
20239,Julius Randle,randlju01,2019,RS,NOP,4858,2232,0.599,-2.019237,-1.420237,1.51834,1.51834,0.0,0.222671,-2.183656,-1.960985,0.272567,4
10878,George Hill,hillge01,2019,RS,CLE,689,344,0.574152,-2.584703,-2.01055,0.128519,0.128519,0.0,0.189951,-1.701643,-1.511692,-0.989946,4
910,Trevor Ariza,arizatr01,2019,RS,WAS,3122,1465,0.512549,-1.910558,-1.398009,1.014975,1.014975,0.0,0.663968,-1.607899,-0.943931,0.447631,4


In [77]:
# Cluster 5: 2019 rows ordered by offensive RAPTOR, highest first.
x = reg_season.loc[reg_season['cluster'] == 5]
x.loc[x['season'] == 2019].sort_values("raptor_offense", ascending=False)

Unnamed: 0,player_name,player_id,season,season_type,team,poss,mp,raptor_offense,raptor_defense,raptor_total,war_total,war_reg_season,war_playoffs,predator_offense,predator_defense,predator_total,pace_impact,cluster
26594,Lou Williams,willilo02,2019,RS,LAC,4388,1993,5.114451,-3.697212,1.41724,4.304897,4.304897,0.0,4.042942,-4.405614,-0.362673,1.624138,5
2549,Devin Booker,bookede01,2019,RS,PHO,4772,2242,3.582047,-2.997283,0.584764,3.865201,3.865201,0.0,3.075763,-2.860811,0.214952,1.353761,5
6568,Luka Doncic,doncilu01,2019,RS,DAL,4877,2318,3.095994,-0.869823,2.226171,5.93936,5.93936,0.0,3.268718,-0.52967,2.739048,0.940743,5
21533,D'Angelo Russell,russeda01,2019,RS,BRK,5222,2448,2.735094,-0.576171,2.158923,6.180959,6.180959,0.0,3.173506,-0.619807,2.553699,0.828866,5
25754,Russell Westbrook,westbru01,2019,RS,OKC,5749,2630,2.702463,-0.326028,2.376435,7.063565,7.063565,0.0,3.331565,0.549353,3.880918,2.737644,5
27316,Trae Young,youngtr01,2019,RS,ATL,5516,2503,2.396499,-4.517486,-2.120987,0.817245,0.817245,0.0,2.795597,-4.777071,-1.981475,1.773287,5
10765,Buddy Hield,hieldbu01,2019,RS,SAC,5717,2615,2.172344,-0.811709,1.360635,5.524852,5.524852,0.0,2.321566,-0.540975,1.780592,0.753613,5
20421,JJ Redick,redicjj01,2019,RS,PHI,5211,2379,1.914443,-0.460058,1.454384,5.149106,5.149106,0.0,1.968375,-1.127337,0.841038,0.918013,5
15548,Boban Marjanovic,marjabo01,2019,RS,PHI,653,305,1.86512,0.526359,2.391479,0.804147,0.804147,0.0,1.062652,0.093238,1.15589,0.519302,5
14974,Kevin Love,loveke01,2019,RS,CLE,1249,598,1.856374,1.521788,3.378162,1.887343,1.887343,0.0,2.121479,0.181299,2.302778,0.961721,5


Unnamed: 0,player_name,player_id,season,season_type,team,poss,mp,raptor_offense,raptor_defense,raptor_total,war_total,war_reg_season,war_playoffs,predator_offense,predator_defense,predator_total,pace_impact,cluster


In [None]:
# 12-cluster model over every row of all_data (regular season and playoffs).
# The original cell passed the *function* scale_df to fit()/predict(); the
# data has to come from all_data because the labels are written back onto
# all_data in the next cell, so one label per all_data row is required.
# NOTE: this rebinds `kmeans` and `y_kmeans` from the 6-cluster section.
all_data_stats_df = dropColumns(all_data).drop(columns=['cluster'], errors='ignore')  # ignore a label column left by a previous run
all_data_scale_df = scale_df(all_data_stats_df)

# Seeded so that the hard-coded cluster ids used further down are reproducible.
kmeans = KMeans(n_clusters = 12, n_init = 10, random_state = 99)

# One fit is enough; the original repeated the same fit/predict pair twice.
y_kmeans = kmeans.fit_predict(all_data_scale_df)

In [35]:
# Store the KMeans labels on all_data (mutates the loaded frame in place).
# Requires y_kmeans to have exactly one entry per row of all_data.
all_data['cluster'] = y_kmeans

In [100]:
# Rows labelled as cluster 0. The filter previously selected label 1, which
# contradicted both the variable name and the saved output of the next cell.
cluster_0 = all_data[all_data['cluster']==0]

In [46]:
# 2019 rows of the cluster_0 frame.
cluster_0.loc[cluster_0['season'] == 2019]

Unnamed: 0,player_name,player_id,season,season_type,team,poss,mp,raptor_offense,raptor_defense,raptor_total,war_total,war_reg_season,war_playoffs,predator_offense,predator_defense,predator_total,pace_impact,cluster
132,Jaylen Adams,adamsja01,2019,RS,ATL,952,428,-2.377644,-3.734729,-6.112373,-0.733513,-0.733513,0.0,-2.023145,-4.304853,-6.327998,-0.099018,0
269,DeVaughn Akoon-Purcell,akoonde01,2019,RS,DEN,49,22,-2.229499,-1.01984,-3.249339,-0.005556,-0.005556,0.0,-1.262366,0.66152,-0.600846,-0.894708,0
353,Jarrett Allen,allenja01,2019,RS,BRK,4478,2096,-1.523007,-2.805412,-4.328419,-1.688439,-1.688439,0.0,-1.515219,-1.770966,-3.286186,0.030829,0
358,Kadeem Allen,allenka01,2019,RS,NYK,870,416,2.078138,-1.023795,1.054343,0.801391,0.801391,0.0,1.129783,-1.333298,-0.203515,-0.764299,0
499,Al-Farouq Aminu,aminual01,2019,PO,POR,823,399,-0.879552,-0.971744,-1.851295,0.1872,0.0,0.1872,-1.023039,-0.77789,-1.800928,-0.783185,0
712,Ryan Anderson,anderry01,2019,RS,PHO,570,278,-3.270465,-1.670249,-4.940714,-0.309508,-0.309508,0.0,-2.183461,-2.721894,-4.905355,-0.398038,0
797,Carmelo Anthony,anthoca01,2019,RS,HOU,612,294,-2.506433,-2.628394,-5.134827,-0.357081,-0.357081,0.0,-1.131556,-2.09568,-3.227237,-0.18251,0
863,Ryan Arcidiacono,arcidry01,2019,RS,CHI,4077,1961,-0.091056,-0.403091,-0.494146,2.22707,2.22707,0.0,-1.028347,-0.31001,-1.338357,-1.350893,0
909,Trevor Ariza,arizatr01,2019,RS,PHO,1856,884,-1.445941,-2.092738,-3.538678,-0.355141,-0.355141,0.0,-1.016009,-1.24049,-2.256499,-0.162205,0
910,Trevor Ariza,arizatr01,2019,RS,WAS,3122,1465,0.512549,-1.910558,-1.398009,1.014975,1.014975,0.0,0.663968,-1.607899,-0.943931,0.447631,0


In [99]:
# NOTE(review): the variable is called cluster_1 but holds the rows labelled 8 — confirm which is intended.
cluster_1 = all_data.loc[all_data['cluster'] == 8]
# 2019 rows of that cluster.
cluster_1.loc[cluster_1['season'] == 2019]

Unnamed: 0,player_name,player_id,season,season_type,team,poss,mp,raptor_offense,raptor_defense,raptor_total,war_total,war_reg_season,war_playoffs,predator_offense,predator_defense,predator_total,pace_impact,cluster
6074,Tyler Davis,davisty01,2019,RS,OKC,2,1,-40.825921,61.113882,20.287961,0.011423,0.011423,0.0,-26.935305,42.989063,16.053758,-2.873834,8
24683,Tyler Ulis,ulisty01,2019,RS,CHI,2,1,-16.801473,62.469205,45.667732,0.023868,0.023868,0.0,-15.001847,41.704602,26.702755,-3.443889,8


In [50]:
# Rows labelled as cluster 3; show at most the first 50 from 2019.
cluster_3 = all_data.loc[all_data['cluster'] == 3]
cluster_3.loc[cluster_3['season'] == 2019].head(50)

Unnamed: 0,player_name,player_id,season,season_type,team,poss,mp,raptor_offense,raptor_defense,raptor_total,war_total,war_reg_season,war_playoffs,predator_offense,predator_defense,predator_total,pace_impact,cluster
349,Grayson Allen,allengr01,2019,PO,UTA,32,14,-9.8357,-5.824538,-15.660238,-0.096257,0.0,-0.096257,-8.832492,-8.682236,-17.514728,1.202344,3
350,Grayson Allen,allengr01,2019,RS,UTA,899,416,-4.608434,-5.53922,-10.147654,-1.583498,-1.583498,0.0,-3.970137,-4.558908,-8.529045,0.869532,3
352,Jarrett Allen,allenja01,2019,PO,BRK,245,110,-1.208032,-7.931457,-9.139489,-0.378198,0.0,-0.378198,-3.278979,-7.242624,-10.521602,2.247688,3
713,Ryan Anderson,anderry01,2019,RS,MIA,90,44,-3.705832,-2.712141,-6.417973,-0.081488,-0.081488,0.0,-3.74913,-4.981757,-8.730887,-1.056914,3
1282,Ron Baker,bakerro01,2019,RS,WAS,98,45,-10.675821,-0.86468,-11.540501,-0.201974,-0.201974,0.0,-9.408844,-3.288406,-12.69725,0.07724,3
1307,Wade Baldwin IV,baldwwa01,2019,RS,POR,199,94,-11.48865,-5.027375,-16.516024,-0.667983,-0.667983,0.0,-10.653276,-5.076885,-15.730161,1.201191,3
1782,Jerryd Bayless,bayleje01,2019,RS,MIN,1365,657,-1.929942,-6.130163,-8.060105,-1.75416,-1.75416,0.0,-1.306543,-5.091015,-6.397558,-1.4769,3
2049,Dairis Bertans,bertada02,2019,RS,NOP,368,167,-5.792376,-5.219272,-11.011647,-0.699774,-0.699774,0.0,-4.934068,-5.509983,-10.444051,-0.600327,3
2289,Antonio Blakeney,blakean01,2019,RS,CHI,1750,829,-3.05917,-4.042692,-7.101862,-1.829112,-1.829112,0.0,-3.451717,-5.196974,-8.648692,-0.638573,3
2486,Jonah Bolden,boldejo01,2019,PO,PHI,175,79,-7.768725,-5.571062,-13.339787,-0.448217,0.0,-0.448217,-5.493739,-5.852479,-11.346218,1.807021,3


In [61]:
# Every all_data row from the 2019 season (any season_type).
season_2019 = all_data.loc[all_data['season'] == 2019]

In [66]:
# Hard-coded cluster ids, stored in ascending order.
cluster_lst_2019 = sorted([10, 0, 2, 9, 3, 7, 4, 11, 5, 1, 8, 6])

In [69]:
# One 2019 frame per cluster id, in the order of cluster_lst_2019, so that
# cluster_dfs[k] holds the rows for cluster_lst_2019[k].
# (The original loop body started with a stray `cluster` token, a SyntaxError.)
cluster_dfs = [
    season_2019[season_2019['cluster'] == cluster_id]
    for cluster_id in cluster_lst_2019
]

In [101]:
# Cap rendered rows at 100, then show the first 100 rows of the first cluster frame.
pd.options.display.max_rows = 100
cluster_dfs[0].head(100)

Unnamed: 0,player_name,player_id,season,season_type,team,poss,mp,raptor_offense,raptor_defense,raptor_total,war_total,war_reg_season,war_playoffs,predator_offense,predator_defense,predator_total,pace_impact,cluster
132,Jaylen Adams,adamsja01,2019,RS,ATL,952,428,-2.377644,-3.734729,-6.112373,-0.733513,-0.733513,0.0,-2.023145,-4.304853,-6.327998,-0.099018,0
269,DeVaughn Akoon-Purcell,akoonde01,2019,RS,DEN,49,22,-2.229499,-1.01984,-3.249339,-0.005556,-0.005556,0.0,-1.262366,0.66152,-0.600846,-0.894708,0
353,Jarrett Allen,allenja01,2019,RS,BRK,4478,2096,-1.523007,-2.805412,-4.328419,-1.688439,-1.688439,0.0,-1.515219,-1.770966,-3.286186,0.030829,0
358,Kadeem Allen,allenka01,2019,RS,NYK,870,416,2.078138,-1.023795,1.054343,0.801391,0.801391,0.0,1.129783,-1.333298,-0.203515,-0.764299,0
499,Al-Farouq Aminu,aminual01,2019,PO,POR,823,399,-0.879552,-0.971744,-1.851295,0.1872,0.0,0.1872,-1.023039,-0.77789,-1.800928,-0.783185,0
712,Ryan Anderson,anderry01,2019,RS,PHO,570,278,-3.270465,-1.670249,-4.940714,-0.309508,-0.309508,0.0,-2.183461,-2.721894,-4.905355,-0.398038,0
797,Carmelo Anthony,anthoca01,2019,RS,HOU,612,294,-2.506433,-2.628394,-5.134827,-0.357081,-0.357081,0.0,-1.131556,-2.09568,-3.227237,-0.18251,0
863,Ryan Arcidiacono,arcidry01,2019,RS,CHI,4077,1961,-0.091056,-0.403091,-0.494146,2.22707,2.22707,0.0,-1.028347,-0.31001,-1.338357,-1.350893,0
909,Trevor Ariza,arizatr01,2019,RS,PHO,1856,884,-1.445941,-2.092738,-3.538678,-0.355141,-0.355141,0.0,-1.016009,-1.24049,-2.256499,-0.162205,0
910,Trevor Ariza,arizatr01,2019,RS,WAS,3122,1465,0.512549,-1.910558,-1.398009,1.014975,1.014975,0.0,0.663968,-1.607899,-0.943931,0.447631,0
