In [5]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
import hvplot.pandas
import plotly.express as px

In [8]:
# Load the advanced-stats CSV and use its saved row-label column as the index.
great_data = pd.read_csv("great_advanced_players.csv")

# The first column was written without a header, so pandas names it
# 'Unnamed: 0'; promote it to the index and clear the leftover axis name.
great_player_df = great_data.set_index('Unnamed: 0').rename_axis(None)
great_player_df.head()

Unnamed: 0,Player,Season,class,G,MP,PER,BPM,VORP
15,Giannis Antetokounmpo,2016-17,4.0,80.0,35.6,26.1,7.3,6.7
35,Bradley Beal,2016-17,4.0,77.0,34.9,20.1,3.3,3.5
48,Eric Bledsoe,2016-17,4.0,66.0,33.0,20.5,3.6,3.1
65,Jimmy Butler,2016-17,4.0,76.0,37.0,25.1,7.3,6.6
86,Mike Conley,2016-17,4.0,69.0,33.2,23.2,6.8,5.1


In [10]:
# Numeric feature columns that feed the machine learning model.
feature_columns = ["G", "MP", "PER", "BPM", "VORP"]

# Collect each feature as a Series, then stitch them side by side.
great = [great_player_df[column] for column in feature_columns]
great_df = pd.concat(great, axis=1)
great_df.head()

Unnamed: 0,G,MP,PER,BPM,VORP
15,80.0,35.6,26.1,7.3,6.7
35,77.0,34.9,20.1,3.3,3.5
48,66.0,33.0,20.5,3.6,3.1
65,76.0,37.0,25.1,7.3,6.6
86,69.0,33.2,23.2,6.8,5.1


In [11]:
# Standardize the feature frame with StandardScaler before running PCA.
great_scaler = StandardScaler()
great_scaled = great_scaler.fit_transform(great_df)

# Peek at the first five scaled rows.
print(great_scaled[:5])

[[ 1.0552057   0.8091452   0.83070905  0.87086442  1.6426105 ]
 [ 0.82725969  0.55691364 -1.06389054 -0.9399764  -0.37538986]
 [-0.00854236 -0.12771488 -0.9375839  -0.80416333 -0.6276399 ]
 [ 0.75127769  1.31360831  0.51494245  0.87086442  1.57954799]
 [ 0.21940366 -0.05564872 -0.08501408  0.64450932  0.63361032]]


In [12]:
# Build the PCA reducer; three principal components are kept.
N_PCA_COMPONENTS = 3
pca = PCA(n_components=N_PCA_COMPONENTS)

In [13]:
# Fit the PCA model on the standardized features and project every row onto
# its principal components (the model was configured with n_components=3).
# `great_pca` is the array that is wrapped into a DataFrame in the next cell.
great_pca = pca.fit_transform(great_scaled)

In [14]:
# Wrap the PCA output in a DataFrame that shares the players' index so the
# components can be matched back to each player row.
pca_columns = ["principal component 1", "principal component 2", "principal component 3"]
df_great_pca = pd.DataFrame(
    great_pca,
    index=great_player_df.index,
    columns=pca_columns,
)
df_great_pca.head()

Unnamed: 0,principal component 1,principal component 2,principal component 3
15,2.21333,-0.981045,0.20748
35,-1.064168,-1.411882,-0.059114
48,-1.338686,-0.343116,-0.020048
65,2.079154,-1.123076,-0.396937
86,0.705463,-0.096314,0.243921


In [15]:
# Elbow method: record the K-means inertia for every candidate cluster count.
k = list(range(1, 11))
inertia = [
    KMeans(n_clusters=n_clusters, random_state=0).fit(df_great_pca).inertia_
    for n_clusters in k
]

# Plot inertia against k; the bend in the curve suggests the cluster count.
df_elbow = pd.DataFrame({"k": k, "inertia": inertia})
df_elbow.hvplot.line(x="k", y="inertia", xticks=k, title="Elbow Curve")

In [16]:
# Keep the identifying columns ("Player" and "Season") in their own frame so
# the cluster labels can be attached to them later.
id_columns = ("Player", "Season")
player = [great_player_df[column] for column in id_columns]
player_name = pd.concat(player, axis=1)
player_name.head()

Unnamed: 0,Player,Season
15,Giannis Antetokounmpo,2016-17
35,Bradley Beal,2016-17
48,Eric Bledsoe,2016-17
65,Jimmy Butler,2016-17
86,Mike Conley,2016-17


In [17]:
# Per the elbow curve, k = 2 was chosen, so the players are split into two clusters.
N_CLUSTERS = 2

# Build and fit K-means on the PCA-reduced features (fit returns the estimator).
model = KMeans(n_clusters=N_CLUSTERS, random_state=0).fit(df_great_pca)

# Cluster assignment for every row, obtained by predicting on the fitted data.
predictions = model.predict(df_great_pca)

# Store the fitted labels as the "class" column next to each player/season.
player_name["class"] = model.labels_
player_name.head()

Unnamed: 0,Player,Season,class
15,Giannis Antetokounmpo,2016-17,0
35,Bradley Beal,2016-17,1
48,Eric Bledsoe,2016-17,1
65,Jimmy Butler,2016-17,0
86,Mike Conley,2016-17,0


In [18]:
# Order the players by cluster label, then split them into one frame per label.
sorted_class = player_name.sort_values(by=["class"])

is_class_zero = sorted_class["class"] == 0
is_class_one = sorted_class["class"] == 1

class_one = sorted_class.loc[is_class_zero]
class_two = sorted_class.loc[is_class_one]


In [24]:
# Cluster label 0 (`class_one`): attach the numeric features and export.
#
# `pd.concat(..., axis=1)` aligns on the index, so players that are not in
# `class_one` come through with NaN in Player/Season/class. `dropna()` then
# removes every row containing a NaN, which leaves only the label-0 players
# (the NaN padding is also why `class` shows up as a float in the result).
#
# NOTE(review): K-means label numbering is arbitrary. Treating label 0 as the
# "elite" group matches the recorded output of this run, but confirm it
# whenever the data or the model settings change.
df_one = pd.concat([class_one, great_df], axis=1)
top_great = df_one.dropna()
top_great.to_csv('elite_advanced_players.csv')

# Per-column non-null counts as a sanity check. Kept as the last expression
# so the notebook actually displays it.
top_great.count()

In [25]:
# Cluster label 1 (`class_two`): attach the numeric features and export.
#
# `pd.concat(..., axis=1)` aligns on the index, so players that are not in
# `class_two` come through with NaN in Player/Season/class. `dropna()` then
# removes every row containing a NaN, which leaves only the label-1 players.
df_two = pd.concat([class_two, great_df], axis=1)
bottom_great = df_two.dropna()

# NOTE(review): this path is the same file the first cell loads with
# pd.read_csv("great_advanced_players.csv"). Writing here replaces the
# notebook's own input, so a later Restart & Run All starts from this
# already-filtered subset instead of the original data. The path is left
# unchanged because downstream consumers are not visible from here -- confirm
# whether a separate output file (or a preserved copy of the input) is wanted.
bottom_great.to_csv('great_advanced_players.csv')

# Per-column non-null counts as a sanity check. Kept as the last expression
# so the notebook actually displays it.
bottom_great.count()

In [26]:
top_great.head(50)

Unnamed: 0,Player,Season,class,G,MP,PER,BPM,VORP
15,Giannis Antetokounmpo,2016-17,0.0,80.0,35.6,26.1,7.3,6.7
65,Jimmy Butler,2016-17,0.0,76.0,37.0,25.1,7.3,6.6
86,Mike Conley,2016-17,0.0,69.0,33.2,23.2,6.8,5.1
89,DeMarcus Cousins,2016-17,0.0,72.0,34.2,25.8,6.8,5.5
97,Stephen Curry,2016-17,0.0,79.0,33.4,24.6,6.9,5.9
99,Anthony Davis,2016-17,0.0,75.0,36.1,27.5,5.9,5.4
118,Kevin Durant,2016-17,0.0,62.0,33.4,27.6,8.9,5.7
172,James Harden,2016-17,0.0,81.0,36.4,27.4,8.7,8.0
220,LeBron James,2016-17,0.0,74.0,37.8,27.0,7.6,6.7
233,Nikola Jokić,2016-17,0.0,73.0,27.9,26.3,7.3,4.8
