In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
import hvplot.pandas
import plotly.express as px

In [None]:
# Load the preprocessed NBA stats from the CSV sitting next to this notebook.
file_path = "./nba_stats_clean.csv"
stats_df = pd.read_csv(filepath_or_buffer=file_path)

# Preview the first rows to sanity-check the load.
stats_df.head()

In [None]:
# Standardize every column of stats_df with StandardScaler so the features
# are on a comparable scale before PCA.
# NOTE(review): assumes stats_df contains only numeric columns - confirm
# against the preprocessing step.
stats_scaled = StandardScaler().fit_transform(stats_df)

# Preview the *scaled* array; the previous version printed the unscaled
# stats_df here, which hid the result of this step.
print(stats_scaled[0:5])

In [None]:
# Build the PCA estimator; two components are kept so the reduced data can
# be drawn on a 2-D scatter plot later in the notebook.
n_principal_components = 2
pca = PCA(n_components=n_principal_components)

In [None]:
# Fit the PCA model on the standardized data and project it onto the
# two principal components configured on `pca`.
stats_pca = pca.fit_transform(stats_scaled)

In [None]:
# Wrap the PCA output in a DataFrame with one labelled column per component.
pca_column_names = ["principal component 1", "principal component 2"]
stats_pca_df = pd.DataFrame(stats_pca, columns=pca_column_names)

# Preview the first rows of the reduced data.
stats_pca_df.head()

In [None]:
# Find a suitable value of K with the elbow method.
k = list(range(1, 11))

# Cluster on the two principal-component columns only. A later cell adds a
# "class" column to stats_pca_df; selecting the columns explicitly keeps this
# cell correct when it is re-run after that cell has executed.
pca_features = stats_pca_df[["principal component 1", "principal component 2"]]

# Inertia of a fitted KMeans model for each candidate number of clusters
# (random_state is fixed so the curve is reproducible).
inertia = [
    KMeans(n_clusters=n_clusters, random_state=0).fit(pca_features).inertia_
    for n_clusters in k
]

# Create the elbow curve.
elbow_data = {"k": k, "inertia": inertia}
df_elbow = pd.DataFrame(elbow_data)
df_elbow.hvplot.line(x="k", y="inertia", xticks=k, title="Elbow Curve")

In [None]:
# Initialize the K-means model with 3 clusters and a fixed seed.
model = KMeans(n_clusters=3, random_state=0)

# Fit on the two principal-component columns only. This cell writes a "class"
# column into stats_pca_df below, so fitting on the whole frame would feed the
# previous labels back in as a feature whenever the cell is re-run.
pca_features = stats_pca_df[["principal component 1", "principal component 2"]]

# Fit the model and get the cluster index of every row in a single pass.
predictions = model.fit_predict(pca_features)

# Add the predicted class column and preview the result.
stats_pca_df["class"] = predictions
stats_pca_df.head()

In [None]:
# Scatter plot of the two principal components, split by the "class" column,
# which is also shown on hover.
scatter_options = {
    "x": "principal component 1",
    "y": "principal component 2",
    "hover_cols": ["class"],
    "by": "class",
}
stats_pca_df.hvplot.scatter(**scatter_options)