In [None]:
# 📚 Basic Libraries
import numpy as np
import pandas as pd
import warnings

# 📊 Visualizations
import matplotlib.pyplot as plt
import seaborn as sns

# 🤖 Machine Learning
from sklearn.preprocessing import StandardScaler

In [None]:
# Notebook-wide display / warning configuration.
pd.options.display.max_columns = None  # never truncate columns when rendering a frame
warnings.simplefilter('ignore')  # silence every warning category for the whole session

In [None]:
# Load the merged player table from the working directory.
df = pd.read_csv(filepath_or_buffer="players_merged.csv")

In [None]:
# Render the freshly loaded frame as the cell output for a first look.
df

In [None]:
# Remove the 'Unnamed: 0' column (presumably a row index written out by an
# earlier to_csv call — TODO confirm against the file's producer).
# Rebinding `df` with errors='ignore' instead of dropping in place keeps this
# cell idempotent: re-running it no longer raises KeyError once the column
# has already been removed.
df = df.drop(columns='Unnamed: 0', errors='ignore')

In [None]:
# Row count for each distinct value of the `position` column.
df['position'].value_counts()

In [None]:
# Assists vs. rebounds; `position` drives both the colour and the marker shape.
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(
    data=df,
    x='AST',
    y='TRB',
    hue='position',
    style='position',
    s=100,
    ax=ax,
)
ax.set_title('position analytics')
ax.set_xlabel('Assists per game')
ax.set_ylabel('Rebounds per game')
ax.legend(title='position')
plt.show()

In [None]:
# Working copy without the identifying / draft / team columns; `df1` is what
# the pair plot and the feature matrix below are built from.
df1 = df.drop(
    labels=[
        'Player',
        'country',
        'draft_year',
        'draft_number',
        'conference',
        'division',
        'team',
    ],
    axis='columns',
)

In [None]:
# Pairwise relationships between the remaining columns, coloured by position.
sns.pairplot(data=df1, hue="position")

In [None]:
from sklearn.decomposition import PCA

In [None]:
# Separate the position label (y) from every other column (X).
y = df1.loc[:, 'position']
X = df1.loc[:, df1.columns != 'position']

In [None]:
# Fit a StandardScaler (default settings) on X, then apply it to the same data.
# The fitted scaler stays available as `scaler`; the scaled array is displayed.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)
X_scaled

In [None]:
# Reduce the scaled features to two principal components.
# fit_transform fits `pca` on X_scaled and returns the projected data in one call.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)

In [None]:
# Wrap the two components in a frame that shares y's index. Assigning a Series
# to a column aligns on index, so building the frame on a fresh RangeIndex
# would silently yield NaN / mismatched labels whenever y's index is not
# 0..n-1 (e.g. after filtering rows). Reusing y.index keeps rows and labels
# paired in every case and is identical to the old result for a default index.
pca_df = pd.DataFrame(data=X_pca, columns=['PC1', 'PC2'], index=y.index)
pca_df['position'] = y

In [None]:
# Preview the first five projected rows.
pca_df.head(n=5)

In [None]:
# Scatter of the two principal components, coloured and marked by position.
plt.figure(figsize=(8, 6))
sns.scatterplot(data=pca_df, x='PC1', y='PC2', hue='position', style='position', s=100)
# The title previously read "Iris Dataset" (copy-paste leftover); the data
# plotted here is the player table loaded from players_merged.csv.
plt.title('PCA of Player Statistics')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.legend(title='position')
plt.show()

In [None]:
from sklearn.cluster import KMeans

In [None]:
# Fit a 4-cluster KMeans on the scaled features (seeded for repeatability).
# fit() returns the estimator itself, so it can be bound and fitted in one step;
# the trailing expression keeps the estimator as the cell output.
kmeans = KMeans(n_clusters=4, random_state=42).fit(X_scaled)
kmeans

In [None]:
# Cluster id assigned to each row of X_scaled by the fitted model.
kmeans.labels_

In [None]:
# Put the scaled feature array back into a labelled frame. The frame reuses
# X's index so that later index-aligned assignments (the `position` labels,
# and copying `Group` back onto the original player frame) pair up the right
# rows even if X's index is not a plain 0..n-1 range.
df_clustered = pd.DataFrame(X_scaled, columns=X.columns, index=X.index)
df_clustered

In [None]:
# Add the raw cluster ids and the position labels as two extra columns.
df_clustered = df_clustered.assign(Group=kmeans.labels_, position=y)

In [None]:
# Display the scaled features together with the new Group / position columns.
df_clustered

In [None]:
# Human-readable names for the four KMeans cluster ids. The ids are only
# meaningful for the model fitted above (n_clusters=4, random_state=42);
# re-check this mapping whenever the clustering is fitted differently.
group_mapping = {
    1: 'Superstars',
    0: 'All-Stars',
    3: 'Role Players',
    2: 'Prospects and Bench Players'
}

# Map from the raw labels held by the fitted model instead of from the current
# 'Group' column. Mapping the column onto itself is not idempotent: on a second
# run the already-named entries are not keys of `group_mapping`, so every
# value would become NaN.
df_clustered['Group'] = pd.Series(kmeans.labels_, index=df_clustered.index).map(group_mapping)

In [None]:
# Minutes vs. points, coloured by cluster group.
# df_clustered holds the StandardScaler output, so both axes are in
# standardized units rather than raw per-game values; the labels say so.
plt.figure(figsize=(8, 6))
sns.scatterplot(data=df_clustered, x='MP', y='PTS', hue='Group', palette='Set1', s=100)
plt.title('Players efficiency based on points')
plt.xlabel('Minutes per game (standardized)')
plt.ylabel('Points per game (standardized)')
plt.legend(title='Group')
plt.show()

In [None]:
# Minutes vs. rebounds, coloured by cluster group.
# df_clustered holds the StandardScaler output, so both axes are in
# standardized units rather than raw per-game values; the labels say so.
plt.figure(figsize=(8, 6))
sns.scatterplot(data=df_clustered, x='MP', y='TRB', hue='Group', palette='Set1', s=100)
plt.title('Players efficiency based on rebounds')
plt.xlabel('Minutes per game (standardized)')
plt.ylabel('Rebounds per game (standardized)')
plt.legend(title='Group')
plt.show()

In [None]:
# Minutes vs. assists, coloured by cluster group.
# df_clustered holds the StandardScaler output, so both axes are in
# standardized units rather than raw per-game values; the labels say so.
plt.figure(figsize=(8, 6))
sns.scatterplot(data=df_clustered, x='MP', y='AST', hue='Group', palette='Set1', s=100)
plt.title('Players efficiency based on assists')
plt.xlabel('Minutes per game (standardized)')
plt.ylabel('Assists per game (standardized)')
plt.legend(title='Group')
plt.show()

In [None]:
# Minutes vs. the GmSc column, coloured by cluster group.
# df_clustered holds the StandardScaler output, so both axes are in
# standardized units rather than raw values; the labels say so.
plt.figure(figsize=(8, 6))
sns.scatterplot(data=df_clustered, x='MP', y='GmSc', hue='Group', palette='Set1', s=100)
plt.title('Players performance')
plt.xlabel('Minutes per game (standardized)')
plt.ylabel('Performance score (standardized)')
plt.legend(title='Group')
plt.show()

In [None]:
from sklearn.metrics import silhouette_score

In [None]:
# Mean silhouette coefficient of the 4-cluster solution on the scaled features.
silhouette_avg = silhouette_score(X_scaled, labels=kmeans.labels_)
print(f'Silhouette Score: {silhouette_avg:.3f}')

In [None]:
# Collect the KMeans inertia for k = 1..10 to feed the elbow plot.
range_of_clusters = range(1, 11)
inertias = []

for k in range_of_clusters:
    # fit() returns the estimator, so construction and fitting are chained.
    model = KMeans(n_clusters=k, n_init=10, random_state=42).fit(X_scaled)
    inertias.append(model.inertia_)

In [None]:
import plotly.graph_objects as go

# Elbow curve: inertia against the number of clusters, one tick per k.
fig = go.Figure(
    data=[
        go.Scatter(
            x=list(range_of_clusters),
            y=inertias,
            mode='lines+markers',
            name='Inertia',
        )
    ]
)
fig.update_layout(
    title='Elbow Method For Optimal k',
    xaxis=dict(
        title=dict(text='Number of clusters, k'),
        tickmode='array',
        tickvals=list(range_of_clusters),
    ),
    yaxis=dict(title=dict(text='Inertia')),
)
fig.show()

In [None]:
# Independent (deep) copy of the player frame to receive the cluster groups.
df_grouped = df.copy(deep=True)

In [None]:
# Copy the named cluster group onto the player frame (aligned on index).
df_grouped = df_grouped.assign(Group=df_clustered['Group'])

In [None]:
# Write the grouped players to disk without the index column.
df_grouped.to_csv(path_or_buf='players_classification.csv', index=False)