In [15]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import silhouette_score, davies_bouldin_score
from kmedoids import KMedoids
import warnings
warnings.filterwarnings("ignore")

In [16]:
# Load the engineered feature table; the whole frame is handed to the scaler as-is.
X = pd.read_csv('../dataset/FM24_engineered_features.csv')

# Fit the standardiser on the table, then transform that same table with it.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

In [17]:
# Sweep the number of clusters and record two internal quality scores for each k.
results = []
Ks = range(2, 11)

# Fixed seed so that a fresh "Restart & Run All" reproduces the same score table
# (and therefore the same selected K further down).
RANDOM_STATE = 42

for k in Ks:
    # NOTE(review): per the kmedoids package docs the initial medoids are drawn
    # at random by default, so an unseeded fit can give different labels on
    # each run -- confirm against the installed version.
    kmedoids = KMedoids(
        n_clusters=k,
        metric='euclidean',
        method='alternate',
        random_state=RANDOM_STATE,
    )

    kmedoids.fit(X_scaled)
    labels = kmedoids.labels_

    # Both scores are computed on the same scaled matrix the model was fitted on.
    sil = silhouette_score(X_scaled, labels)
    db = davies_bouldin_score(X_scaled, labels)
    results.append({
        "k": k,
        "silhouette": sil,
        "davies_bouldin": db
    })

In [18]:
# Gather the per-k score records into one table and print it under a heading.
df_k_results = pd.DataFrame(data=results)
print("\nCluster quality:", df_k_results, sep="\n")


Cluster quality:
    k  silhouette  davies_bouldin
0   2    0.492711        0.766150
1   3    0.308564        1.312674
2   4    0.219255        1.526247
3   5    0.198827        1.693984
4   6    0.181795        1.728049
5   7    0.133163        1.801482
6   8    0.134878        1.945701
7   9    0.125736        1.767359
8  10    0.124334        1.964392


In [19]:
# Order the table from highest to lowest silhouette; the first row is the pick.
ranked = df_k_results.sort_values(by="silhouette", ascending=False)
best_row = ranked.iloc[0]

# Cast k to a plain int before reporting it.
best_k = int(best_row["k"])
print("\nBest K:", best_k)


Best K: 2
