<a href="https://colab.research.google.com/github/feliperodighero/Machine-Learning-Study/blob/main/ComparandoClusters.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd

from sklearn import datasets
from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import StandardScaler

In [2]:
def compare_algorithms(X, max_clusters, eps_values=None, min_samples=5):
    """Score KMeans, agglomerative clustering and DBSCAN on ``X`` by silhouette.

    Parameters
    ----------
    X : array-like
        Feature matrix (rows are samples); passed unchanged to every
        estimator and to ``silhouette_score``.
    max_clusters : int
        Largest ``n_clusters`` tried for KMeans and AgglomerativeClustering.
        The sweep starts at 2 and is capped at ``n_samples - 1``.
    eps_values : iterable of float, optional
        DBSCAN ``eps`` values to try. Defaults to 0.1, 0.2, ..., 0.8.
    min_samples : int, default=5
        ``min_samples`` forwarded to DBSCAN.

    Returns
    -------
    list of dict
        One record per scored run. KMeans and AgglomerativeClustering records
        have the keys ``'Algorithm'``, ``'n_clusters'`` and ``'Score'``;
        DBSCAN records have ``'Algorithm'``, ``'eps'`` and ``'Score'``.
        DBSCAN runs that find fewer than two real clusters are omitted.
    """
    results = []

    n_samples = np.shape(X)[0]
    # silhouette_score is only defined for 2 <= n_labels <= n_samples - 1,
    # so never ask for more clusters than that.
    largest_k = min(max_clusters, n_samples - 1)
    cluster_range = range(2, largest_k + 1)

    # KMeans (fixed random_state so repeated runs give the same partition)
    for n_clusters in cluster_range:
        kmeans = KMeans(n_clusters=n_clusters, random_state=0, n_init='auto')
        labels = kmeans.fit_predict(X)
        score = silhouette_score(X, labels)
        results.append({'Algorithm': 'KMeans', 'n_clusters': n_clusters, 'Score': score})

    # Agglomerative
    for n_clusters in cluster_range:
        agglomerative = AgglomerativeClustering(n_clusters=n_clusters)
        labels = agglomerative.fit_predict(X)
        score = silhouette_score(X, labels)
        results.append({'Algorithm': 'AgglomerativeClustering', 'n_clusters': n_clusters, 'Score': score})

    # DBSCAN
    if eps_values is None:
        # Integer steps divided by 10 give exactly 0.1 ... 0.8; stepping a
        # float range by 0.1 accumulates error (e.g. 0.30000000000000004).
        eps_values = np.arange(1, 9) / 10
    for eps in eps_values:
        dbscan = DBSCAN(eps=eps, min_samples=min_samples)
        labels = dbscan.fit_predict(X)
        distinct_labels = set(labels)
        # DBSCAN labels noise as -1; noise is not a cluster, so it must not
        # count towards the "at least two clusters" requirement.
        n_found = len(distinct_labels - {-1})
        if n_found < 2 or len(distinct_labels) > n_samples - 1:
            continue
        # Noise points are still passed to silhouette_score, which treats the
        # -1 label as one more group when computing the score.
        score = silhouette_score(X, labels)
        results.append({'Algorithm': 'DBSCAN', 'eps': float(eps), 'Score': score})

    return results

In [4]:
# Load the Iris dataset bundled with scikit-learn; its `.data` and `.target`
# attributes are read in a later cell.
iris = datasets.load_iris()

In [5]:
# Scaler that is fitted on the Iris features in the next cell.
scaler = StandardScaler()

In [6]:
# Keep the dataset's target labels next to the features.
y = iris.target
# Fit the scaler on the raw features and transform them in one step.
X = scaler.fit_transform(iris.data)

In [10]:
# Run every algorithm on the scaled features, trying up to 10 clusters.
results = compare_algorithms(X, max_clusters=10)

# One row per scored run; keys missing from a record become NaN.
df = pd.DataFrame(data=results)
df

Unnamed: 0,Algorithm,n_clusters,Score,eps
0,KMeans,2.0,0.58175,
1,KMeans,3.0,0.459948,
2,KMeans,4.0,0.386941,
3,KMeans,5.0,0.345511,
4,KMeans,6.0,0.343719,
5,KMeans,7.0,0.329236,
6,KMeans,8.0,0.335194,
7,KMeans,9.0,0.352488,
8,KMeans,10.0,0.348607,
9,AgglomerativeClustering,2.0,0.577035,


In [12]:
# Index label of the run with the highest silhouette score.
max_score_index = df.loc[:, 'Score'].idxmax()
print(df.loc[max_score_index])

Algorithm      KMeans
n_clusters        2.0
Score         0.58175
eps               NaN
Name: 0, dtype: object
