In [1]:
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN
from sklearn.metrics import silhouette_score

In [2]:
def compare_algorithms(X, max_cluster):
    results = []
    cluster_range = range(2, max_cluster + 1)
    
    #KMeans
    for n_clusters in cluster_range:
        kmeans = KMeans(n_clusters=n_clusters, random_state=0, n_init='auto')
        cluster = kmeans.fit_predict(X)
        silhouette_avg = silhouette_score(X, cluster)
        results.append(('KMeans', n_clusters, silhouette_avg))

    #Agglomerativo
    for n_clusters in cluster_range:
        agglo = AgglomerativeClustering(n_clusters=n_clusters)
        cluster = agglo.fit_predict(X)
        silhouette_avg = silhouette_score(X, cluster)
        results.append(('agglomerative', n_clusters, silhouette_avg))
        
    #dbscan
    eps_values = np.arange(0.1, 0.9, 0.1)
    for eps in eps_values:
        dbscan = DBSCAN(eps=eps, min_samples=5)
        cluster = dbscan.fit_predict(X)
        if len(set(cluster)) > 1:
            silhouette_avg = silhouette_score(X, cluster)
            results.append(('DBSCAN', eps, silhouette_avg))
    
    return results       

In [3]:
iris = datasets.load_iris()
scaler = StandardScaler()
scaled_data = scaler.fit_transform(iris.data)
results = compare_algorithms(scaled_data, 10)
results

[('KMeans', 2, np.float64(0.5817500491982808)),
 ('KMeans', 3, np.float64(0.45994823920518635)),
 ('KMeans', 4, np.float64(0.38694104154427816)),
 ('KMeans', 5, np.float64(0.3455109959980947)),
 ('KMeans', 6, np.float64(0.3437191888640958)),
 ('KMeans', 7, np.float64(0.32923622098413463)),
 ('KMeans', 8, np.float64(0.3351935187631491)),
 ('KMeans', 9, np.float64(0.3524877506768797)),
 ('KMeans', 10, np.float64(0.34860741568769416)),
 ('agglomerative', 2, np.float64(0.5770346019475989)),
 ('agglomerative', 3, np.float64(0.4466890410285909)),
 ('agglomerative', 4, np.float64(0.4006363159855973)),
 ('agglomerative', 5, np.float64(0.33058726295230545)),
 ('agglomerative', 6, np.float64(0.3148548010051283)),
 ('agglomerative', 7, np.float64(0.31696983029912795)),
 ('agglomerative', 8, np.float64(0.31094652900725794)),
 ('agglomerative', 9, np.float64(0.31143422475471655)),
 ('agglomerative', 10, np.float64(0.3161120375980681)),
 ('DBSCAN', np.float64(0.30000000000000004), np.float64(-0.1941

In [4]:
df = pd.DataFrame(results, columns=['Agrupador', 'Clusters', 'Score'])
df

Unnamed: 0,Agrupador,Clusters,Score
0,KMeans,2.0,0.58175
1,KMeans,3.0,0.459948
2,KMeans,4.0,0.386941
3,KMeans,5.0,0.345511
4,KMeans,6.0,0.343719
5,KMeans,7.0,0.329236
6,KMeans,8.0,0.335194
7,KMeans,9.0,0.352488
8,KMeans,10.0,0.348607
9,agglomerative,2.0,0.577035


In [6]:
max_score_index = df['Score'].idxmax()
df.loc[max_score_index]

Agrupador     KMeans
Clusters         2.0
Score        0.58175
Name: 0, dtype: object