In [1]:
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN
from sklearn.metrics import silhouette_score

In [9]:
def compare_algoritmos(X, max_clusters, eps_values=None, min_samples=5, random_state=0):
  """Compare KMeans, Agglomerative and DBSCAN clusterings of X by silhouette score.

  KMeans and AgglomerativeClustering are fitted once for every cluster count
  from 2 up to ``max_clusters`` (inclusive). DBSCAN is fitted once for every
  radius in ``eps_values``.

  Parameters
  ----------
  X : array-like
      Samples to cluster; handed unchanged to every estimator and to
      ``silhouette_score``.
  max_clusters : int
      Largest cluster count tried for KMeans and Agglomerative.
  eps_values : iterable of float, optional
      DBSCAN neighbourhood radii to try. Defaults to
      ``np.arange(0.1, 0.9, 0.1)`` (the stop value 0.9 is excluded).
  min_samples : int, default 5
      ``min_samples`` passed to DBSCAN.
  random_state : int, default 0
      Seed passed to KMeans.

  Returns
  -------
  list of tuple
      ``(algorithm_name, parameter, silhouette)`` entries, in the order
      KMeans, Agglomerative, DBScan. ``parameter`` is the cluster count for
      the first two algorithms and ``eps`` for DBScan. DBSCAN runs whose
      labelling cannot be scored are left out.
  """
  # Every scored configuration is collected in this list.
  results = []
  cluster_range = range(2, max_clusters + 1)

  # Algorithms that take the number of clusters directly share one loop.
  fixed_k_models = (
    ('KMeans',
     lambda k: KMeans(n_clusters=k, random_state=random_state, n_init='auto')),
    ('Agglomerative',
     lambda k: AgglomerativeClustering(n_clusters=k)),
  )
  for name, build_model in fixed_k_models:
    for n_clusters in cluster_range:
      clusters = build_model(n_clusters).fit_predict(X)
      results.append((name, n_clusters, silhouette_score(X, clusters)))

  # DBSCAN chooses the number of clusters itself, so it is swept over eps.
  if eps_values is None:
    eps_values = np.arange(0.1, 0.9, 0.1)
  for eps in eps_values:
    clusters = DBSCAN(eps=eps, min_samples=min_samples).fit_predict(X)
    # Noise points carry the label -1 and are passed on as-is, so they count
    # as one more label in both the check below and the score.
    n_labels = np.unique(clusters).size
    # silhouette_score only accepts between 2 and n_samples - 1 distinct
    # labels; any other labelling is skipped instead of raising.
    if 2 <= n_labels <= len(clusters) - 1:
      results.append(('DBScan', eps, silhouette_score(X, clusters)))

  return results

In [12]:
# Load the Iris measurements and standardize the features before clustering.
iris = datasets.load_iris()
scaler = StandardScaler()
scaled_data = scaler.fit_transform(iris.data)

# Score every algorithm, trying up to 10 clusters for the fixed-k methods.
results = compare_algoritmos(X=scaled_data, max_clusters=10)

# One row per (algorithm, parameter) configuration with its silhouette score.
df = pd.DataFrame(
    data=results,
    columns=['Algoritmo', 'Parâmetro', 'Score'],
)
df

Unnamed: 0,Algoritmo,Parâmetro,Score
0,KMeans,2.0,0.58175
1,KMeans,3.0,0.459948
2,KMeans,4.0,0.386941
3,KMeans,5.0,0.345511
4,KMeans,6.0,0.343719
5,KMeans,7.0,0.329236
6,KMeans,8.0,0.335194
7,KMeans,9.0,0.352488
8,KMeans,10.0,0.348607
9,Agglomerative,2.0,0.577035


No KMeans, o maior score de silhueta ocorre com 2 clusters, o que sugere que, segundo essa métrica e entre os valores testados, 2 é o número de clusters mais adequado para estes dados.

In [17]:
# Find the row label of the highest silhouette score and print that row.
max_score_index = df.Score.idxmax()
print(df.loc[max_score_index, :])

Algoritmo     KMeans
Parâmetro        2.0
Score        0.58175
Name: 0, dtype: object
