# Análise de Agrupamento com KMeans, DBSCAN e SOM
Este notebook responde às questões propostas sobre clustering com análise de outliers, normalização, avaliação e comparação entre algoritmos.

In [None]:

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.cluster import KMeans, DBSCAN
from sklearn.metrics import silhouette_score, davies_bouldin_score
from scipy.spatial.distance import cdist
from sklearn.decomposition import PCA
from sklearn import datasets

# Load the Iris dataset: one column per measured feature, plus the
# integer class label in a trailing 'target' column.
iris = datasets.load_iris()
df = pd.DataFrame(data=iris.data, columns=iris.feature_names).assign(target=iris.target)
df.head()


In [None]:

# Outlier identification using the interquartile range (IQR) rule
def remove_outliers_iqr(df, cols, factor=1.5):
    """Return the rows of ``df`` that lie inside the IQR fences of every column in ``cols``.

    For each column the fences are ``Q1 - factor * IQR`` and
    ``Q3 + factor * IQR`` (both inclusive), where Q1/Q3 are the 25th/75th
    percentiles of that column.

    All fences are computed from the frame as it was passed in, and the
    per-column checks are combined into a single mask. Filtering column by
    column instead would recompute later quartiles on already-trimmed data,
    making the result depend on the order of ``cols``.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data. It is not modified.
    cols : iterable of column labels
        Columns to check. If empty, ``df`` is returned unchanged.
    factor : float, default 1.5
        Multiplier applied to the IQR to position the fences.

    Returns
    -------
    pandas.DataFrame
        The retained rows, keeping their original index labels (the index is
        not reset). Comparisons against NaN are False, so a row with a missing
        value in any checked column is dropped.
    """
    keep = None
    for col in cols:
        values = df[col]
        q1 = values.quantile(0.25)
        q3 = values.quantile(0.75)
        iqr = q3 - q1
        # Positional boolean array, so duplicate index labels cannot cause
        # alignment surprises when the masks are combined.
        in_range = values.between(q1 - factor * iqr, q3 + factor * iqr).to_numpy()
        keep = in_range if keep is None else keep & in_range

    if keep is None:
        # No columns to check: nothing to filter.
        return df
    return df[keep]

# Filter a copy of the raw frame on every feature column and show the
# resulting (rows, columns) shape.
df_no_outliers = remove_outliers_iqr(df=df.copy(), cols=iris.feature_names)
df_no_outliers.shape


In [None]:

# Rescale each feature with MinMaxScaler, fitted on the outlier-free rows.
# The labels are attached positionally (as a plain array) because the new
# frame gets a fresh 0..n-1 index while df_no_outliers keeps its original one.
scaler = MinMaxScaler()
df_scaled = pd.DataFrame(
    scaler.fit_transform(df_no_outliers[iris.feature_names]),
    columns=iris.feature_names,
).assign(target=df_no_outliers['target'].to_numpy())
df_scaled.head()


In [None]:

# Sweep candidate cluster counts and record two diagnostics for each k:
#   * distortion: mean Euclidean distance from each sample to its nearest centroid
#   * silhouette score of the resulting cluster labels
distortions = []
silhouette_scores = []
K = range(2, 10)

# Loop-invariant: the scaled feature matrix without the 'target' column.
scaled_features = df_scaled[iris.feature_names]

for k in K:
    # n_init is pinned explicitly: scikit-learn changed its default from 10 to
    # 'auto' in 1.4 (and warns about it in 1.2/1.3), so leaving it implicit
    # makes the curves depend on the installed version.
    kmeans = KMeans(n_clusters=k, n_init=10, random_state=42)
    kmeans.fit(scaled_features)

    # Distance of every sample to its closest centroid, averaged over samples.
    nearest_centroid_dist = cdist(scaled_features, kmeans.cluster_centers_, 'euclidean').min(axis=1)
    distortions.append(nearest_centroid_dist.mean())
    silhouette_scores.append(silhouette_score(scaled_features, kmeans.labels_))

# Side-by-side panels: elbow curve (left) and silhouette curve (right).
fig, (ax_elbow, ax_silhouette) = plt.subplots(1, 2, figsize=(12, 5))

ax_elbow.plot(K, distortions, 'bx-')
ax_elbow.set_xlabel('k')
ax_elbow.set_ylabel('Distortion')
ax_elbow.set_title('Método do Cotovelo')

ax_silhouette.plot(K, silhouette_scores, 'bo-')
ax_silhouette.set_xlabel('k')
ax_silhouette.set_ylabel('Silhouette Score')
ax_silhouette.set_title('Coeficiente de Silhouette')

fig.tight_layout()
plt.show()
