In [2]:
import pandas as pd
import matplotlib.pyplot as plt 
from sklearn.cluster import KMeans 
from sklearn.preprocessing import StandardScaler 
from sklearn.datasets import load_iris 
from sklearn.decomposition import PCA 

In [3]:
# Real data: the Iris dataset (https://archive.ics.uci.edu/ml/datasets/iris)
data = load_iris()
df = pd.DataFrame(data.data, columns=data.feature_names)

# Pre-processing: standardize the features before clustering so that every
# column contributes on a comparable scale to the distances KMeans uses.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(df)

# KMeans (https://scikit-learn.org/stable/modules/generated/sklearn.cluster.KMeans.html)
# n_init is pinned explicitly: its default changed from 10 to 'auto' in
# scikit-learn 1.4 (and omitting it emits a FutureWarning in 1.2/1.3), so
# relying on the default yields different clusterings across library versions
# even with a fixed random_state.
kmeans = KMeans(n_clusters=3, init='k-means++', n_init=10, random_state=42)
kmeans.fit(X_scaled)

# Visualization with dimensionality reduction (PCA): project the standardized
# samples onto the first two principal components.
pca = PCA(n_components=2)
principal_components = pca.fit_transform(X_scaled)
# Project the cluster centers once with the already-fitted PCA so they share
# the same 2-D coordinate system as the samples.
centers_2d = pca.transform(kmeans.cluster_centers_)

# Use an explicit figure/axes pair so the plot never lands on a figure left
# open by an earlier cell, and so exactly this figure is saved and closed.
fig, ax = plt.subplots()
ax.scatter(principal_components[:, 0], principal_components[:, 1], c=kmeans.labels_, cmap='viridis')
ax.scatter(centers_2d[:, 0], centers_2d[:, 1], s=200, marker='X', c='red')
ax.set_xlabel('Componente Principal 1')
ax.set_ylabel('Componente Principal 2')
ax.set_title('Clusters Kmeans - Iris Dataset')
fig.savefig('kmeans_realistic_result.png')
plt.close(fig)