<a href="https://colab.research.google.com/github/felipe-luis00/Projeto-Implementa-o-e-An-lise-do-Algoritmo-de-K-means-com-o-Dataset-Human-Activity-Recognition/blob/main/Implementa%C3%A7%C3%A3o_e_An%C3%A1lise_do_Algoritmo_de_K_means_com_o_Dataset_Human_Activity_Recognition_ipynb.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Importação de bibliotecas
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import matplotlib.pyplot as plt
import seaborn as sns

# File paths (relative to the working directory) for the training split.
train_features_path = "UCI_HAR_Dataset/train/X_train.txt"
train_labels_path = "UCI_HAR_Dataset/train/y_train.txt"
features_names_path = "UCI_HAR_Dataset/features.txt"

# Load the data. The files are whitespace-delimited with no header row.
# `sep=r"\s+"` is used instead of `delim_whitespace=True`, which is
# deprecated in recent pandas releases.
raw_features_names = pd.read_csv(features_names_path, sep=r"\s+", header=None)[1].tolist()

# pandas rejects duplicated entries in `names=`, so repeated feature names
# get a numeric suffix ("name", "name_1", "name_2", ...). Names that occur
# only once are left untouched.
# NOTE(review): the public UCI HAR features.txt is reported to contain
# repeated names - confirm against the local copy of the dataset.
_name_occurrences = {}
features_names = []
for _name in raw_features_names:
    _seen = _name_occurrences.get(_name, 0)
    _name_occurrences[_name] = _seen + 1
    features_names.append(_name if _seen == 0 else f"{_name}_{_seen}")

X_train = pd.read_csv(train_features_path, sep=r"\s+", header=None, names=features_names)
y_train = pd.read_csv(train_labels_path, sep=r"\s+", header=None, names=["Activity"])

# Standardize the features: learn the scaling parameters from the training
# matrix, then apply them to that same matrix.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)

# Project the standardized data onto two principal components so the
# clusters can be drawn on a 2D plot.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_train_scaled)

# Choosing the number of clusters: sweep K once and record both the inertia
# (for the elbow method) and the silhouette score for every candidate.
# Each K is fitted a single time; with a fixed random_state, fitting the
# same configuration again for the second metric would only repeat the work.
inertia = []
silhouette_scores = []
k_range = range(2, 11)
for k in k_range:
    kmeans = KMeans(n_clusters=k, init="k-means++", random_state=42, n_init=10)
    labels = kmeans.fit_predict(X_pca)
    inertia.append(kmeans.inertia_)
    silhouette_scores.append(silhouette_score(X_pca, labels))

# Elbow method plot: inertia as a function of K.
plt.figure(figsize=(8, 5))
plt.plot(k_range, inertia, marker="o", linestyle="--")
plt.title("Método do Cotovelo")
plt.xlabel("Número de Clusters (K)")
plt.ylabel("Inércia")
plt.show()

# Silhouette plot: silhouette score as a function of K.
plt.figure(figsize=(8, 5))
plt.plot(k_range, silhouette_scores, marker="o", linestyle="--")
plt.title("Silhouette Score por Número de Clusters")
plt.xlabel("Número de Clusters (K)")
plt.ylabel("Silhouette Score")
plt.show()

# Fit the final model with the chosen number of clusters (example: K=3)
# and keep the cluster label assigned to each sample.
optimal_k = 3
kmeans = KMeans(n_clusters=optimal_k, init="k-means++", random_state=42, n_init=10)
kmeans.fit(X_pca)
clusters = kmeans.labels_

# 2D view of the clusters: samples colored by cluster label, with the
# fitted centroids drawn on top as large red X markers.
fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(
    x=X_pca[:, 0],
    y=X_pca[:, 1],
    hue=clusters,
    palette="viridis",
    s=50,
    ax=ax,
)
centroids = kmeans.cluster_centers_
ax.scatter(
    centroids[:, 0],
    centroids[:, 1],
    s=200,
    c="red",
    label="Centroides",
    marker="X",
)
ax.set_title(f"Agrupamento com K-means (K={optimal_k})")
ax.set_xlabel("Componente Principal 1")
ax.set_ylabel("Componente Principal 2")
ax.legend()
plt.show()
