# KMeans e DBSCAN


In [None]:
import plotly.express as px
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans, DBSCAN

In [None]:
# Read the energy-consumption dataset and take a first look at it
DATA_PATH = "./Energy_consumption.csv"
mydata = pd.read_csv(DATA_PATH)
mydata.info()  # column dtypes and non-null counts
mydata.head()  # preview of the first rows

In [None]:
# Data cleaning and time-based feature engineering
# Discard rows with missing values, then remove the RenewableEnergy column
mydata = mydata.dropna().drop(columns=['RenewableEnergy'])

# Parse the timestamps once and derive the time features from the parsed series
timestamps = pd.to_datetime(mydata['Timestamp'])
mydata['Timestamp'] = timestamps
mydata['Hour'] = timestamps.dt.hour
# Bucket the hour into 4-hour windows numbered from 1 (hours 0-3 -> 1, ..., 20-23 -> 6)
mydata['part_of_day'] = (timestamps.dt.hour % 24 + 4) // 4
mydata.head()

In [None]:
from sklearn.preprocessing import LabelEncoder

# One LabelEncoder per categorical column, fitted and applied in a single pass
encoders = {
    'DayOfWeek': LabelEncoder(),
    'LightingUsage': LabelEncoder(),
    'Holiday': LabelEncoder(),
    'HVACUsage': LabelEncoder(),
}
for column, encoder in encoders.items():
    mydata[column] = encoder.fit_transform(mydata[column])

# Expose each fitted encoder under its own name as well
dayofweek = encoders['DayOfWeek']
lightingusage = encoders['LightingUsage']
holiday = encoders['Holiday']
HVACUsage = encoders['HVACUsage']

# The raw timestamp is dropped now that Hour and part_of_day have been derived from it
mydata = mydata.drop(columns=['Timestamp'])
mydata.head()

In [None]:
# Select the clustering features and standardize them
from sklearn.preprocessing import StandardScaler

features = ['DayOfWeek', 'Hour', 'part_of_day', 'LightingUsage', 'Holiday', 'HVACUsage']
scaler = StandardScaler()
# fit_transform returns a plain array, so wrap it back into a DataFrame with named columns
X = pd.DataFrame(scaler.fit_transform(mydata[features]), columns=features)
X.head()

Primeiro, vamos fazer a clusterização com KMeans.

In [None]:
# KMeans
# To choose the number of clusters we can use the elbow method

# SSE = sum of squared errors (read from KMeans.inertia_); the lower the better
k_values = range(1, 11)
sse = []
for k in k_values:
    kmeans = KMeans(n_clusters=k, random_state=42, n_init=10).fit(X)
    sse.append(kmeans.inertia_)

# Plot SSE against k; the "elbow" of the curve suggests a reasonable cluster count
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(k_values, sse, marker='o')
ax.set_xlabel('Número de clusters')
ax.set_ylabel('SSE')
ax.set_title('Método do Cotovelo')
plt.show()


In [None]:

# Automated elbow detection with yellowbrick's KElbowVisualizer
from yellowbrick.cluster import KElbowVisualizer

# random_state=42 matches the other KMeans runs in this notebook, so the curve
# is reproducible from run to run.
# A 2-tuple `k` is treated as a half-open range, so (1, 11) evaluates k = 1..10,
# the same candidates as the manual elbow plot (the previous (1, 10) stopped at 9).
elbow_method = KElbowVisualizer(KMeans(random_state=42, n_init=10), k=(1, 11))
elbow_method.fit(X)
elbow_method.show()


In [None]:
# IPython magic (not plain Python): installs the ipykernel package with pip.
# NOTE(review): yellowbrick is imported by another cell but is not installed here —
# confirm whether it should be installed as well.
%pip install ipykernel

In [None]:
# Final model: fit KMeans with 4 clusters and store each row's cluster label
kmeans = KMeans(n_clusters=4, random_state=42, n_init=10).fit(X)
kmeans_labels = kmeans.labels_
mydata['kmeans_labels'] = kmeans_labels

# Optional 3D view of the clusters (currently disabled):
#fig = px.scatter_3d(mydata, x='DayOfWeek', y='Hour', z='part_of_day', color='kmeans_labels')
#fig.show()
mydata.head()

In [None]:
# reduzindo a dimensionalidade para visualizar
from sklearn.decomposition import PCA
PCA = PCA(n_components=2)
X_pca = PCA.fit_transform(X)
# plt = plt.scatter(X_pca[:, 0], X_pca[:, 1], c=kmeans_labels, cmap='viridis')
# plt.xlabel('Componente Principal 1')
# plt.ylabel('Componente Principal 2')