In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans, DBSCAN
from sklearn.metrics import silhouette_score
from scipy.cluster.hierarchy import dendrogram, linkage, fcluster
from sklearn.preprocessing import StandardScaler

# Step 1: Download the energy dataset from the UCI repository and build the
# standardized feature matrix used by the clustering steps.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00374/energydata_complete.csv"
data = pd.read_csv(url)

# Keep only the columns of interest (e.g., temperature, humidity, energy
# consumption); everything else in the CSV is ignored.
features = [
    "T1",
    "RH_1",
    "T2",
    "RH_2",
    "Appliances",
]
df = data[features]

# Rescale every column to zero mean and unit variance so that no single
# feature dominates the distance computations.
scaler = StandardScaler()
scaler.fit(df)
X_scaled = scaler.transform(df)

# Step 2: K-Means Clustering
# n_init is pinned explicitly: scikit-learn changed its default from 10 to
# "auto" in 1.4 (and emits a FutureWarning in 1.2/1.3 when it is omitted), so
# leaving it out makes the result depend on the installed version even though
# random_state is fixed.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
kmeans_labels = kmeans.fit_predict(X_scaled)

# Calculate metrics for K-Means
# Silhouette score: mean silhouette coefficient over all samples.
kmeans_silhouette = silhouette_score(X_scaled, kmeans_labels)
# Inertia: sum of squared distances of samples to their closest centroid.
kmeans_inertia = kmeans.inertia_

# Step 3: Hierarchical (agglomerative) clustering
# Build the merge tree with Ward linkage, then cut it so that at most three
# flat clusters remain.
hierarchical_linkage = linkage(X_scaled, "ward")
hierarchical_labels = fcluster(hierarchical_linkage, 3, criterion="maxclust")

# Quality metric for the flat hierarchical clusters
hierarchical_silhouette = silhouette_score(
    X_scaled,
    hierarchical_labels,
)

# Step 4: DBSCAN
dbscan = DBSCAN(eps=1.5, min_samples=5)
dbscan_labels = dbscan.fit_predict(X_scaled)

# DBSCAN marks noise points with the label -1; exclude them from the
# Silhouette Score calculation. The mask is computed once and reused.
dbscan_clustered = dbscan_labels != -1
dbscan_n_clusters = np.unique(dbscan_labels[dbscan_clustered]).size

# silhouette_score raises ValueError unless 2 <= n_labels <= n_samples - 1.
# DBSCAN may return a single cluster (or nothing but noise), so fall back to
# NaN in that case instead of aborting the whole script.
if 2 <= dbscan_n_clusters < np.count_nonzero(dbscan_clustered):
    dbscan_silhouette = silhouette_score(
        X_scaled[dbscan_clustered],
        dbscan_labels[dbscan_clustered],
    )
else:
    dbscan_silhouette = float("nan")

# Step 5: Visualize Clusters
# K-Means: scatter of the first two standardized columns, colored by the
# assigned cluster label.
kmeans_fig, kmeans_ax = plt.subplots(figsize=(8, 6))
kmeans_points = kmeans_ax.scatter(
    X_scaled[:, 0],
    X_scaled[:, 1],
    c=kmeans_labels,
    cmap="viridis",
    s=50,
)
kmeans_ax.set_title("K-Means Clustering")
kmeans_ax.set_xlabel("Feature 1")
kmeans_ax.set_ylabel("Feature 2")
kmeans_fig.colorbar(kmeans_points, ax=kmeans_ax, label="Cluster")
plt.show()

# Visualize Hierarchical Clustering (Dendrogram)
plt.figure(figsize=(10, 7))
# The linkage matrix has one leaf per row of X_scaled. Drawing every leaf is
# very slow and yields an unreadable x-axis, so only the last 30 merged
# clusters are shown; a leaf labelled "(k)" stands for a collapsed subtree
# containing k samples.
dendrogram(hierarchical_linkage, truncate_mode="lastp", p=30)
plt.title("Hierarchical Clustering Dendrogram")
plt.xlabel("Data Points")
plt.ylabel("Distance")
plt.show()

# DBSCAN: scatter of the first two standardized columns, colored by label.
# Noise points (label -1) are drawn as well and take the lowest colormap value.
dbscan_fig, dbscan_ax = plt.subplots(figsize=(8, 6))
dbscan_points = dbscan_ax.scatter(
    X_scaled[:, 0],
    X_scaled[:, 1],
    c=dbscan_labels,
    cmap="viridis",
    s=50,
)
dbscan_ax.set_title("DBSCAN Clustering")
dbscan_ax.set_xlabel("Feature 1")
dbscan_ax.set_ylabel("Feature 2")
dbscan_fig.colorbar(dbscan_points, ax=dbscan_ax, label="Cluster")
plt.show()

# Step 6: Report the quality metrics gathered for each algorithm, one per line
print(
    "Clustering Performance Metrics:",
    f"K-Means - Silhouette Score: {kmeans_silhouette:.4f}, Inertia: {kmeans_inertia:.4f}",
    f"Hierarchical - Silhouette Score: {hierarchical_silhouette:.4f}",
    f"DBSCAN - Silhouette Score: {dbscan_silhouette:.4f}",
    sep="\n",
)
