In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN
from sklearn.metrics import silhouette_score, adjusted_rand_score, adjusted_mutual_info_score
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

# Generate sample data
np.random.seed(42)
n_samples = 500
n_features = 10
X = np.random.rand(n_samples, n_features)

# Normal cluster analysis using K-means
kmeans = KMeans(n_clusters=3, random_state=42)
kmeans_labels = kmeans.fit_predict(X)

# Correlation-based clustering using Agglomerative Clustering
agg_clustering = AgglomerativeClustering(n_clusters=3, linkage='average', affinity='correlation')
agg_labels = agg_clustering.fit_predict(X)

# DBSCAN clustering
dbscan = DBSCAN(eps=0.3, min_samples=5)
dbscan_labels = dbscan.fit_predict(X)

# Dimensionality reduction using PCA
pca = PCA(n_components=2)
pca_result = pca.fit_transform(X)

# Dimensionality reduction using t-SNE
tsne = TSNE(n_components=2, perplexity=30, random_state=42)
tsne_result = tsne.fit_transform(X)

# Evaluate clustering results
print("K-means Clustering:")
print("Silhouette Score:", silhouette_score(X, kmeans_labels))
print("Adjusted Rand Index:", adjusted_rand_score(kmeans_labels, agg_labels))
print("Adjusted Mutual Information Score:", adjusted_mutual_info_score(kmeans_labels, agg_labels))

print("\nCorrelation-based Clustering:")
print("Silhouette Score:", silhouette_score(X, agg_labels))
print("Adjusted Rand Index:", adjusted_rand_score(agg_labels, kmeans_labels))
print("Adjusted Mutual Information Score:", adjusted_mutual_info_score(agg_labels, kmeans_labels))

print("\nDBSCAN Clustering:")
print("Silhouette Score:", silhouette_score(X, dbscan_labels))

# Plotting the results
plt.figure(figsize=(12, 6))
plt.subplot(221)
plt.scatter(pca_result[:, 0], pca_result[:, 1], c=kmeans_labels)
plt.title("K-means Clustering (PCA)")
plt.subplot(222)
plt.scatter(pca_result[:, 0], pca_result[:, 1], c=agg_labels)
plt.title("Correlation-based Clustering (PCA)")
plt.subplot(223)
plt.scatter(tsne_result[:, 0], tsne_result[:, 1], c=kmeans_labels)
plt.title("K-means Clustering (t-SNE)")
plt.subplot(224)
plt.scatter(tsne_result[:, 0], tsne_result[:, 1], c=agg_labels)
plt.title("Correlation-based Clustering (t-SNE)")
plt.tight_layout()
plt.show()




K-means Clustering:
Silhouette Score: 0.07406941492586865
Adjusted Rand Index: 0.03498721851145265
Adjusted Mutual Information Score: 0.05319850710158657

Correlation-based Clustering:
Silhouette Score: 0.039456869018742124
Adjusted Rand Index: 0.03498721851145265
Adjusted Mutual Information Score: 0.05319850710158742

DBSCAN Clustering:


ValueError: Number of labels is 1. Valid values are 2 to n_samples - 1 (inclusive)