# Clustering Activity

In [38]:
# Import necessary libraries
import numpy as np
import skfuzzy.cluster as fuzz
from sklearn.cluster import AgglomerativeClustering, KMeans, SpectralClustering
from sklearn.datasets import make_blobs
from sklearn.metrics import adjusted_rand_score, silhouette_score

# Synthetic benchmark data: 300 samples scattered around 4 blob centers
# (cluster_std=1.0). random_state is pinned so every run yields the same
# points X and the same generating labels y_true.
X, y_true = make_blobs(
    n_samples=300,
    centers=4,
    cluster_std=1.0,
    random_state=42,
)

Exercise: Evaluate four clustering algorithms (K-Means, Fuzzy C-Means, Spectral, and Agglomerative)
1. Apply each clustering algorithm to the dataset with the following settings:
   - number of clusters = 4
   - random_state = 42 (where applicable)
   - leave `affinity`, `metric`, `algorithm`, `init`, and `linkage` at their defaults (where applicable)
2. Use the **Adjusted Rand Index** and **Silhouette Score** to evaluate the clustering performance.
3. Compare the performance of the algorithms based on these two metrics and decide which algorithm is best suited to the given data.

In [39]:
# K-Means with 4 clusters, 10 restarts (n_init) and a fixed random_state.
kmeans = KMeans(n_clusters=4, random_state=42, n_init=10)

# Fit once and keep a single label vector; both metrics below are computed on
# exactly these assignments (previously ARI used predict(X) while the
# silhouette used kmeans.labels_).
y_pred_kmeans = kmeans.fit_predict(X)

# ARI compares the assignments with the generating labels y_true.
print("Adjusted Rand Index for K-Means: ", adjusted_rand_score(y_true, y_pred_kmeans))

# Silhouette needs only the data and the predicted assignments.
sill_kmeans = silhouette_score(X, y_pred_kmeans)
print("Silhouette Score for K-Means: ", sill_kmeans)

Adjusted Rand Index for K-Means:  0.9910811504997546
Silhouette Score for K-Means:  0.7915830011443039


In [40]:
# Fuzzy C-Means with 4 clusters. The data is passed transposed (X.T), fuzzifier
# m=2, stopping tolerance error=0.005, at most 1000 iterations.
# seed=42 pins the random initial partition so a Restart & Run All reproduces
# the same result, matching the random_state used for the other algorithms.
fcm_centers, fcm_labels, _, _, _, _, _ = fuzz.cmeans(
    X.T, 4, m=2, error=0.005, maxiter=1000, seed=42
)

# NOTE: despite its name, fcm_labels is the second value returned by cmeans,
# i.e. the fuzzy membership matrix, not hard labels. Taking the argmax over
# axis 0 gives each sample the cluster with the highest membership.
y_pred_fcm = np.argmax(fcm_labels, axis=0)

# ARI compares the hard assignments with the generating labels y_true.
print("Adjusted Rand Index for Fuzzy C-Means: ", adjusted_rand_score(y_true, y_pred_fcm))

# Silhouette is computed on the hardened assignments.
sill_fcm = silhouette_score(X, y_pred_fcm)
print("Silhouette Score for Fuzzy C-Means: ", sill_fcm)

Adjusted Rand Index for Fuzzy C-Means:  0.9910811504997546
Silhouette Score for Fuzzy C-Means:  0.7915830011443039


In [41]:
# Spectral clustering into 4 clusters, using the 'discretize' label-assignment
# strategy and a fixed random_state.
spectral = SpectralClustering(
    n_clusters=4,
    assign_labels='discretize',
    random_state=42,
)
y_pred_spectral = spectral.fit(X).labels_

# Silhouette uses only X and the predicted labels; ARI compares against y_true.
sill_spectral = silhouette_score(X, y_pred_spectral)
print("Adjusted Rand Index for Spectral Clustering: ", adjusted_rand_score(y_true, y_pred_spectral))
print("Silhouette Score for Spectral Clustering: ", sill_spectral)

Adjusted Rand Index for Spectral Clustering:  0.9910811504997546
Silhouette Score for Spectral Clustering:  0.7915830011443039


In [43]:
# Agglomerative clustering cut at 4 clusters; every other setting is left at
# its default.
agglo = AgglomerativeClustering(n_clusters=4)
agglo.fit(X)
y_pred_agglo = agglo.labels_

# Score the partition: ARI against the generating labels, silhouette on X.
sill_agglo = silhouette_score(X, y_pred_agglo)
print("Adjusted Rand Index for Agglomerative Clustering: ", adjusted_rand_score(y_true, y_pred_agglo))
print("Silhouette Score for Agglomerative Clustering: ", sill_agglo)

Adjusted Rand Index for Agglomerative Clustering:  0.9910811504997546
Silhouette Score for Agglomerative Clustering:  0.7915830011443039
