# Clustering Activity

In [1]:
# Import necessary libraries
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans, AgglomerativeClustering, SpectralClustering
from sklearn.metrics import adjusted_rand_score
import skfuzzy.cluster as fuzz
import numpy as np

# Build the synthetic benchmark: 300 points drawn around 4 blob centres.
# X holds the sample coordinates; y_true holds the blob each sample was drawn from.
X, y_true = make_blobs(
    n_samples=300,
    centers=4,
    cluster_std=1.0,
    random_state=42,
)

Exercise: Evaluate four clustering algorithms (K-Means, Fuzzy C-Means, Spectral, and Agglomerative)
1. Apply each clustering algorithm to the dataset, where:
   - the number of clusters is 4
   - `random_state` = 42 (where applicable)
   - `affinity`, `metric`, `algorithm`, `init`, and `linkage` are left at their default values (where applicable)
2. Use the **Adjusted Rand Index** and **Silhouette Score** to evaluate the clustering performance.
3. Compare the performance of the algorithms based on the aforementioned metrics and decide which algorithm is best suited for the data given.

In [2]:
from sklearn.metrics import silhouette_score

# Every algorithm below is fit on the same feature matrix X with 4 clusters and
# scored two ways:
#   * Adjusted Rand Index  - agreement between predicted labels and y_true
#   * Silhouette Score     - computed from X and the predicted labels only

# K-Means
# n_init is pinned to 10 (the default this run was already using) so the fit
# keeps its behaviour and stops emitting the FutureWarning about that default.
kmeans = KMeans(n_clusters=4, n_init=10, random_state=42)
kmeans_labels = kmeans.fit_predict(X)
kmeans_ari = adjusted_rand_score(y_true, kmeans_labels)
kmeans_silhouette = silhouette_score(X, kmeans_labels)

# Fuzzy C-Means
# cmeans takes the data as (features, samples), hence X.T.
# seed=42 fixes the random initial partition so the result is reproducible,
# in line with the random_state used by the other estimators.
# NOTE: despite its name, fcm_labels is the fuzzy membership matrix (one row per
# cluster, one column per sample); the hard labels are derived on the next line.
fcm_centers, fcm_labels, _, _, _, _, _ = fuzz.cmeans(
    X.T, 4, m=2, error=0.005, maxiter=1000, init=None, seed=42
)
# Assign each sample to the cluster in which it has the highest membership.
fcm_labels_hard = np.argmax(fcm_labels, axis=0)
fcm_ari = adjusted_rand_score(y_true, fcm_labels_hard)
fcm_silhouette = silhouette_score(X, fcm_labels_hard)

# Spectral Clustering
spectral = SpectralClustering(n_clusters=4, random_state=42)
spectral_labels = spectral.fit_predict(X)
spectral_ari = adjusted_rand_score(y_true, spectral_labels)
spectral_silhouette = silhouette_score(X, spectral_labels)

# Agglomerative Clustering
# No random_state here: none is applicable to this estimator in this exercise.
agglomerative = AgglomerativeClustering(n_clusters=4)
agglomerative_labels = agglomerative.fit_predict(X)
agglomerative_ari = adjusted_rand_score(y_true, agglomerative_labels)
agglomerative_silhouette = silhouette_score(X, agglomerative_labels)

# Compare the performance: one section per metric, one line per algorithm.
print("Adjusted Rand Index:")
print(f"K-Means: {kmeans_ari}")
print(f"Fuzzy C-Means: {fcm_ari}")
print(f"Spectral Clustering: {spectral_ari}")
print(f"Agglomerative Clustering: {agglomerative_ari}")

print("\nSilhouette Score:")
print(f"K-Means: {kmeans_silhouette}")
print(f"Fuzzy C-Means: {fcm_silhouette}")
print(f"Spectral Clustering: {spectral_silhouette}")
print(f"Agglomerative Clustering: {agglomerative_silhouette}")

  super()._check_params_vs_input(X, default_n_init=10)


Adjusted Rand Index:
K-Means: 0.9910811504997546
Fuzzy C-Means: 0.9910811504997546
Spectral Clustering: 0.9910811504997546
Agglomerative Clustering: 0.9910811504997546

Silhouette Score:
K-Means: 0.7915830011443039
Fuzzy C-Means: 0.7915830011443039
Spectral Clustering: 0.7915830011443039
Agglomerative Clustering: 0.7915830011443039
