# Clustering Activity

In [26]:
# Import necessary libraries
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans, AgglomerativeClustering, SpectralClustering
from sklearn.metrics import adjusted_rand_score
import skfuzzy.cluster as fuzz
import numpy as np
from sklearn.metrics import adjusted_rand_score, silhouette_score

# Synthetic data for the exercise: 300 samples scattered around 4 blob centers.
# y_true records which blob generated each sample and is used as the ground
# truth when computing the Adjusted Rand Index in the cells below.
X, y_true = make_blobs(
    n_samples=300,
    centers=4,
    cluster_std=1.0,
    random_state=42,
)

Exercise: Evaluate four clustering algorithms (K-Means, Fuzzy C-Means, Spectral, and Agglomerative)
1. Apply each clustering algorithm to the dataset where:
- cluster_count = 4
- random_state = 42 (where applicable)
- leave `affinity`, `metric`, `algorithm`, `init`, and `linkage` at their default values (where applicable)
2. Use the **Adjusted Rand Index** and **Silhouette Score** to evaluate the clustering performance of each algorithm.
3. Compare the performance of the algorithms based on the aforementioned metrics and decide which algorithm is best suited for the data given.

# K-Means

In [27]:
# Fit K-Means with 4 clusters; the fixed random_state and explicit n_init keep
# the run repeatable.
kmeans = KMeans(n_clusters=4, n_init=10, random_state=42)
kmeans.fit(X)
labels = kmeans.labels_

# ARI compares the assignment with the known blobs; the silhouette score is
# computed from the data and the assignment alone.
ari = adjusted_rand_score(y_true, labels)
silhouette = silhouette_score(X, labels)

print(
    "K-Means",
    f"Adjusted Rand Index: {ari}",
    f"Silhouette Score: {silhouette}",
    sep="\n",
)

K-Means
Adjusted Rand Index: 0.9910811504997546
Silhouette Score: 0.7915830011443039


# Fuzzy C-Means

In [33]:
# Fuzzy C-Means with 4 clusters.
# - cmeans expects the data laid out as (features, samples), hence X.T.
# - seed=42 fixes the random initial partition so the cell is reproducible,
#   in line with random_state=42 used for the other seeded algorithms.
# - Despite its name, fcm_labels is the fuzzy membership matrix (one row per
#   cluster, one column per sample), not hard labels. The name is kept so any
#   other cell that refers to it keeps working.
fcm_centers, fcm_labels, _, _, _, _, _ = fuzz.cmeans(
    X.T, c=4, m=1.5, error=0.005, maxiter=1000, seed=42
)

# Harden the fuzzy partition: each sample goes to its highest-membership cluster.
labels = np.argmax(fcm_labels, axis=0)

# Same two metrics as for the other algorithms, computed on the hard labels.
ari = adjusted_rand_score(y_true, labels)
silhouette = silhouette_score(X, labels)

print("Fuzzy C-Means")
print(f"Adjusted Rand Index: {ari}")
print(f"Silhouette Score: {silhouette}")

Fuzzy C-Means
Adjusted Rand Index: 0.9910811504997546
Silhouette Score: 0.7915830011443039


# Spectral

In [29]:
# Spectral clustering into 4 groups; random_state is fixed so the run is
# repeatable, and all other settings are left at their defaults.
spectral = SpectralClustering(n_clusters=4, random_state=42)
labels = spectral.fit_predict(X)

# Score the assignment against the known blobs (ARI) and on its own (silhouette).
ari = adjusted_rand_score(y_true, labels)
silhouette = silhouette_score(X, labels)

print(
    "Spectral",
    f"Adjusted Rand Index: {ari}",
    f"Silhouette Score: {silhouette}",
    sep="\n",
)

Spectral
Adjusted Rand Index: 0.9910811504997546
Silhouette Score: 0.7915830011443039


# Agglomerative

In [30]:
# Agglomerative clustering cut at 4 clusters, with every other setting left at
# its default. No random_state is passed to this estimator.
agglomerative = AgglomerativeClustering(n_clusters=4)
labels = agglomerative.fit_predict(X)

# Score the assignment against the known blobs (ARI) and on its own (silhouette).
ari = adjusted_rand_score(y_true, labels)
silhouette = silhouette_score(X, labels)

print(
    "Agglomerative",
    f"Adjusted Rand Index: {ari}",
    f"Silhouette Score: {silhouette}",
    sep="\n",
)

Agglomerative
Adjusted Rand Index: 0.9910811504997546
Silhouette Score: 0.7915830011443039
