In [37]:
import pandas as pd
from sklearn.cluster import KMeans, AgglomerativeClustering, SpectralClustering, MiniBatchKMeans
from sklearn.metrics import adjusted_rand_score
from sklearn.preprocessing import StandardScaler
from scipy.cluster.vq import kmeans2

# Load the two-moons CSV (the file has no header row). The first two columns
# are taken as the feature matrix and the third column as the reference labels.
# NOTE(review): this is an absolute, machine-specific path; the notebook only
# runs where the file exists at exactly this location.
data = pd.read_csv(
    '/Users/efang/Desktop/coding/Intro-to-ML/CSDS340/data/twomoons.csv',
    header=None,
)
X = data.iloc[:, 0:2].values
y_true = data[2].values

def evaluate_clustering(y_pred, y_true, name):
    """Print and return the adjusted Rand index of a clustering.

    Args:
        y_pred: Cluster labels produced by the algorithm under evaluation.
        y_true: Reference labels to compare against.
        name: Label used as the prefix of the printed report line.

    Returns:
        The value returned by ``adjusted_rand_score(y_true, y_pred)``, or
        ``None`` if computing or printing the score raised an exception
        (in which case the error message is printed instead).
    """
    try:
        ari = adjusted_rand_score(y_true, y_pred)
        print(f"{name} Adjusted Rand Index: {ari:.4f}")
    except Exception as err:
        # Best-effort: report the failure and let the caller carry on.
        print(f"An error occurred with {name}: {err}")
        return None
    else:
        return ari

# Baseline 1: SciPy k-means with k-means++ initialisation.
# The seed pins the otherwise random initialisation so the reported score is
# reproducible across kernel restarts (the `seed` keyword needs SciPy >= 1.7).
centroids, labels = kmeans2(X, 2, minit='++', seed=42)
kmeans_ari = evaluate_clustering(labels, y_true, "K-means (scipy)")

# Baseline 2: agglomerative clustering into two clusters; every other
# estimator parameter is left at its scikit-learn default.
agglo = AgglomerativeClustering(n_clusters=2)
agglo_labels = agglo.fit_predict(X)
agglo_ari = evaluate_clustering(agglo_labels, y_true, "Agglomerative Clustering")







K-means (scipy) Adjusted Rand Index: 0.3066
Agglomerative Clustering Adjusted Rand Index: 0.3306


In [55]:
# Sweep the neighbourhood size of the nearest-neighbours affinity (1 through 19)
# and report the adjusted Rand index obtained for each setting.
for neighbors in range(1, 20):
    spec = SpectralClustering(
        n_clusters=2,
        affinity='nearest_neighbors',
        n_neighbors=neighbors,
        assign_labels='discretize',
        random_state=42,
    )
    spec.fit(X)
    spec_accuracy = evaluate_clustering(spec.labels_, y_true, "SpectralClustering")
    # Echo the raw score next to the parameter value that produced it.
    print(spec_accuracy, neighbors)

# Recorded result with assign_labels='cluster_qr' (presumably at n_neighbors=14):
# SpectralClustering Adjusted Rand Index: 0.6691 (0.6691233181935041)

# Candidate kernel coefficients for the RBF affinity.
gamma_values = [0.01, 0.05, 0.1, 0.5, 1, 5, 6, 7, 8, 9, 10]

# Fit one spectral clustering per gamma and report its adjusted Rand index.
for gamma in gamma_values:
    spec = SpectralClustering(
        n_clusters=2,
        affinity='rbf',
        gamma=gamma,
        assign_labels='discretize',
        random_state=42,
    )
    spec.fit(X)
    spec_accuracy = evaluate_clustering(spec.labels_, y_true, "SpectralClustering")
    # Echo the raw score next to the gamma that produced it.
    print(spec_accuracy, gamma)


SpectralClustering Adjusted Rand Index: 0.0480
0.047983673469387755 1
SpectralClustering Adjusted Rand Index: 0.1358
0.13575757575757577 2
SpectralClustering Adjusted Rand Index: 0.0415
0.04154169837503917 3
SpectralClustering Adjusted Rand Index: 0.2059
0.20591790142799588 4
SpectralClustering Adjusted Rand Index: 0.2059
0.20591790142799588 5
SpectralClustering Adjusted Rand Index: 0.3308
0.33083307189915295 6
SpectralClustering Adjusted Rand Index: 0.3308
0.33083307189915295 7
SpectralClustering Adjusted Rand Index: 0.4044
0.4043632094699255 8
SpectralClustering Adjusted Rand Index: 0.4044
0.4043632094699255 9
SpectralClustering Adjusted Rand Index: 0.4044
0.4043632094699255 10
SpectralClustering Adjusted Rand Index: 0.4853
0.4852686308492201 11
SpectralClustering Adjusted Rand Index: 0.5433
0.5432987041591718 12
SpectralClustering Adjusted Rand Index: 0.5735
0.5734638922888617 13
SpectralClustering Adjusted Rand Index: 0.5735
0.5734638922888617 14
SpectralClustering Adjusted Rand In

