In [1]:
import numpy as np
from sklearn.cluster import KMeans
from keras.datasets import cifar10
from sklearn.metrics import accuracy_score, normalized_mutual_info_score, silhouette_score

# Load the CIFAR-10 train/test splits shipped with Keras.
(X_train, y_train), (X_test, y_test) = cifar10.load_data()

# Flatten every image into a single feature vector (one row per sample).
X_train = X_train.reshape(len(X_train), -1)
X_test = X_test.reshape(len(X_test), -1)

# Divide the pixel values by 255 to rescale the features.
X_train_scaled, X_test_scaled = X_train / 255, X_test / 255

# Number of K-Means clusters to fit.
n_clusters = 10

# Fit on the scaled training data; the seed is fixed for reproducibility.
kmeans = KMeans(
    n_clusters=n_clusters,
    n_init=10,
    random_state=42,
).fit(X_train_scaled)

# Cluster index assigned to every training and test sample.
y_pred, y_pred_test = (
    kmeans.predict(features) for features in (X_train_scaled, X_test_scaled)
)

def _majority_label_map(true_labels, cluster_ids, n_clusters):
    """Map each cluster index to the most frequent true label among its members.

    Args:
        true_labels: Array of ground-truth labels; any shape, flattened here
            so that column vectors of shape (n, 1) are handled as well.
        cluster_ids: Array of cluster indices, one per sample, in the same
            order as ``true_labels``.
        n_clusters: Number of clusters; keys ``0 .. n_clusters - 1`` are
            always present in the result.

    Returns:
        dict mapping cluster index -> majority label. Ties are resolved in
        favour of the smallest label. A cluster with no members in this
        split maps to label 0; that entry is never looked up for the same
        split because no sample carries that cluster index.
    """
    # Flatten explicitly: with a non-1-D label array, NumPy >= 2.0 returns
    # the ``return_inverse`` output of np.unique in the input's shape, which
    # np.bincount rejects. Counting on 1-D data sidesteps that entirely.
    labels = np.asarray(true_labels).ravel()
    clusters = np.asarray(cluster_ids).ravel()

    mapping = {}
    for cluster in range(n_clusters):
        members = labels[clusters == cluster]
        if members.size == 0:
            # Empty cluster: np.argmax on an empty count array would raise.
            mapping[cluster] = labels.dtype.type(0)
            continue
        values, counts = np.unique(members, return_counts=True)
        mapping[cluster] = values[np.argmax(counts)]
    return mapping


# Map every cluster to the majority true label of the training samples
# assigned to it, then translate the training cluster ids into labels.
relation = _majority_label_map(y_train, y_pred, n_clusters)
y_pred_corr = np.array([relation[c] for c in range(n_clusters)])[y_pred]

# Same procedure for the test split.
# NOTE(review): this mapping is derived from the test labels themselves, so
# scores computed from y_pred_corr_test describe how pure the clusters are on
# the test split, not how well the training mapping generalises. Use
# `relation` here instead if a held-out estimate is wanted.
relation_test = _majority_label_map(y_test, y_pred_test, n_clusters)
y_pred_corr_test = np.array(
    [relation_test[c] for c in range(n_clusters)]
)[y_pred_test]

# --- Training split -------------------------------------------------------
# Accuracy and NMI compare the true labels with the majority-mapped labels;
# the silhouette score is computed from the features and raw cluster ids.
train_accuracy = accuracy_score(y_train, y_pred_corr)
nmi_train = normalized_mutual_info_score(y_train.flatten(), y_pred_corr)
silhouette_train = silhouette_score(X_train_scaled, y_pred)

# --- Test split -----------------------------------------------------------
test_accuracy = accuracy_score(y_test, y_pred_corr_test)
nmi_test = normalized_mutual_info_score(y_test.flatten(), y_pred_corr_test)
silhouette_test = silhouette_score(X_test_scaled, y_pred_test)

# Report the scores, one metric per line, train before test for each metric.
for caption, score in (
    ("Training accuracy:", train_accuracy),
    ("Test accuracy:", test_accuracy),
    ("Training NMI:", nmi_train),
    ("Test NMI:", nmi_test),
    ("Training silhouette score:", silhouette_train),
    ("Test silhouette score:", silhouette_test),
):
    print(caption, score)



Training accuracy: 0.22126
Test accuracy: 0.2219
Training NMI: 0.07292053100720179
Test NMI: 0.07538402608447203
Training silhouette score: 0.050634543919521095
Test silhouette score: 0.0491361564139692
