# Multiple clustering algorithms on MNIST

In [None]:
import numpy as np
from torchvision import datasets
import torchvision.transforms as transforms
import time

# ToTensor converter handed to both MNIST dataset objects below
transform = transforms.ToTensor()

# open the train and test splits (stored under root 'data', download enabled)
train_set = datasets.MNIST(transform=transform, download=True, train=True, root='data')
test_set = datasets.MNIST(transform=transform, download=True, train=False, root='data')

# pull the images and labels out of each split as NumPy arrays
# NOTE(review): `.data` / `.targets` are read straight off the dataset objects,
# so `transform` does not appear to be applied to these arrays -- confirm
# whether the unscaled pixel values are what the clustering cells should see.
train_data, train_labels = train_set.data.numpy(), train_set.targets.numpy()
test_data, test_labels = test_set.data.numpy(), test_set.targets.numpy()

## clustering model instantiation and training

In [None]:
from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN

# create a k-means model with 10 clusters and fit it on the flattened
# (one row of 28*28 pixels per image) training set.
# random_state is pinned because k-means starts from randomly chosen
# centroids: without a seed every "Restart & Run All" yields different
# clusters and therefore different metric tables.
kmeans_model = KMeans(n_clusters=10, tol=1e-4, max_iter=400, random_state=0)
kmeans_model = kmeans_model.fit(train_data.reshape(-1, 28*28))

In [None]:
# build the hierarchical (agglomerative) model for 10 clusters ...
hier_model = AgglomerativeClustering(n_clusters=10)
# ... then fit it on the training images flattened to 28*28-wide rows
# NOTE(review): agglomerative clustering over the complete training set may
# need a very large amount of memory -- confirm this cell is feasible as-is.
hier_model = hier_model.fit(train_data.reshape(-1, 28*28))

In [None]:
# build the density-based model with the library's default settings ...
density_model = DBSCAN()
# ... then fit it on the training images flattened to 28*28-wide rows
# NOTE(review): no eps / min_samples are passed; whether the defaults suit
# these pixel features should be confirmed.
density_model = density_model.fit(train_data.reshape(-1, 28*28))

In [None]:
def _assign_to_nearest_centroid(fit_features, fit_labels, new_features):
    """Label new samples with the id of the closest fitted cluster centroid.

    AgglomerativeClustering and DBSCAN only label the samples they were
    fitted on (``labels_``) and expose no ``predict`` method, so unseen
    samples are assigned here by Euclidean distance to the mean of each
    training cluster.

    Parameters
    ----------
    fit_features : array-like of shape (n_fit, n_features)
        Samples the clustering model was fitted on.
    fit_labels : array-like of shape (n_fit,)
        Cluster id of each fitted sample. Negative ids (DBSCAN marks noise
        with -1) are not treated as clusters.
    new_features : array-like of shape (n_new, n_features)
        Samples to label.

    Returns
    -------
    numpy.ndarray of shape (n_new,)
        Cluster id for every new sample. If ``fit_labels`` contains no
        non-negative id, every new sample is labelled -1.
    """
    fit_features = np.asarray(fit_features)
    fit_labels = np.asarray(fit_labels)
    new_features = np.asarray(new_features, dtype=np.float64)

    cluster_ids = np.unique(fit_labels[fit_labels >= 0])
    if cluster_ids.size == 0:
        # nothing but noise was found during fitting
        return np.full(new_features.shape[0], -1, dtype=fit_labels.dtype)

    # accumulate the means in float64 so integer pixel data cannot overflow
    centroids = np.stack([
        fit_features[fit_labels == cluster_id].mean(axis=0, dtype=np.float64)
        for cluster_id in cluster_ids
    ])

    # ||x - c||^2 = ||x||^2 - 2 x.c + ||c||^2, which avoids materialising an
    # (n_new, n_clusters, n_features) intermediate array
    sq_dists = (
        np.sum(new_features ** 2, axis=1)[:, np.newaxis]
        - 2.0 * (new_features @ centroids.T)
        + np.sum(centroids ** 2, axis=1)[np.newaxis, :]
    )
    return cluster_ids[np.argmin(sq_dists, axis=1)]


# k-means can label unseen samples directly
kmeans_labels_pred = kmeans_model.predict(test_data.reshape(-1, 28*28))

# the other two models have no predict(); fall back to nearest training centroid
hier_labels_pred = _assign_to_nearest_centroid(
    train_data.reshape(-1, 28*28), hier_model.labels_, test_data.reshape(-1, 28*28))
density_labels_pred = _assign_to_nearest_centroid(
    train_data.reshape(-1, 28*28), density_model.labels_, test_data.reshape(-1, 28*28))

## performance evaluation

In [None]:
import import_ipynb
import DataVisuals as dv
def metric_calc(train_labels, model_labels, test_labels, pred_labels):
    """Score one clustering model on both the training and the test split.

    Two ``dv.Metrics`` objects are built -- one from
    ``(train_labels, model_labels)`` and one from
    ``(test_labels, pred_labels)`` -- and each is asked for its
    ``nmi()``, ``ari()`` and ``acc()`` values.

    Returns a 6-tuple ordered as
    ``(train_nmi, train_ari, train_acc, test_nmi, test_ari, test_acc)``.
    """
    def _score_triple(scorer):
        # same call order as the returned tuple: nmi, then ari, then acc
        return scorer.nmi(), scorer.ari(), scorer.acc()

    fit_scorer = dv.Metrics(train_labels, model_labels)
    heldout_scorer = dv.Metrics(test_labels, pred_labels)
    return _score_triple(fit_scorer) + _score_triple(heldout_scorer)

In [None]:
# Score every model on the training split (labels assigned during fitting)
# and on the test split (labels predicted for the test images).
# The targets are wrapped in parentheses so each assignment can span several
# lines, and the predicted-label names match the ones defined in the
# prediction cell (`*_labels_pred`).
(kmeans_train_nmi, kmeans_train_ari, kmeans_train_acc,
 kmeans_test_nmi, kmeans_test_ari, kmeans_test_acc) = metric_calc(
    train_labels, kmeans_model.labels_, test_labels, kmeans_labels_pred)

(hier_train_nmi, hier_train_ari, hier_train_acc,
 hier_test_nmi, hier_test_ari, hier_test_acc) = metric_calc(
    train_labels, hier_model.labels_, test_labels, hier_labels_pred)

(density_train_nmi, density_train_ari, density_train_acc,
 density_test_nmi, density_test_ari, density_test_acc) = metric_calc(
    train_labels, density_model.labels_, test_labels, density_labels_pred)

In [None]:
from tabulate import tabulate
# Headline the table so it can be told apart from the test-set table, which
# has exactly the same rows and columns.
print('Training clustering results:\n===========================')
# one row per algorithm: NMI, ARI and accuracy measured on the training split
print(tabulate(
    [
        ['K-Means', kmeans_train_nmi, kmeans_train_ari, kmeans_train_acc],
        ['Hierarchical', hier_train_nmi, hier_train_ari, hier_train_acc],
        ['Density based', density_train_nmi, density_train_ari, density_train_acc],
    ],
    headers=['Algorithms', 'Normalized Mutual Information', 'Adjusted Rand Index', 'Accuracy'],
))

In [None]:
# headline for the test-split table
print('Test clustering results:\n=======================')
# one row per algorithm: NMI, ARI and accuracy measured on the test split
print(
    tabulate(
        headers=[
            'Algorithms',
            'Normalized Mutual Information',
            'Adjusted Rand Index',
            'Accuracy',
        ],
        tabular_data=[
            ['K-Means', kmeans_test_nmi, kmeans_test_ari, kmeans_test_acc],
            ['Hierarchical', hier_test_nmi, hier_test_ari, hier_test_acc],
            ['Density based', density_test_nmi, density_test_ari, density_test_acc],
        ],
    )
)