In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
from models.deep_km import DeepKMeansAutoEncoder
from models.deep_spectral_clustering import DeepSpectralClusteringAutoEncoder
from models.seq_ae_km import SeqAutoEncoderKMeans
from models.utils import clustering_accuracy
from sklearn.datasets import load_digits 

In [2]:
# Fetch the scikit-learn digits data (all ten classes) as a (features, labels) pair.
digit_features, digit_labels = load_digits(n_class=10, return_X_y=True)
# Rescale every feature by 1/16.
# NOTE(review): presumably maps pixel intensities into [0, 1] -- confirm the raw range.
x_train = digit_features / 16
y_train = digit_labels

In [3]:
# Shared experiment settings and result accumulators used by the benchmark cells.

# Value handed to each model constructor and to the placeholder loss.
input_dim = 64
# Number of independent repetitions per clustering method.
runs = 10

# One row label per benchmarked method, appended in execution order.
names = list()
# Column-wise results: each benchmark cell appends one mean and one std.
accuracy_dict = {column: [] for column in ("accuracy:mean", "accuracy:std")}

In [4]:
# Benchmark: SeqAutoEncoderKMeans, repeated `runs` times on the full data set.
accuracies = []
for attempt in range(1, runs + 1):
    print("Run %d/%d" % (attempt, runs))
    seq_ae_km = SeqAutoEncoderKMeans(input_dim)
    # The compiled loss ignores its arguments and always yields zeros.
    # NOTE(review): presumably the model computes its real objective internally -- confirm.
    seq_ae_km.compile(loss=lambda x, y: tf.zeros(input_dim), optimizer="adam")
    seq_ae_km.train(x_train, batch_size=256, epochs=60, verbose=0)
    run_accuracy = clustering_accuracy(y_train, seq_ae_km.assignments)
    accuracies.append(run_accuracy)

# Summarise the repetitions once and reuse the values for both storage and display.
mean_accuracy = np.mean(accuracies)
std_accuracy = np.std(accuracies)
names.append('AutoEncoder + K-Means')
accuracy_dict['accuracy:mean'].append(mean_accuracy)
accuracy_dict['accuracy:std'].append(std_accuracy)
print('%.2f ± %.2f' % (mean_accuracy, std_accuracy))

Run 1/10
Run 2/10
Run 3/10
Run 4/10
Run 5/10
Run 6/10
Run 7/10
Run 8/10
Run 9/10
Run 10/10
0.76 ± 0.03


In [5]:
# Benchmark: DeepKMeansAutoEncoder (lmbda=1), repeated `runs` times.
accuracies = []
for attempt in range(1, runs + 1):
    print("Run %d/%d" % (attempt, runs))
    deep_km = DeepKMeansAutoEncoder(input_dim, lmbda=1)
    # The compiled loss ignores its arguments and always yields zeros.
    # NOTE(review): presumably the model computes its real objective internally -- confirm.
    deep_km.compile(loss=lambda x, y: tf.zeros(input_dim), optimizer="adam")
    # Two phases: 50 epochs of pre-training followed by 10 epochs of training.
    deep_km.pre_train(x_train, batch_size=256, epochs=50, verbose=0)
    deep_km.train(x_train, batch_size=256, epochs=10, verbose=0)
    run_accuracy = clustering_accuracy(y_train, deep_km.assignments)
    accuracies.append(run_accuracy)

# Summarise the repetitions once and reuse the values for both storage and display.
mean_accuracy = np.mean(accuracies)
std_accuracy = np.std(accuracies)
names.append('Deep K-Means')
accuracy_dict['accuracy:mean'].append(mean_accuracy)
accuracy_dict['accuracy:std'].append(std_accuracy)
print('%.2f ± %.2f' % (mean_accuracy, std_accuracy))

Run 1/10
Run 2/10
Run 3/10
Run 4/10
Run 5/10
Run 6/10
Run 7/10
Run 8/10
Run 9/10
Run 10/10
0.78 ± 0.01


In [6]:
# Benchmark: DeepSpectralClusteringAutoEncoder (lmbda=1), repeated `runs` times.
accuracies = []
for attempt in range(1, runs + 1):
    print("Run %d/%d" % (attempt, runs))
    deep_sc = DeepSpectralClusteringAutoEncoder(input_dim, lmbda=1)
    # The compiled loss ignores its arguments and always yields zeros.
    # NOTE(review): presumably the model computes its real objective internally -- confirm.
    deep_sc.compile(loss=lambda x, y: tf.zeros(input_dim), optimizer="adam")
    # Two phases: 50 epochs of pre-training followed by 10 epochs of training.
    deep_sc.pre_train(x_train, batch_size=256, epochs=50, verbose=0)
    deep_sc.train(x_train, batch_size=256, epochs=10, verbose=0)
    run_accuracy = clustering_accuracy(y_train, deep_sc.assignments)
    accuracies.append(run_accuracy)

# Summarise the repetitions once and reuse the values for both storage and display.
mean_accuracy = np.mean(accuracies)
std_accuracy = np.std(accuracies)
names.append('Deep Spectral Clustering')
accuracy_dict['accuracy:mean'].append(mean_accuracy)
accuracy_dict['accuracy:std'].append(std_accuracy)
print('%.2f ± %.2f' % (mean_accuracy, std_accuracy))

Run 1/10
Run 2/10
Run 3/10
Run 4/10
Run 5/10
Run 6/10
Run 7/10
Run 8/10
Run 9/10
Run 10/10
0.82 ± 0.02


In [7]:
# Benchmark: DeepSpectralClusteringAutoEncoder with normalize=True, repeated `runs` times.
accuracies = []
for attempt in range(1, runs + 1):
    print("Run %d/%d" % (attempt, runs))
    deep_sc = DeepSpectralClusteringAutoEncoder(input_dim, lmbda=1, normalize=True)
    # The compiled loss ignores its arguments and always yields zeros.
    # NOTE(review): presumably the model computes its real objective internally -- confirm.
    deep_sc.compile(loss=lambda x, y: tf.zeros(input_dim), optimizer="adam")
    # Two phases: 50 epochs of pre-training followed by 10 epochs of training.
    deep_sc.pre_train(x_train, batch_size=256, epochs=50, verbose=0)
    deep_sc.train(x_train, batch_size=256, epochs=10, verbose=0)
    run_accuracy = clustering_accuracy(y_train, deep_sc.assignments)
    accuracies.append(run_accuracy)

# Summarise the repetitions once and reuse the values for both storage and display.
mean_accuracy = np.mean(accuracies)
std_accuracy = np.std(accuracies)
names.append('Deep Spectral Clustering with Normalization')
accuracy_dict['accuracy:mean'].append(mean_accuracy)
accuracy_dict['accuracy:std'].append(std_accuracy)
print('%.2f ± %.2f' % (mean_accuracy, std_accuracy))

Run 1/10
Run 2/10
Run 3/10
Run 4/10
Run 5/10
Run 6/10
Run 7/10
Run 8/10
Run 9/10
Run 10/10
0.79 ± 0.04


In [8]:
# Assemble the per-method mean/std columns into one table, labelled by method name.
summary_table = pd.DataFrame(accuracy_dict, index=names)
# Display the table rounded to three decimals (last expression is the cell output).
summary_table.round(3)


Unnamed: 0,accuracy:mean,accuracy:std
AutoEncoder + K-Means,0.763,0.034
Deep K-Means,0.784,0.013
Deep Spectral Clustering,0.822,0.018
Deep Spectral Clustering with Normalization,0.79,0.043
