# PART 0: IMPORTS

In [None]:
import numpy as np
import pandas as pd
import util_mnist_reader
import matplotlib.pyplot as plt

from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report, confusion_matrix
from sklearn import metrics
from sklearn.mixture import GaussianMixture

from keras.models import Model
from keras.layers import Dense, Input, Conv2D, MaxPooling2D, UpSampling2D, Flatten, BatchNormalization

# PART 1: K-MEANS CLUSTERING

In [None]:
#Load Dataset
X_train, y_train = util_mnist_reader.load_mnist('data/fashion', kind='train')
X_test, y_test = util_mnist_reader.load_mnist('data/fashion', kind='t10k')
#print(X_train.shape, X_test.shape, y_test.shape)
labelNames = ["top", "trouser", "pullover", "dress", "coat", "sandal", "shirt", "sneaker", "bag", "ankle boot"]

#Normalize Input (scale pixel values by 1/255)
X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255

#KMeans: fit on the training images, then assign every test image to a cluster
kmeans = KMeans(n_clusters=10, random_state=42).fit(X_train)
y_preds = kmeans.predict(X_test)
#print(y_preds.shape)

#Print contingency table: rows are true labels, columns are raw cluster ids.
#Cluster ids are arbitrary, so the diagonal is NOT "correct predictions" here.
c_m = confusion_matrix(y_test, y_preds)
print(c_m)

#Display normalized mutual information. NMI is invariant to how clusters are
#numbered, so it can be computed directly on the raw cluster ids. The variable
#keeps its original name so later cells that read it keep working.
accuracy = metrics.normalized_mutual_info_score(y_test, y_preds, average_method='geometric')
print('NMI: ', accuracy)

#Per-class report. Raw cluster ids cannot be compared to class labels, so each
#cluster is first mapped to the class that is most frequent among the TRAINING
#samples assigned to it (several clusters may map to the same class).
#Assumes class labels are the integers 0..n_clusters-1, in labelNames order.
cluster_ids = np.arange(kmeans.n_clusters)
train_contingency = confusion_matrix(y_train, kmeans.labels_, labels=cluster_ids)
cluster_to_label = train_contingency.argmax(axis=0)
y_preds_mapped = cluster_to_label[y_preds]
print(classification_report(y_test, y_preds_mapped, target_names=labelNames))

# PART 2: K-MEANS CLUSTERING LAYER USING AUTO-ENCODER NETWORK

In [None]:
#AutoEncoder
# The convolutional layers expect image tensors, so turn each sample back into
# a 28x28 single-channel image (the sample count is inferred by reshape).
X_train = X_train.reshape(-1, 28, 28, 1)
X_test = X_test.reshape(-1, 28, 28, 1)

def autoencoder(input_img):
    """Build a convolutional autoencoder together with its encoder half.

    Args:
        input_img: Keras input tensor. The spatial sizes noted in the comments
            below assume a 28x28 single-channel image.

    Returns:
        A tuple ``(autoencoder_model, encoder_model)``. Both models start at
        ``input_img`` and are built from the same layer objects, so training
        the autoencoder also trains the encoder.
    """
    # Encoder: three Conv -> BatchNorm -> MaxPool stages (28 -> 14 -> 7 -> 4).
    x = Conv2D(64, (3, 3), activation='relu', padding='same')(input_img)
    # The output must be assigned back to x, otherwise the normalization layer
    # is created but never becomes part of the model.
    x = BatchNormalization()(x)
    x = MaxPooling2D((2, 2), padding='same')(x)
    x = Conv2D(32, (3, 3), activation='relu', padding='same')(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2, 2), padding='same')(x)
    x = Conv2D(32, (3, 3), activation='relu', padding='same')(x)
    x = BatchNormalization()(x)
    encoded = MaxPooling2D((2, 2), padding='same')(x)

    # Decoder: mirror of the encoder using upsampling (4 -> 8 -> 16 -> 14 -> 28).
    x = Conv2D(32, (3, 3), activation='relu', padding='same')(encoded)
    x = BatchNormalization()(x)
    x = UpSampling2D((2, 2))(x)
    x = Conv2D(32, (3, 3), activation='relu', padding='same')(x)
    x = BatchNormalization()(x)
    x = UpSampling2D((2, 2))(x)
    # Deliberately unpadded: the 3x3 convolution trims 16 -> 14 so that the
    # final upsampling restores the original 28x28 size.
    x = Conv2D(64, (3, 3), activation='relu')(x)
    x = BatchNormalization()(x)
    x = UpSampling2D((2, 2))(x)
    decoded = Conv2D(1, (3, 3), activation='sigmoid', padding='same')(x)

    ae_model = Model(inputs=input_img, outputs=decoded, name='AE')
    encoder_model = Model(inputs=input_img, outputs=encoded, name='encoder')
    return ae_model, encoder_model


#Train AutoEncoder
# NOTE(review): the next assignment rebinds the name `autoencoder` from the
# builder function to the model it returns, so this cell cannot be re-run
# without first re-running the cell that defines the function.
input_img = Input(shape=(28, 28, 1))
autoencoder, encoder = autoencoder(input_img)

# The network is trained to reproduce its own input, hence x == y.
autoencoder.compile(loss='binary_crossentropy', optimizer='RMSprop')
autoencoder_train = autoencoder.fit(
    x=X_train,
    y=X_train,
    epochs=100,
    validation_data=(X_test, X_test),
)

# Persist the trained weights next to the notebook.
autoencoder.save_weights('autoencoder.h5')

def plotGraph(history):
    """Plot the training and validation loss curves of a fitted model.

    Args:
        history: object whose ``history`` attribute is a mapping containing
            the ``'loss'`` and ``'val_loss'`` series (one value per epoch).
    """
    curves = history.history
    plt.figure(1)
    # Training loss first, validation loss second, matching the legend order.
    for series_name in ('loss', 'val_loss'):
        plt.plot(curves[series_name])
    plt.title('AutoEncoder loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend(['Train', 'Cross-Validation'], loc='upper left')
    plt.show()
# Show the loss curves recorded while fitting the autoencoder.
plotGraph(autoencoder_train)

In [None]:
# `encoder` was built by autoencoder() from the same input and layer outputs as
# the full model, so it already shares the trained layers and their weights.
# No explicit weight copy is needed.
autoencoder1 = encoder

#Train model using the output from Encoded layer
#print(X_train.shape, X_test.shape)
encoded_imgs2 = autoencoder1.predict(X_train.reshape(len(X_train), 28, 28, 1))
#print(encoded_imgs2.shape)
# Flatten each encoded image to one feature vector; the vector length is
# derived from the encoder output rather than hard-coded.
encoded_imgs2 = encoded_imgs2.reshape(len(encoded_imgs2), -1)

encoded_imgs3 = autoencoder1.predict(X_test.reshape(len(X_test), 28, 28, 1))
encoded_imgs3 = encoded_imgs3.reshape(len(encoded_imgs3), -1)

#Perform k-Means after reducing dimensions using Autoencoder
kmeans = KMeans(n_clusters=10, init='random', max_iter=500, random_state=42).fit(encoded_imgs2)
#a=kmeans.labels_
y_pred = kmeans.predict(encoded_imgs3)

#Display contingency table (rows: true labels, columns: raw cluster ids) and NMI
c_m2 = confusion_matrix(y_test, y_pred)
print(c_m2)
# The score is normalized mutual information, which does not depend on how the
# clusters are numbered; it is not classification accuracy.
acc1 = metrics.normalized_mutual_info_score(y_test, y_pred, average_method='geometric')
print('NMI: ', acc1)

# PART 3: GMM USING AUTO-ENCODER NETWORK

In [None]:
#Perform GMM after reducing dimensions using Autoencoder
# Fit a 10-component Gaussian mixture on the encoded training images and assign
# each encoded test image to a component.
gmm = GaussianMixture(n_components=10, max_iter=500, random_state=42).fit(encoded_imgs2)
y_pred1 = gmm.predict(encoded_imgs3)

#Display contingency table (rows: true labels, columns: raw component ids) and NMI
c_m3 = confusion_matrix(y_test, y_pred1)
print(c_m3)
# The score is normalized mutual information, which does not depend on how the
# components are numbered; it is not classification accuracy.
acc2 = metrics.normalized_mutual_info_score(y_test, y_pred1, average_method='geometric')
print('NMI: ', acc2)