Understanding autoencoders on the MNIST digit dataset

In [None]:
from keras.layers import Input, Dense
from keras.models import Model
from keras.datasets import mnist
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Load the MNIST digits and unpack them into train/test inputs (X*) and
# their targets (Y*).
(XTrain, YTrain), (XTest, YTest) = mnist.load_data()

# Container type of the loaded inputs and targets
print('XTrain class = ', type(XTrain))
print('YTrain class = ', type(YTrain))

# Shape of every split; each line is labelled with the array it reports
print('XTrain shape = ', XTrain.shape)
print('XTest shape = ', XTest.shape)
print('YTrain shape = ', YTrain.shape)
print('YTest shape = ', YTest.shape)

# Distinct target values present in each split
print('YTrain values = ', np.unique(YTrain))
print('YTest values = ', np.unique(YTest))

# Number of samples per class in each split
unique, counts = np.unique(YTrain, return_counts=True)
print('YTrain distribution = ', dict(zip(unique, counts)))
unique, counts = np.unique(YTest, return_counts=True)
print('YTest distribution = ', dict(zip(unique, counts)))

In [None]:
# Side-by-side histograms of the class labels in the train and test splits
fig, axs = plt.subplots(1, 2, figsize=(15, 5))

# One bin per digit, centred on the integer label (edges at -0.5, 0.5, ...,
# 9.5). The default of 10 equal bins over [0, 9] yields 0.9-wide bars that
# drift away from the integer ticks set below.
class_bins = np.arange(11) - 0.5

for ax, labels, title in zip(axs, (YTrain, YTest), ('YTrain data', 'YTest data')):
    ax.hist(labels, bins=class_bins, ec='black')
    ax.set_title(title)
    ax.set_xlabel('Classes')
    ax.set_ylabel('Number of occurences')
    # Show a tick for every class
    ax.set_xticks(np.arange(10))

plt.show()

In [None]:
from re import X
# Data normalization: scale both splits by the same factor (255, the
# maximum of an 8-bit pixel) so the inputs lie in [0, 1]. The decoder ends
# in a sigmoid and is trained with binary cross-entropy, so targets outside
# [0, 1] would make the loss and the reconstructions meaningless.
XTrain = XTrain.astype('float32') / 255
XTest = XTest.astype('float32') / 255

# Data reshaping: flatten every sample to a single vector, keeping the
# sample axis first (-1 lets NumPy infer the product of the other axes).
XTrain = XTrain.reshape((len(XTrain), -1))
XTest = XTest.reshape((len(XTest), -1))

print(XTrain.shape)
print(XTest.shape)

In [None]:
# Autoencoder with a single hidden layer:
#   784-value input -> 32-unit ReLU code -> 784-unit sigmoid reconstruction
InputModel = Input(shape=(784,))
EncodedLayer = Dense(units=32, activation='relu')(InputModel)
DecodedLayer = Dense(units=784, activation='sigmoid')(EncodedLayer)

# The full model maps the input directly to its reconstruction
AutoencoderModel = Model(inputs=InputModel, outputs=DecodedLayer)

# Print a layer-by-layer summary of the model
AutoencoderModel.summary()

In [None]:
# Train the autoencoder with the adam optimizer and binary cross-entropy.
# ('adadelta' can be passed as the optimizer instead to compare.)
AutoencoderModel.compile(optimizer='adam', loss='binary_crossentropy')

# The inputs double as the targets: the network learns to reproduce them.
# The test split is used as validation data, so `history` records both
# 'loss' and 'val_loss'.
history = AutoencoderModel.fit(
    XTrain,
    XTrain,
    epochs=100,
    batch_size=256,
    shuffle=True,
    validation_data=(XTest, XTest),
)

# Reconstruct the test digits with the trained model
DecodedDigits = AutoencoderModel.predict(XTest)

In [None]:
def plotmodelhistory(history):
    """Plot the training and validation loss curves of a training run.

    ``history`` must expose a ``history`` mapping containing the keys
    ``'loss'`` and ``'val_loss'``; each curve is drawn against its index
    (labelled "Epoch" on the x-axis). The figure is shown immediately.
    """
    # Draw the curves in legend order: training first, then validation
    for metric in ('loss', 'val_loss'):
        plt.plot(history.history[metric])

    plt.title('Autoencoder Model loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend(['Train', 'Test'], loc='upper left')
    plt.show()

# Show which metrics were recorded during training
recorded_metrics = history.history.keys()
print(recorded_metrics)

# Visualize how the loss decreased over the course of training
plotmodelhistory(history)

In [None]:
# Compare n test digits (top row) with their reconstructions (bottom row)
n = 5
first_sample = 10  # index of the first test sample to display
plt.figure(figsize=(20, 4))
for i in range(n):
    sample = first_sample + i
    # Row 0 holds the input images, row 1 the images decoded by the autoencoder
    for row, images in enumerate((XTest, DecodedDigits)):
        ax = plt.subplot(2, n, row * n + i + 1)
        # Pass the colormap explicitly so every panel is drawn in grayscale;
        # this does not depend on pyplot's global default colormap.
        plt.imshow(images[sample].reshape(28, 28), cmap='gray')
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
plt.show()