# Autoencoder in Keras

based on https://blog.keras.io/building-autoencoders-in-keras.html

In [None]:
import numpy as np
from keras import layers
from keras import models
from keras import optimizers

import matplotlib.pyplot as plt

from keras.datasets import mnist

## load MNIST data

In [None]:
# Download/load MNIST; only the images are kept, the digit labels are not
# needed because the autoencoder is trained to reproduce its own input.
train_split, test_split = mnist.load_data()
x_train, x_test = train_split[0], test_split[0]

In [None]:
# Raw MNIST arrays hold one 28x28 grey-scale image per entry, 8 bits per pixel
train_shape, test_shape = x_train.shape, x_test.shape
print('X train shape', train_shape, '; X test shape', test_shape)
# last expression of the cell: the notebook echoes the scalar type of one pixel
type(x_train[0][0][0])

In [None]:
# Cast both splits to float32 and rescale pixel intensities from [0, 255] to [0, 1]
x_train, x_test = (
    split.astype('float32') / 255. for split in (x_train, x_test)
)

In [None]:
# Flatten every 28x28 image into a single row vector (one row per image)
flat_len = 28 * 28
x_train = x_train.reshape((x_train.shape[0], flat_len))
x_test = x_test.reshape((x_test.shape[0], flat_len))
print('X train shape', x_train.shape, '; X test shape', x_test.shape)

## create model

In [None]:
# Two-layer autoencoder built with the Sequential API, layers given up front:
#   1. encoder: compresses the 784-element input into a 32-unit representation
#   2. decoder: reconstructs the 784-element input from that representation
autoencoder = models.Sequential([
    layers.Dense(32, activation='relu', input_shape=(784,)),
    layers.Dense(784, activation='sigmoid'),
])

In [None]:
# Print the layer-by-layer architecture together with each layer's parameter count
autoencoder.summary()

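Q: Why is the number of parameters in the encoding and decoding layers different?

A: Because of the bias parameters. Both layers have the same number of weights (784 × 32), but each layer has one bias per output unit: 32 for the encoder and 784 for the decoder.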

In [None]:
# A Dense layer has inputs * units weights plus one bias per unit.
input_dim, code_dim = 784, 32

# size of layer 1 (encoder: 784 -> 32)
print('dense_1', input_dim * code_dim + code_dim)

# size of layer 2 (decoder: 32 -> 784)
print('dense_2', code_dim * input_dim + input_dim)

In [None]:
# SGD with momentum, minimising the mean squared reconstruction error.
# `learning_rate` is the supported argument name; the legacy `lr` alias is
# deprecated and rejected by recent Keras releases.
autoencoder.compile(
    optimizer=optimizers.SGD(learning_rate=0.1, momentum=0.9),
    loss='mean_squared_error',
)

# Alternative configurations to experiment with:
#autoencoder.compile(optimizer='sgd', loss='mean_squared_error')
#autoencoder.compile(optimizer='adadelta', loss='mean_squared_error')
#autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy')

## train the model

In [None]:
# Train the network to reproduce its own input: the training images are both
# the features and the targets, and the test images are passed as validation data.
training_options = {
    'epochs': 50,
    'batch_size': 128,
    'shuffle': True,
    'validation_data': (x_test, x_test),
}
hist = autoencoder.fit(x_train, x_train, **training_options)

## predict

In [None]:
# Run the images through the full autoencoder (encode, then decode) to obtain
# their reconstructions.
# Note that the inputs come from the *test* set, not from the training images.
decoded_imgs = autoencoder.predict(x_test)

In [None]:
n = 10  # how many digits we will display

# Build the whole 2 x n grid once; squeeze=False keeps `axes` two-dimensional
# even when n == 1.
fig, axes = plt.subplots(2, n, figsize=(20, 4), squeeze=False)
for col in range(n):
    # top row: original test image, bottom row: its reconstruction
    for row, images in enumerate((x_test, decoded_imgs)):
        ax = axes[row, col]
        # Pass the colormap explicitly instead of calling plt.gray(), which
        # would change the global default colormap on every iteration.
        ax.imshow(images[col].reshape(28, 28), cmap='gray')
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
plt.show()

## plot training history

In [None]:
# `hist` is the object returned by autoencoder.fit(); its `history` attribute
# holds the values recorded during training, keyed by metric name.
history_dict = hist.history

In [None]:
# Show which metrics were recorded ('loss' and 'val_loss' are read below)
history_dict.keys()

In [None]:
# Recorded loss curves: training loss first, validation loss second
train_loss_values, valid_loss_values = (
    history_dict[metric] for metric in ('loss', 'val_loss')
)

In [None]:
# 1-based epoch numbers for the x-axis, one per recorded loss value
epochs = [number for number, _ in enumerate(train_loss_values, start=1)]

In [None]:
# Draw the training (red) and validation (blue) loss curves on one set of axes
fig, ax = plt.subplots(figsize=(12, 10))
for loss_curve, line_style, curve_label in (
    (train_loss_values, 'r', 'Training loss'),
    (valid_loss_values, 'b', 'Validation loss'),
):
    ax.plot(epochs, loss_curve, line_style, label=curve_label)
ax.set_title('Training and Validation Losses')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()

plt.show()

## task: create and compare training loss histories for SGD with different learning rates (LR) 0.1, 0.01, 0.001