In [None]:
## Load libraries
import pandas as pd
import numpy as np
import sys
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from sklearn.decomposition import PCA
from tensorflow.keras.datasets import mnist
plt.style.use('dark_background')
%matplotlib inline

In [None]:
# Print numpy floating-point values with two digits of precision
np.set_printoptions(precision=2)

In [None]:
import tensorflow as tf

In [None]:
# Show the installed TensorFlow version as this cell's output
tf.__version__

---

Load MNIST Data

---

In [None]:
## Load MNIST data
# The labels are discarded: only the images are kept by this cell.
(X_train, _), (X_test, _) = mnist.load_data()

# Flatten each image into one row vector. mnist.load_data() is documented to
# return uint8 pixel arrays, so cast to float64 *before* any arithmetic:
# subtracting the training minimum in an unsigned integer dtype would wrap
# around for any test pixel smaller than that minimum.
X_train = X_train.reshape(X_train.shape[0], -1).astype(np.float64)
X_test = X_test.reshape(X_test.shape[0], -1).astype(np.float64)

num_samples, num_features = X_train.shape

# Normalize the samples (images) with min/max taken from the training set only
xmax = np.amax(X_train)
xmin = np.amin(X_train)
X_train = (X_train - xmin) / (xmax - xmin) # all train features turn into a number between 0 and 1
X_test = (X_test - xmin) / (xmax - xmin) # test set reuses the training min/max

print('MNIST set')
print('---------------------')
print(f'Number of training samples = {num_samples:d}')
print(f'Number of features = {num_features:d}')

In [None]:
## Parameters for the autoencoder
# Optimisation settings
batch_size, max_epochs = 256, 50
learning_rate = 1e-3

# Layer widths: the input width is read from the flattened training matrix
original_dim = X_train.shape[1]
hidden_dim, latent_dim = 256, 128

In [None]:
## Convert numpy to tf.data.datasets
# One dataset element per row of X_train, grouped into batches of batch_size
training_dataset = (
  tf.data.Dataset
  .from_tensor_slices(X_train)
  .batch(batch_size)
)

In [None]:
## Encoder
class Encoder(tf.keras.layers.Layer):
  """Two stacked dense layers (hidden_dim units, then latent_dim units), both with ReLU."""

  def __init__(self, hidden_dim, latent_dim):
    """Create the two dense layers; nothing here depends on the input."""
    super().__init__()
    relu = tf.nn.relu
    self.encoder_layer1 = tf.keras.layers.Dense(hidden_dim, activation = relu)
    self.encoder_layer2 = tf.keras.layers.Dense(latent_dim, activation = relu)

  def call(self, input_features):
    """Forward pass: feed input_features through layer 1, then layer 2."""
    hidden = self.encoder_layer1(input_features)
    return self.encoder_layer2(hidden)

In [None]:
## Decoder
class Decoder(tf.keras.layers.Layer):
  """Two stacked dense layers (hidden_dim units, then original_dim units), both with ReLU."""

  def __init__(self, latent_dim, hidden_dim, original_dim):
    """Create the two dense layers.

    latent_dim is accepted but not used when building the layers.
    """
    super().__init__()
    relu = tf.nn.relu
    self.decoder_layer1 = tf.keras.layers.Dense(hidden_dim, activation = relu)
    self.decoder_layer2 = tf.keras.layers.Dense(original_dim, activation = relu)

  def call(self, encoded_features):
    """Forward pass: feed encoded_features through layer 1, then layer 2."""
    hidden = self.decoder_layer1(encoded_features)
    return self.decoder_layer2(hidden)

In [None]:
## Autoencoder
class Autoencoder(tf.keras.Model):
  """Keras model that chains an Encoder and a Decoder."""

  def __init__(self, latent_dim, hidden_dim, original_dim):
    """Build the encoder/decoder pair from the given layer widths."""
    super().__init__()
    # Starts as an empty list; this class itself never reads or appends to it
    self.loss = []
    self.encoder = Encoder(hidden_dim = hidden_dim, latent_dim = latent_dim)
    self.decoder = Decoder(
      latent_dim = latent_dim,
      hidden_dim = hidden_dim,
      original_dim = original_dim,
    )

  def call(self, input_features):
    """Encode input_features, then decode the result and return it."""
    return self.decoder(self.encoder(input_features))

In [None]:
## Build model
# Instantiate the autoencoder with the layer widths from the parameter cell
autoencoder = Autoencoder(
  latent_dim = latent_dim,
  hidden_dim = hidden_dim,
  original_dim = original_dim,
)

In [None]:
## Optimizer
# Adam optimizer using the learning_rate value defined in the parameter cell
opt = tf.keras.optimizers.Adam(learning_rate = learning_rate)

In [None]:
## Custom training - loss
def loss(true, pred):
  """Return the mean of the squared element-wise difference between true and pred."""
  residual = tf.subtract(true, pred)
  squared_error = tf.square(residual)
  return tf.reduce_mean(squared_error)

In [None]:
## Custom training - compute gradient of loss and update weights
@tf.function
def train(model, loss, opt, original_features):
  """Run one optimisation step on a batch and return the batch loss.

  Args:
    model: callable model exposing `trainable_variables`.
    loss: callable `loss(true, pred)` returning a scalar tensor.
    opt: optimizer exposing `apply_gradients`.
    original_features: floating-point batch used as both input and target.

  Returns:
    The loss tensor computed on this batch (before the weight update).

  Raises:
    TypeError: if `original_features` is not a floating-point tensor.
  """
  target_dtype = original_features.dtype
  if not target_dtype.is_floating:
    raise TypeError(f'original_features must be floating point, got {target_dtype}')
  with tf.GradientTape() as g:
    # Compare prediction and target in the batch's own dtype instead of a
    # hard-coded float64, so float32 batches work as well as float64 ones.
    pred = tf.cast(model(original_features), target_dtype)
    loss_batch = loss(original_features, pred)
  gradients = g.gradient(loss_batch, model.trainable_variables)
  opt.apply_gradients(zip(gradients, model.trainable_variables))
  return loss_batch

In [None]:
## Train network
# Mean training loss of each epoch, kept for plotting
loss_train_epoch_plot = np.empty(max_epochs)

# Iterate over epochs
for epoch in range(max_epochs):
  # Use a fresh accumulator for every epoch. A single Mean metric shared by
  # all epochs keeps accumulating, so it would report the running average
  # over every batch seen since training started, not this epoch's loss.
  loss_train_epoch = tf.keras.metrics.Mean()
  for train_batch_features in training_dataset:
    loss_batch = train(autoencoder, loss, opt, train_batch_features)
    # Accumulate this batch's loss into the epoch mean
    loss_train_epoch(loss_batch)
  loss_train_epoch_plot[epoch] = loss_train_epoch.result().numpy()
  print(f'Epoch {epoch+1}, loss = {loss_train_epoch_plot[epoch]}')

In [None]:
# Plot train loss as a function of epoch:
fig, ax = plt.subplots(1, 1, figsize = (4, 4))
fig.tight_layout(pad = 4.0)
# Number epochs from 1 so the x-axis matches the "Epoch N" lines of the training log
ax.plot(np.arange(1, max_epochs + 1), loss_train_epoch_plot, 'b')
ax.set_xlabel('Epoch', fontsize = 12)
ax.set_ylabel('Loss value', fontsize = 12)
ax.set_xticks(np.arange(0, max_epochs+1, 5))
# Trailing semicolon keeps the Text(...) repr out of the cell output
ax.set_title('Loss vs. Epoch', fontsize = 14);

In [None]:
# Plot random input images and their reconstructed versions
# img maps subplot column -> index of a test image. Sampling is done without
# replacement so the same test image cannot be shown in two columns.
img = dict(enumerate(np.random.choice(X_test.shape[0], 5, replace = False)))
# Reconstruct all selected images in a single forward pass
reconstructed = autoencoder(X_test[list(img.values())]).numpy()
fig, ax = plt.subplots(2, 5, figsize = (8, 2))
for j, ind in img.items():
  # Original image
  ax[0, j].imshow(X_test[ind].reshape(28, 28), cmap = 'gray')
  # Reconstructed image
  ax[1, j].imshow(reconstructed[j].reshape(28, 28), cmap = 'gray')

In [None]:
## Dimension reduction and reconstruction using PCA
# Create and fit PCA object on the training data
k = 20 # number of principal components (top-k eigenvectors) to keep
pca = PCA(n_components = k)
# Fit PCA using training data
pca.fit(X_train)
# Reconstruct the random test images chosen in the previous cell
selected = list(img.values())
X_test_reconstructed = pca.inverse_transform(pca.transform(X_test[selected]))

fig, ax = plt.subplots(2, 5, figsize = (8, 2))
for j in range(X_test_reconstructed.shape[0]):
  # Original image
  ax[0, j].imshow(X_test[img[j]].reshape(28, 28), cmap = 'gray')
  # Reconstructed image
  ax[1, j].imshow(X_test_reconstructed[j].reshape(28, 28), cmap = 'gray')