<a href="https://colab.research.google.com/github/fabriziobasso/Colab_backup/blob/main/Copy_of_MINST_Autoencoders_v3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **AUTOENCODERS for DIMENSION REDUCTION: A STUDY**

Autoencoders have many practical applications; dimensionality reduction is one of them.

There are many techniques for dimensionality reduction. Autoencoders (AEs) and Principal Component Analysis (PCA) are among the most popular.

PCA is a linear method, so it cannot capture non-linear structure in the data. Autoencoders, in contrast, learn non-linear mappings and therefore work well for dimensionality reduction on non-linear data.

In [None]:
%%capture
# adabelief
!pip install adabelief-tf --no-cache-di
!pip install tensorflow-addons

In [None]:
%%capture
import os
# Mount Google Drive under /content/drive (this cell only works on Colab).
from google.colab import drive
drive.mount('/content/drive')

# Remember the starting working directory, then switch to the project folder on Drive.
old_wd = os.getcwd()
os.chdir("/content/drive/MyDrive/Exercises/Autoencoders")

In [None]:
# Acquire MNIST data
from tensorflow.keras.datasets import mnist
import matplotlib.pyplot as plt
import numpy as np
from sklearn.decomposition import PCA

from sklearn.model_selection import train_test_split, StratifiedKFold, cross_validate, KFold

from tensorflow.keras import Model
from tensorflow.keras.layers import Dense
from tensorflow import keras
import tensorflow as tf
from keras.layers import Reshape
from keras.layers import LeakyReLU
from tensorflow.keras.metrics import Metric
from keras.layers import BatchNormalization
from keras.layers import Concatenate
from keras.layers import MultiHeadAttention, Attention
from tensorflow.keras.utils import plot_model
from tensorflow.keras.callbacks import ReduceLROnPlateau, LearningRateScheduler, EarlyStopping
from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions
from tensorflow.keras.callbacks import ReduceLROnPlateau, LearningRateScheduler, EarlyStopping
from tensorflow.keras.layers import Dense, Input, InputLayer, Add, Concatenate, Dropout, BatchNormalization, Conv1D, Reshape, Flatten, AveragePooling1D, MaxPool1D
import tensorflow.keras.backend as K
from tensorflow_addons.activations import sparsemax
from tensorflow_addons.metrics import FBetaScore, F1Score
from adabelief_tf import AdaBeliefOptimizer
from keras import layers
import tensorflow.keras.backend as K

**Objectives**

By the end of this notebook, you’ll be able to:

* Use Autoencoders to reduce the dimensionality of the input data
* Use PCA to reduce the dimensionality of the input data
* Compare the performance of PCA and Autoencoders in dimensionality reduction
* See how Autoencoders outperform PCA in dimensionality reduction
* Learn key differences between PCA and Autoencoders
* Learn when to use which method for dimensionality reduction

In [None]:
# Load the MNIST train/test images and labels through Keras' built-in dataset helper.
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

In [None]:
# Inspect the shape of the raw training array before it is flattened below.
train_images.shape

In [None]:
# Flatten every image to a 784-long vector and rescale pixel values by 1/255
# as float32, in a single chained expression per split.
# NOTE: this cell overwrites its own inputs, so re-running it without reloading
# the data divides by 255 a second time.
train_images = train_images.reshape(-1, 784).astype('float32') / 255
test_images = test_images.reshape(-1, 784).astype('float32') / 255

### TRAIN VALIDATION SPLIT:

In [None]:
# Hold out 16% of the training images as a validation set. The split is stratified
# on the digit labels and uses a fixed seed so it is repeatable.
X_t, X_v, y_t, y_v = train_test_split(train_images, train_labels, stratify=train_labels, test_size=0.16, random_state=1978)

# Shapes are reported in the order X_t, y_t, X_v, y_v.
# The message previously said "Test-Validation", but this is the train/validation split.
print("Train-Validation Split Sizes: {}, {}, {}, and {}".format(X_t.shape, y_t.shape, X_v.shape, y_v.shape))

## 1.0 Traditional Approaches

### Test PCA:

In [None]:
# Fit a 2-component PCA on the training images (fit() returns the estimator itself),
# project the test images to 2-D, then map them back to pixel space.
pca = PCA(n_components=2).fit(train_images)
compressed_images = pca.transform(test_images)
recovered_images = pca.inverse_transform(compressed_images)

In [None]:
# Show the first few test digits as reconstructed from their 2-D PCA codes,
# each titled with its true label.
n = 5
fig, axes = plt.subplots(1, n, figsize=(9, 2))
for ax, flat_digit, digit_label in zip(axes, recovered_images[:n], test_labels[:n]):
  ax.imshow(flat_digit.reshape(28, 28), cmap="gray")
  ax.set_title(digit_label)
  ax.axis('off')

plt.show()

In [None]:
# Same 2-component PCA as before, this time with whitening enabled.
# Overwrites `pca`, `compressed_images` and `recovered_images` from the previous fit.
pca = PCA(n_components=2, whiten=True).fit(train_images)
compressed_images = pca.transform(test_images)
recovered_images = pca.inverse_transform(compressed_images)

In [None]:
# Reconstructions of the first few test digits from the whitened PCA codes,
# each titled with its true label.
n = 5
fig, axes = plt.subplots(1, n, figsize=(9, 2))
for ax, flat_digit, digit_label in zip(axes, recovered_images[:n], test_labels[:n]):
  ax.imshow(flat_digit.reshape(28, 28), cmap="gray")
  ax.set_title(digit_label)
  ax.axis('off')

plt.show()

In [None]:
# Visualize the ORIGINAL (uncompressed) MNIST test digits, for comparison with
# the PCA reconstructions above.
n = 5
plt.figure(figsize=(9, 2))
for i in range(n):
  ax = plt.subplot(1, n, i+1)
  # test_images holds flattened 784-vectors, so reshape back to 28x28 for display
  plt.imshow(test_images[i].reshape(28, 28), cmap="gray")
  plt.title(test_labels[i])
  ax.axis('off')

plt.show()

In [None]:
import seaborn as sns

# Scatter of the 2-D PCA codes of the test images, coloured by digit label.
fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(x=compressed_images[:, 0], y=compressed_images[:, 1],
                hue=test_labels, palette='tab10', ax=ax)

ax.set_xlabel("First principal component")
ax.set_ylabel("Second principal component")

# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1), borderaxespad=0);

### LinearDiscriminantAnalysis

In [None]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.manifold import Isomap

In [None]:
#%%time
#embedding = Isomap(n_neighbors=30, n_components=2)
#X_transformed = embedding.fit(train_images)

#### **DEFINE CALLBACKS**

In [None]:
# Multiply the learning rate by 0.75 whenever the validation loss has not
# improved for 10 consecutive epochs.
lr = tf.keras.callbacks.ReduceLROnPlateau(monitor="val_loss",
                        factor=0.75,
                        patience=10,
                        verbose=1,
                        mode="min")

# Stop training after 25 epochs without validation-loss improvement and roll the
# model back to the best weights seen.
early_stop = tf.keras.callbacks.EarlyStopping(monitor="val_loss",
                            patience=25,
                            verbose=1,
                            mode="min",
                            restore_best_weights=True)


# NOTE(review): this is an absolute path at the filesystem root and is shared by
# every training run in the notebook, so each run overwrites the previous checkpoint.
checkpoint_filepath = '/checkpoint/'

# Fixes relative to the previous version:
#  * the class lives at tf.keras.callbacks.ModelCheckpoint (the former
#    `tf.keras.callbacks.keras.callbacks` path is not a valid attribute chain);
#  * `restore_best_weights` is an EarlyStopping option, not a ModelCheckpoint one;
#    `save_best_only=True` is the ModelCheckpoint setting that makes `monitor`/`mode`
#    matter, i.e. only the best weights by val_loss are kept.
Checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_filepath,
                                                 save_weights_only=True,
                                                 monitor="val_loss",
                                                 mode='min',
                                                 save_best_only=True)

## **2.0 Perform dimensionality reduction with Autoencoder**
Now, we’ll build a deep autoencoder to apply dimensionality reduction to the same MNIST data. We keep the latent vector two-dimensional so that the result is easy to compare with the two-component PCA output above.

- Step 1: Acquire and prepare the MNIST dataset as previously.
- Step 2: Define the autoencoder architecture

In [None]:
def model_autoencoder(act_1='sigmoid', act_lat='tanh', input_dim=28*28, latent_vec_dim=2,
                      hidden_units=(500, 300, 100)):
  """Build a symmetric dense autoencoder together with its encoder-only model.

  Args:
    act_1: activation used by every hidden layer AND by the reconstruction
      (output) layer.
    act_lat: activation of the latent (bottleneck) layer.
    input_dim: length of the flattened input vector (default 28*28 = 784).
    latent_vec_dim: size of the bottleneck (default 2).
    hidden_units: widths of the encoder's hidden layers, in order; the decoder
      uses the same widths reversed.

  Returns:
    (autoencoder, latent_model): `autoencoder` maps input -> reconstruction and
    `latent_model` maps input -> latent code. Both share the same layers, so
    training the autoencoder also trains the encoder.
  """
  input_layer = Input(shape=(input_dim,))

  # Encoder: stack of Dense layers narrowing down to the latent code.
  hidden = input_layer
  for units in hidden_units:
    hidden = Dense(units, activation=act_1)(hidden)
  encoder = Dense(latent_vec_dim, activation=act_lat)(hidden)

  # Decoder: mirror image of the encoder, widening back to the input size.
  hidden = encoder
  for units in reversed(hidden_units):
    hidden = Dense(units, activation=act_1)(hidden)
  decoder = Dense(input_dim, activation=act_1)(hidden)

  # Connect both encoder and decoder
  autoencoder = Model(input_layer, decoder, name="Deep_Autoencoder")

  # Latent representation (Optional)
  latent_model = Model(input_layer, encoder)

  return autoencoder, latent_model

In [None]:
# Top-level construction of a 784-500-300-100-2-100-300-500-784 dense autoencoder
# with sigmoid hidden/output layers and a tanh latent layer.
# NOTE(review): this repeats the body of model_autoencoder() with its default
# arguments; within the visible notebook the resulting `autoencoder` and
# `latent_model` are not referenced again — confirm whether this cell is still needed.
input_dim = 28*28
latent_vec_dim = 2

input_layer = Input(shape=(input_dim,))

# Define the autoencoder architecture
# First build the encoder
enc_layer_1 = Dense(500, activation='sigmoid')(input_layer)
enc_layer_2 = Dense(300, activation='sigmoid')(enc_layer_1)
enc_layer_3 = Dense(100, activation='sigmoid')(enc_layer_2)
enc_layer_4 = Dense(latent_vec_dim, activation='tanh')(enc_layer_3)
encoder = enc_layer_4

# Then build the decoder
dec_layer_1 = Dense(100, activation='sigmoid')(encoder)
dec_layer_2 = Dense(300, activation='sigmoid')(dec_layer_1)
dec_layer_3 = Dense(500, activation='sigmoid')(dec_layer_2)
dec_layer_4 = Dense(input_dim, activation='sigmoid')(dec_layer_3)
decoder = dec_layer_4

# Connect both encoder and decoder
autoencoder = Model(input_layer, decoder, name="Deep_Autoencoder")

# Latent representation (Optional)
latent_model = Model(input_layer, encoder)

In [None]:
# Shapes of the training/validation features and labels produced by the split above.
X_t.shape, y_t.shape, X_v.shape, y_v.shape

### 2.1 Optimizing Algorithm: ADAM

In [None]:
# Fresh autoencoder/encoder pair for the Adam run that is trained with the
# ReduceLROnPlateau callback (`lr`).
autoencoder_adam_lr, latent_model_adam_lr = model_autoencoder(act_1='sigmoid',act_lat='tanh')

# Get summary
autoencoder_adam_lr.summary()

In [None]:
# Adam with default settings, mean-squared-error reconstruction loss.
optimizer = tf.keras.optimizers.Adam()
loss = 'mse'
autoencoder_adam_lr.compile(optimizer=optimizer, loss=loss)

# The inputs double as targets (the network learns to reproduce its input).
# Early stopping, checkpointing and LR reduction are all active for this run.
history_lr = autoencoder_adam_lr.fit(
    X_t, X_t,
    validation_data=(X_v, X_v),
    epochs=250,
    batch_size=128,
    shuffle=True,
    callbacks=[early_stop, Checkpoint, lr],
)

# Persist the full autoencoder and the encoder-only model to Drive.
autoencoder_adam_lr.save('/content/drive/MyDrive/Exercises/Autoencoders/sig_tanh/adam/autoencoder')
latent_model_adam_lr.save('/content/drive/MyDrive/Exercises/Autoencoders/sig_tanh/adam/encoder')

In [None]:
# Training vs. validation loss per epoch for the Adam + LR-reduction run.
fig, ax = plt.subplots(figsize=(10, 7))
for history_key, curve_label in (('loss', 'Train'), ('val_loss', 'Validation')):
  ax.plot(history_lr.history[history_key], label=curve_label)
ax.set_ylabel('MSE')
ax.set_xlabel('Epoch')
ax.set_title('Autoencoder Reconstruction Loss', pad=13)
ax.legend(loc='upper right')

In [None]:
# Despite its name, `compressed_images` holds the autoencoder's full-size
# reconstructions of the test images; the first few are displayed.
compressed_images = autoencoder_adam_lr.predict(test_images)
n = 5
fig, axes = plt.subplots(1, n, figsize=(9, 2))
for ax, flat_digit in zip(axes, compressed_images[:n]):
  ax.imshow(flat_digit.reshape(28, 28), cmap="gray")
  ax.axis('off')

plt.show()

In [None]:
# 2-D latent codes of the test images from the Adam + LR-reduction encoder.
latent_representation = latent_model_adam_lr.predict(test_images)

import seaborn as sns

fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(x=latent_representation[:, 0], y=latent_representation[:, 1],
                hue=test_labels, palette='tab10', ax=ax)

ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1), borderaxespad=0);

#### Adam without learning-rate reduction

In [None]:
# Fresh autoencoder/encoder pair for the Adam run trained WITHOUT the
# ReduceLROnPlateau callback.
autoencoder_adam, latent_model_adam = model_autoencoder(act_1='sigmoid',act_lat='tanh')

# Get summary
autoencoder_adam.summary()

In [None]:
# Adam with default settings, mean-squared-error reconstruction loss.
optimizer = tf.keras.optimizers.Adam()
loss = 'mse'
autoencoder_adam.compile(optimizer=optimizer, loss=loss)

# Same data as the previous run, but the callback list omits `lr`, so the
# learning rate is never reduced on plateau.
history_nolr = autoencoder_adam.fit(
    X_t, X_t,
    validation_data=(X_v, X_v),
    epochs=100,
    batch_size=128,
    shuffle=True,
    callbacks=[early_stop, Checkpoint],
)

# Persist the full autoencoder and the encoder-only model to Drive.
autoencoder_adam.save('/content/drive/MyDrive/Exercises/Autoencoders/sig_tanh/adam_nolr/autoencoder')
latent_model_adam.save('/content/drive/MyDrive/Exercises/Autoencoders/sig_tanh/adam_nolr/encoder')

In [None]:
# Training vs. validation loss per epoch for the Adam run without LR reduction.
fig, ax = plt.subplots()
for history_key, curve_label in (('loss', 'Train'), ('val_loss', 'Validation')):
  ax.plot(history_nolr.history[history_key], label=curve_label)
ax.set_ylabel('MSE')
ax.set_xlabel('Epoch')
ax.set_title('Autoencoder Reconstruction Loss', pad=13)
ax.legend(loc='upper right')

In [None]:
# Reconstructions of the test images from the Adam (no LR reduction) autoencoder;
# the first few are displayed.
compressed_images = autoencoder_adam.predict(test_images)
n = 5
fig, axes = plt.subplots(1, n, figsize=(9, 2))
for ax, flat_digit in zip(axes, compressed_images[:n]):
  ax.imshow(flat_digit.reshape(28, 28), cmap="gray")
  ax.axis('off')

plt.show()

In [None]:
# 2-D latent codes of the test images from the Adam (no LR reduction) encoder.
latent_representation = latent_model_adam.predict(test_images)

import seaborn as sns

fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(x=latent_representation[:, 0], y=latent_representation[:, 1],
                hue=test_labels, palette='tab10', ax=ax)

ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1), borderaxespad=0);

In [None]:
# Validation-loss curves of the two Adam runs (with vs. without LR reduction)
# on a single axes for direct comparison.
fig, ax = plt.subplots(figsize=(10, 7))
ax.plot(history_lr.history['val_loss'], label='Validation Adam_lr')
ax.plot(history_nolr.history['val_loss'], label='Validation Adam_nolr')

ax.set_ylabel('MSE')
ax.set_xlabel('Epoch')
ax.set_title('Autoencoder Reconstruction Loss - ADAM lr vs ADAM nolr', pad=13)
ax.legend(loc='upper right')

### 2.2 Optimizing Algo: NADAM

In [None]:
# Fresh autoencoder/encoder pair (sigmoid hidden layers, tanh latent) for the Nadam run.
autoencoder_nadam, latent_model_nadam = model_autoencoder(act_1='sigmoid',act_lat='tanh')

In [None]:
# Nadam with default settings, mean-squared-error reconstruction loss.
optimizer = tf.keras.optimizers.Nadam()
loss = 'mse'
autoencoder_nadam.compile(optimizer=optimizer, loss=loss)

# Inputs double as targets; all three callbacks are active.
history_nadam = autoencoder_nadam.fit(
    X_t, X_t,
    validation_data=(X_v, X_v),
    epochs=100,
    batch_size=128,
    shuffle=True,
    callbacks=[early_stop, Checkpoint, lr],
)

# Persist the full autoencoder and the encoder-only model to Drive.
autoencoder_nadam.save('/content/drive/MyDrive/Exercises/Autoencoders/sig_tanh/nadam/autoencoder')
latent_model_nadam.save('/content/drive/MyDrive/Exercises/Autoencoders/sig_tanh/nadam/encoder')

In [None]:
# Training vs. validation loss per epoch for the Nadam run.
# The model is compiled with loss='mse', so the y-axis is MSE
# (it was previously mislabelled as binary cross-entropy).
fig, ax = plt.subplots()
ax.plot(history_nadam.history['loss'], label='Train')
ax.plot(history_nadam.history['val_loss'], label='Validation')
ax.set_ylabel('MSE')
ax.set_xlabel('Epoch')
ax.set_title('Autoencoder Reconstruction Loss - Nadam', pad=13)
ax.legend(loc='upper right')

In [None]:
# Validation-loss curves of both Adam runs and the Nadam run on a single axes.
# All three models are compiled with loss='mse', so the y-axis is MSE
# (it was previously mislabelled as binary cross-entropy).
fig, ax = plt.subplots(figsize=(10, 7))
ax.plot(history_lr.history['val_loss'], label='Validation Adam lr')
ax.plot(history_nolr.history['val_loss'], label='Validation Adam nolr')
ax.plot(history_nadam.history['val_loss'], label='Validation Nadam')
ax.set_ylabel('MSE')
ax.set_xlabel('Epoch')
ax.set_title('Autoencoder Reconstruction Loss - Nadam vs ADAM', pad=13)
ax.legend(loc='upper right')

In [None]:
# Reconstructions of the test images from the Nadam autoencoder; the first few
# are displayed, each titled with its true label.
compressed_images = autoencoder_nadam.predict(test_images)
n = 5
fig, axes = plt.subplots(1, n, figsize=(9, 2))
for ax, flat_digit, digit_label in zip(axes, compressed_images[:n], test_labels[:n]):
  ax.imshow(flat_digit.reshape(28, 28), cmap="gray")
  ax.set_title(digit_label)
  ax.axis('off')

plt.show()

In [None]:
# Original test digits, shown for comparison with the reconstructions above.
n = 5
fig, axes = plt.subplots(1, n, figsize=(9, 2))
for ax, flat_digit, digit_label in zip(axes, test_images[:n], test_labels[:n]):
  ax.imshow(flat_digit.reshape(28, 28), cmap="gray")
  ax.set_title(digit_label)
  ax.axis('off')

plt.show()

In [None]:
# 2-D latent codes of the test images from the Nadam-trained encoder.
latent_representation = latent_model_nadam.predict(test_images)

import seaborn as sns

fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(x=latent_representation[:, 0], y=latent_representation[:, 1],
                hue=test_labels, palette='tab10', ax=ax)

ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1), borderaxespad=0);

### 2.3 Optimizing Algo: AdaBelief

In [None]:
# Fresh autoencoder/encoder pair (sigmoid hidden layers, tanh latent) for the AdaBelief run.
autoencoder_adam_b, latent_model_adam_b = model_autoencoder(act_1='sigmoid',act_lat='tanh')

# Get summary (disabled; the architecture is the same as in the earlier runs)
#autoencoder_adam_b.summary()

In [None]:
# AdaBelief optimizer with explicit hyper-parameters; its change-log banner is silenced.
adabelief_settings = dict(learning_rate=0.025,
                          weight_decay=1e-5,
                          epsilon=1e-7,
                          print_change_log=False)
optimizer = AdaBeliefOptimizer(**adabelief_settings)
loss = 'mse'

autoencoder_adam_b.compile(optimizer=optimizer, loss=loss)

# Inputs double as targets; this run uses early stopping and checkpointing
# but not the LR-reduction callback.
history_ab = autoencoder_adam_b.fit(
    X_t, X_t,
    validation_data=(X_v, X_v),
    epochs=140,
    batch_size=128,
    shuffle=True,
    callbacks=[early_stop, Checkpoint],
)

# Persist the full autoencoder and the encoder-only model to Drive.
autoencoder_adam_b.save('/content/drive/MyDrive/Exercises/Autoencoders/sig_tanh/adabelief/autoencoder')
latent_model_adam_b.save('/content/drive/MyDrive/Exercises/Autoencoders/sig_tanh/adabelief/encoder')

In [None]:
# Training vs. validation loss per epoch for the AdaBelief run.
# The model is compiled with loss='mse', so the y-axis is MSE
# (it was previously mislabelled as binary cross-entropy).
fig, ax = plt.subplots()
ax.plot(history_ab.history['loss'], label='Train')
ax.plot(history_ab.history['val_loss'], label='Validation')
ax.set_ylabel('MSE')
ax.set_xlabel('Epoch')
ax.set_title('Autoencoder Reconstruction Loss - AdaBelief', pad=13)
ax.legend(loc='upper right')

In [None]:
# Validation loss: Adam (with LR reduction) vs. AdaBelief.
# `history_ab` holds the AdaBelief run at this point, so its curve and the title
# are labelled AdaBelief (they previously said "Nadam"). Both models use an MSE
# loss, so the y-axis is MSE rather than binary cross-entropy.
fig, ax = plt.subplots()
ax.plot(history_lr.history['val_loss'], label='Validation Adam')
ax.plot(history_ab.history['val_loss'], label='Validation AdaBelief')
ax.set_ylabel('MSE')
ax.set_xlabel('Epoch')
ax.set_title('Autoencoder Reconstruction Loss - AdaBelief vs ADAM', pad=13)
ax.legend(loc='upper right')

### 2.4 Optimizing Algo: SGD

In [None]:
# Fresh autoencoder/encoder pair (sigmoid hidden layers, tanh latent) for the SGD run.
autoencoder_sgdnest, latent_model_sgdnest = model_autoencoder(act_1='sigmoid',act_lat='tanh')

# Get summary (disabled)
#autoencoder_sgdnest.summary()

In [None]:
# Compile the autoencoder model
# NOTE(review): the model is named "sgdnest", but SGD is created with only a
# learning rate (no momentum / nesterov arguments) — confirm whether Nesterov
# momentum was intended.
optimizer = keras.optimizers.SGD(learning_rate=0.02)
loss = 'mse'

autoencoder_sgdnest.compile(loss=loss, optimizer=optimizer)

# Train the autoencoder with MNIST data
# NOTE(review): the result is stored in `history_ab`, the same name the AdaBelief
# section assigns; after this cell runs, the AdaBelief history is no longer
# reachable under that name. Unlike the other runs, the trained model is not saved here.
history_ab = autoencoder_sgdnest.fit(X_t, X_t, epochs=250, batch_size=128, callbacks = [early_stop, Checkpoint, lr],
                                   shuffle=True, validation_data=(X_v, X_v))

In [None]:
# Training vs. validation loss per epoch for the SGD run (stored in `history_ab`
# by the preceding training cell). The title previously said "Nadam" and the
# y-axis "Binary Cross Entropy Loss"; the run uses SGD with an MSE loss.
fig, ax = plt.subplots()
ax.plot(history_ab.history['loss'], label='Train')
ax.plot(history_ab.history['val_loss'], label='Validation')
ax.set_ylabel('MSE')
ax.set_xlabel('Epoch')
ax.set_title('Autoencoder Reconstruction Loss - SGD', pad=13)
ax.legend(loc='upper right')

In [None]:
# Validation loss: Adam (no LR reduction) vs. SGD (`history_ab` holds the SGD run
# after the SGD training cell). Both models use an MSE loss, so the y-axis is MSE
# rather than binary cross-entropy.
fig, ax = plt.subplots()
ax.plot(history_nolr.history['val_loss'], label='Validation Adam')
ax.plot(history_ab.history['val_loss'], label='Validation SGD')
ax.set_ylabel('MSE')
ax.set_xlabel('Epoch')
ax.set_title('Autoencoder Reconstruction Loss - SGD vs ADAM', pad=13)
ax.legend(loc='upper right')

In [None]:
# Reconstructions of the test images from the SGD-trained autoencoder;
# the first few are displayed.
compressed_images = autoencoder_sgdnest.predict(test_images)
n = 5
fig, axes = plt.subplots(1, n, figsize=(9, 2))
for ax, flat_digit in zip(axes, compressed_images[:n]):
  ax.imshow(flat_digit.reshape(28, 28), cmap="gray")
  ax.axis('off')

plt.show()

## 3.0 Variational Autoencoders:

In [None]:
class Sampling(layers.Layer):
    """Reparameterisation layer: draws z from (z_mean, z_log_var).

    Given the mean and log-variance of the latent distribution, returns
    z_mean + exp(0.5 * z_log_var) * epsilon, with epsilon drawn from a
    standard normal of shape (batch, dim).
    """

    def call(self, inputs):
        z_mean, z_log_var = inputs
        # Read batch size and latent width from the runtime tensor shape.
        mean_shape = tf.shape(z_mean)
        batch, dim = mean_shape[0], mean_shape[1]
        epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
        # exp(0.5 * log_var) is the standard deviation.
        std_dev = tf.exp(0.5 * z_log_var)
        return z_mean + std_dev * epsilon

In [None]:
# VAE encoder: 784 -> 500 -> 300 -> 100 -> 32 sigmoid layers, followed by two
# parallel linear heads that output the latent mean and log-variance.
input_dim = 28*28
latent_vec_dim = 2

input_layer = Input(shape=(input_dim,))

# Define the autoencoder architecture
# First build the encoder
enc_layer_1 = Dense(500, activation='sigmoid')(input_layer)
enc_layer_2 = Dense(300, activation='sigmoid')(enc_layer_1)
enc_layer_3 = Dense(100, activation='sigmoid')(enc_layer_2)
enc_layer_4 = Dense(32, activation='sigmoid')(enc_layer_3)
# No activation on the two heads: mean and log-variance are unconstrained.
z_mean = layers.Dense(latent_vec_dim, name="z_mean")(enc_layer_4)
z_log_var = layers.Dense(latent_vec_dim, name="z_log_var")(enc_layer_4)
# Sample the latent code from the predicted distribution.
z = Sampling()([z_mean, z_log_var])
# The encoder model exposes all three tensors, in the order [z_mean, z_log_var, z].
encoder = keras.Model(input_layer, [z_mean, z_log_var, z], name="encoder")
encoder.summary()


In [None]:
# Then build the decoder
# Stand-alone model with its own latent input, so it can also be used as a
# generator on arbitrary latent points.
latent_inputs = keras.Input(shape=(latent_vec_dim,))
dec_layer_0 = Dense(32, activation='sigmoid')(latent_inputs)
dec_layer_1 = Dense(100, activation='sigmoid')(dec_layer_0)
dec_layer_2 = Dense(300, activation='sigmoid')(dec_layer_1)
dec_layer_3 = Dense(500, activation='sigmoid')(dec_layer_2)
# Sigmoid output layer with one unit per input pixel.
dec_layer_4 = Dense(input_dim, activation='sigmoid')(dec_layer_3)

decoder = keras.Model(latent_inputs, dec_layer_4, name="decoder")
decoder.summary()

In [None]:
# instantiate VAE model
# encoder(...) returns [z_mean, z_log_var, z]; index 2 is the sampled z, which
# is what the decoder reconstructs from.
outputs = decoder(encoder(input_layer)[2])
vae = keras.Model(input_layer, outputs, name='vae_mlp')

What we've done so far allows us to instantiate 3 models:

* an end-to-end autoencoder mapping inputs to reconstructions
* an encoder mapping inputs to the latent space
* a generator that can take points on the latent space and will output the corresponding reconstructed samples.

We train the model using the end-to-end model, with a custom loss function: the sum of a reconstruction term, and the KL divergence regularization term.

In [None]:
# Reconstruction term: binary cross-entropy between input and reconstruction,
# scaled by the number of input features.
reconstruction_loss = keras.losses.binary_crossentropy(input_layer, outputs) * input_dim

# KL-divergence regularisation term, summed over the latent dimensions.
kl_per_dim = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
kl_loss = K.sum(kl_per_dim, axis=-1) * -0.5

# The batch mean of both terms is attached to the model with add_loss(),
# which is why compile() receives no `loss` argument.
vae_loss = K.mean(reconstruction_loss + kl_loss)
vae.add_loss(vae_loss)
vae.compile(optimizer='adam')

In [None]:
# Train the VAE end-to-end with early stopping, checkpointing and LR reduction.
# NOTE(review): the loss was attached via add_loss() and compile() was given no
# `loss`, so the target arrays passed here are presumably not used in the loss —
# TODO confirm for the Keras version in use.
history_vae = vae.fit(X_t, X_t,
                      epochs=100,
                      batch_size=128,
                      callbacks = [early_stop, Checkpoint, lr],
                      shuffle=True,
                      validation_data=(X_v, X_v))

In [None]:
# encoder.predict returns [z_mean, z_log_var, z]; index 2 (the sampled z) is plotted here.
latent_representation = encoder.predict(test_images)

fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(x=latent_representation[2][:, 0], y=latent_representation[2][:, 1],
                hue=test_labels, palette='tab10', ax=ax)

ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1), borderaxespad=0);

In [None]:
# Index 1 of the encoder outputs is z_log_var (the outputs are ordered
# [z_mean, z_log_var, z]), so this plot shows the predicted log-variances, not
# latent coordinates. The axis labels now say so.
# NOTE(review): if the latent means were intended, plot latent_representation[0] instead.
fig, ax = plt.subplots(figsize=(10, 7))

sns.scatterplot(x=latent_representation[1][:,0],
                y=latent_representation[1][:,1],
                hue=test_labels, palette='tab10', ax=ax)

ax.set_xlabel("z_log_var, first latent dimension")
ax.set_ylabel("z_log_var, second latent dimension")

ax.grid(linestyle='--', alpha=0.5)

# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1),
          borderaxespad=0);

In [None]:
# Training vs. validation loss per epoch for the VAE.
# The loss is the custom reconstruction + KL term attached with add_loss(), and the
# model is compiled with optimizer='adam'; the title previously said "Nadam".
fig, ax = plt.subplots(figsize=(10, 7))
ax.plot(history_vae.history['loss'], label='Train')
ax.plot(history_vae.history['val_loss'], label='Validation')
ax.set_ylabel('Customized Loss')
ax.set_xlabel('Epoch')
ax.set_title('Variational Autoencoder Loss (reconstruction + KL)', pad=13)
ax.legend(loc='upper right')

In [None]:
# Reconstructions of the test images from the VAE; the first few are displayed,
# each titled with its true label.
compressed_images = vae.predict(test_images)
n = 5
fig, axes = plt.subplots(1, n, figsize=(9, 2))
for ax, flat_digit, digit_label in zip(axes, compressed_images[:n], test_labels[:n]):
  ax.imshow(flat_digit.reshape(28, 28), cmap="gray")
  ax.set_title(digit_label)
  ax.axis('off')

plt.show()

In [None]:
# Display a 2D manifold of the digits
n = 20  # the mosaic holds n x n (20 x 20) decoded digits
digit_size = 28
# n evenly spaced points in [-3, 3] along each latent axis
grid_x = np.linspace(-3, 3, n)
grid_y = np.linspace(-3, 3, n)

# Build every latent point up front. Row i of the mosaic uses grid_x[i] as the
# SECOND latent coordinate and column j uses grid_y[j] as the FIRST, exactly as
# the previous nested loop did.
z_grid = np.array([[xi, yi] for yi in grid_x for xi in grid_y])

# Decode the whole grid with one predict() call instead of n*n single-sample calls.
x_decoded = decoder.predict(z_grid)

# (n*n, 784) -> (row, col, y, x) -> (row, y, col, x) -> one (n*28, n*28) image.
figure = (x_decoded
          .reshape(n, n, digit_size, digit_size)
          .transpose(0, 2, 1, 3)
          .reshape(digit_size * n, digit_size * n))

plt.figure(figsize=(10, 10))
plt.imshow(figure)
plt.show()

In [None]:
# Persist the end-to-end VAE and its encoder to Drive.
# NOTE(review): the decoder is not saved separately here.
for model_to_save, target_dir in (
    (vae, '/content/drive/MyDrive/Exercises/Autoencoders/sig_tanh/vae/autoencoder'),
    (encoder, '/content/drive/MyDrive/Exercises/Autoencoders/sig_tanh/vae/encoder'),
):
  model_to_save.save(target_dir)

## 4.0 Exploring Different Activation Functions:

#### 4.1 Linear for Latent Dimension:

In [None]:
# Fresh autoencoder/encoder pair with sigmoid hidden layers and a LINEAR latent layer.
autoencoder_siglin, latent_model_siglin = model_autoencoder(act_1='sigmoid',act_lat='linear')

# Get summary
autoencoder_siglin.summary()

In [None]:
# Adam with default settings, mean-squared-error reconstruction loss.
optimizer = tf.keras.optimizers.Adam()
loss = 'mse'
autoencoder_siglin.compile(optimizer=optimizer, loss=loss)

# Inputs double as targets; all three callbacks are active.
history_siglin = autoencoder_siglin.fit(
    X_t, X_t,
    validation_data=(X_v, X_v),
    epochs=100,
    batch_size=128,
    shuffle=True,
    callbacks=[early_stop, Checkpoint, lr],
)

# Persist the full autoencoder and the encoder-only model to Drive.
autoencoder_siglin.save('/content/drive/MyDrive/Exercises/Autoencoders/sig_lin/enc/autoencoder')
latent_model_siglin.save('/content/drive/MyDrive/Exercises/Autoencoders/sig_lin/enc/encoder')

In [None]:
# Training vs. validation loss per epoch for the sigmoid/linear-latent run.
fig, ax = plt.subplots(figsize=(10, 7))
for history_key, curve_label in (('loss', 'Train'), ('val_loss', 'Validation')):
  ax.plot(history_siglin.history[history_key], label=curve_label)
ax.set_ylabel('MSE')
ax.set_xlabel('Epoch')
ax.set_title('Autoencoder Reconstruction Loss', pad=13)
ax.legend(loc='upper right')

In [None]:
# Reconstructions of the test images from the sigmoid/linear-latent autoencoder;
# the first few are displayed.
compressed_images = autoencoder_siglin.predict(test_images)
n = 5
fig, axes = plt.subplots(1, n, figsize=(9, 2))
for ax, flat_digit in zip(axes, compressed_images[:n]):
  ax.imshow(flat_digit.reshape(28, 28), cmap="gray")
  ax.axis('off')

plt.show()

In [None]:
# 2-D latent codes of the test images from the sigmoid/linear-latent encoder.
latent_representation = latent_model_siglin.predict(test_images)

import seaborn as sns

fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(x=latent_representation[:, 0], y=latent_representation[:, 1],
                hue=test_labels, palette='tab10', ax=ax)

ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1), borderaxespad=0);

#### 4.2 Hidden Layers: Relu - Latent Space: Linear

In [None]:
def model_autoencoder(act_1='relu',act_lat='tanh',kernel="he_normal"):
  """Build a 784-500-300-100-2 dense autoencoder plus its encoder-only model.

  Hidden layers use activation `act_1` with kernel initializer `kernel`; the
  2-unit latent layer uses `act_lat` and the 784-unit output layer is always
  sigmoid (both with the default initializer).

  NOTE: this definition replaces any `model_autoencoder` defined earlier in
  the notebook once the cell is executed.

  Returns:
    (autoencoder, latent_model): input -> reconstruction and input -> latent
    code models sharing the same layers.
  """
  input_dim = 28*28
  latent_vec_dim = 2
  hidden_widths = (500, 300, 100)

  input_layer = Input(shape=(input_dim,))

  # Encoder: progressively narrower hidden layers, then the latent projection.
  tensor = input_layer
  for width in hidden_widths:
    tensor = Dense(width, activation=act_1, kernel_initializer=kernel)(tensor)
  latent_code = Dense(latent_vec_dim, activation=act_lat)(tensor)

  # Decoder: the same widths in reverse, then the sigmoid reconstruction layer.
  tensor = latent_code
  for width in hidden_widths[::-1]:
    tensor = Dense(width, activation=act_1, kernel_initializer=kernel)(tensor)
  reconstruction = Dense(input_dim, activation='sigmoid')(tensor)

  autoencoder = Model(input_layer, reconstruction, name="Deep_Autoencoder")
  latent_model = Model(input_layer, latent_code)

  return autoencoder, latent_model

In [None]:
# Fresh autoencoder/encoder pair with ReLU hidden layers and a linear latent layer.
autoencoder_rellin, latent_model_rellin = model_autoencoder(act_1='relu',act_lat='linear')

# Get summary
autoencoder_rellin.summary()

In [None]:
# Adam with default settings, mean-squared-error reconstruction loss.
optimizer = tf.keras.optimizers.Adam()
loss = 'mse'
autoencoder_rellin.compile(optimizer=optimizer, loss=loss)

# Inputs double as targets; all three callbacks are active.
history_rellin = autoencoder_rellin.fit(
    X_t, X_t,
    validation_data=(X_v, X_v),
    epochs=100,
    batch_size=128,
    shuffle=True,
    callbacks=[early_stop, Checkpoint, lr],
)

# Persist the full autoencoder and the encoder-only model to Drive.
autoencoder_rellin.save('/content/drive/MyDrive/Exercises/Autoencoders/rel_lin/enc/autoencoder')
latent_model_rellin.save('/content/drive/MyDrive/Exercises/Autoencoders/rel_lin/enc/encoder')

In [None]:
# Training vs. validation loss per epoch for the ReLU/linear-latent run.
fig, ax = plt.subplots(figsize=(10, 7))
for history_key, curve_label in (('loss', 'Train'), ('val_loss', 'Validation')):
  ax.plot(history_rellin.history[history_key], label=curve_label)
ax.set_ylabel('MSE')
ax.set_xlabel('Epoch')
ax.set_title('Autoencoder Reconstruction Loss', pad=13)
ax.legend(loc='upper right')

In [None]:
# Reconstructions of the test images from the ReLU/linear-latent autoencoder;
# the first few are displayed.
compressed_images = autoencoder_rellin.predict(test_images)
n = 5
fig, axes = plt.subplots(1, n, figsize=(9, 2))
for ax, flat_digit in zip(axes, compressed_images[:n]):
  ax.imshow(flat_digit.reshape(28, 28), cmap="gray")
  ax.axis('off')

plt.show()

In [None]:
# 2-D latent codes of the test images from the ReLU/linear-latent encoder.
latent_representation = latent_model_rellin.predict(test_images)

import seaborn as sns

fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(x=latent_representation[:, 0], y=latent_representation[:, 1],
                hue=test_labels, palette='tab10', ax=ax)

ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1), borderaxespad=0);

#### 4.3 Hidden Layers: PReLU - Latent Space: Linear

In [None]:
def model_autoencoder(kernel="he_normal"):
  """Build a 784-500-300-100-2 dense autoencoder whose hidden layers use PReLU.

  Every hidden Dense layer has no built-in activation and uses kernel
  initializer `kernel`; each is followed by its own PReLU layer. The latent
  layer is linear and the output layer is sigmoid.

  NOTE: this definition replaces any `model_autoencoder` defined earlier in
  the notebook once the cell is executed, and it no longer accepts the
  `act_1` / `act_lat` arguments.

  Returns:
    (autoencoder, latent_model): input -> reconstruction and input -> latent
    code models sharing the same layers.
  """
  input_dim = 28*28
  latent_vec_dim = 2

  def dense_prelu(tensor, units):
    """Apply an activation-free Dense layer followed by a fresh PReLU."""
    pre_activation = Dense(units, kernel_initializer=kernel)(tensor)
    return tf.keras.layers.PReLU()(pre_activation)

  input_layer = Input(shape=(input_dim,))

  # Encoder: 500 -> 300 -> 100 PReLU blocks, then the linear 2-D bottleneck.
  tensor = input_layer
  for units in (500, 300, 100):
    tensor = dense_prelu(tensor, units)
  latent_code = Dense(latent_vec_dim, activation='linear')(tensor)

  # Decoder: 100 -> 300 -> 500 PReLU blocks, then the sigmoid reconstruction.
  tensor = latent_code
  for units in (100, 300, 500):
    tensor = dense_prelu(tensor, units)
  reconstruction = Dense(input_dim, activation='sigmoid')(tensor)

  autoencoder = Model(input_layer, reconstruction, name="Deep_Autoencoder")
  latent_model = Model(input_layer, latent_code)

  return autoencoder, latent_model

In [None]:
# Fresh autoencoder/encoder pair from the PReLU builder defined above (default he_normal init).
autoencoder_prellin, latent_model_prellin = model_autoencoder()

# Get summary
autoencoder_prellin.summary()

In [None]:
# Adam with default settings, mean-squared-error reconstruction loss.
optimizer = tf.keras.optimizers.Adam()
loss = 'mse'
autoencoder_prellin.compile(optimizer=optimizer, loss=loss)

# Inputs double as targets; all three callbacks are active.
history_prellin = autoencoder_prellin.fit(
    X_t, X_t,
    validation_data=(X_v, X_v),
    epochs=100,
    batch_size=128,
    shuffle=True,
    callbacks=[early_stop, Checkpoint, lr],
)

# Persist the full autoencoder and the encoder-only model to Drive.
autoencoder_prellin.save('/content/drive/MyDrive/Exercises/Autoencoders/prel_lin/enc/autoencoder')
latent_model_prellin.save('/content/drive/MyDrive/Exercises/Autoencoders/prel_lin/enc/encoder')

In [None]:
# Training vs. validation loss per epoch for the PReLU/linear-latent run.
fig, ax = plt.subplots(figsize=(10, 7))
for history_key, curve_label in (('loss', 'Train'), ('val_loss', 'Validation')):
  ax.plot(history_prellin.history[history_key], label=curve_label)
ax.set_ylabel('MSE')
ax.set_xlabel('Epoch')
ax.set_title('Autoencoder Reconstruction Loss', pad=13)
ax.legend(loc='upper right')

In [None]:
# Validation-loss curves of the ReLU and PReLU runs on a single axes.
fig, ax = plt.subplots(figsize=(10, 7))
for run_history, run_label in ((history_rellin, 'Relu_Lin'), (history_prellin, 'Prelu_lin')):
  ax.plot(run_history.history['val_loss'], label=run_label)
ax.set_ylabel('MSE')
ax.set_xlabel('Epoch')
ax.set_title('Autoencoder Reconstruction Loss', pad=13)
ax.legend(loc='upper right')

In [None]:
# Reconstructions of the test images from the PReLU/linear-latent autoencoder;
# the first few are displayed.
compressed_images = autoencoder_prellin.predict(test_images)
n = 5
fig, axes = plt.subplots(1, n, figsize=(9, 2))
for ax, flat_digit in zip(axes, compressed_images[:n]):
  ax.imshow(flat_digit.reshape(28, 28), cmap="gray")
  ax.axis('off')

plt.show()

In [None]:
# 2-D latent codes of the test images from the PReLU/linear-latent encoder.
latent_representation = latent_model_prellin.predict(test_images)

fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(x=latent_representation[:, 0], y=latent_representation[:, 1],
                hue=test_labels, palette='tab10', ax=ax)

ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1), borderaxespad=0);

#### 4.4 Hidden Layers: PReLU - Latent Space: Linear + L1 Regularization

In [None]:
def model_autoencoder(kernel="he_normal", l1=0.001):
  """Build a dense autoencoder with PReLU hidden layers and an L1-regularized linear bottleneck.

  The encoder narrows 784 -> 500 -> 300 -> 100 -> 2 and the decoder mirrors
  it back to 784 sigmoid outputs.

  Args:
    kernel: initializer passed to every hidden Dense layer (the bottleneck
      and output layers keep the Keras default).
    l1: L1 penalty factor applied to the bottleneck layer's kernel. The
      default reproduces the previously hard-coded value.

  Returns:
    (autoencoder, latent_model): the input -> reconstruction model and a
    model from the same input to the 2-unit bottleneck. Both are built on
    the same layers, so training the autoencoder also trains the encoder.
  """
  input_dim = 28*28
  latent_vec_dim = 2
  hidden_units = (500, 300, 100)

  input_layer = Input(shape=(input_dim,))

  # Encoder: Dense followed by a learnable PReLU at each hidden width.
  x = input_layer
  for units in hidden_units:
    x = Dense(units, kernel_initializer=kernel)(x)
    x = tf.keras.layers.PReLU()(x)

  # Linear 2-D bottleneck; the L1 term is added to the training loss.
  encoder = Dense(latent_vec_dim, activation='linear',
                  kernel_regularizer=tf.keras.regularizers.L1(l1=l1))(x)

  # Decoder: same widths in reverse order.
  x = encoder
  for units in reversed(hidden_units):
    x = Dense(units, kernel_initializer=kernel)(x)
    x = tf.keras.layers.PReLU()(x)

  decoder = Dense(input_dim, activation='sigmoid')(x)

  # Connect both encoder and decoder
  autoencoder = Model(input_layer, decoder, name="Deep_Autoencoder")

  # Encoder-only view for inspecting the latent space
  latent_model = Model(input_layer, encoder)

  return autoencoder, latent_model

In [None]:
# Build the autoencoder and its encoder-only view.
# NOTE(review): `model_autoencoder` is defined several times in this notebook;
# this call uses whichever definition was executed last, so run the definition
# cell directly above first. It also rebinds `autoencoder_prellin` /
# `latent_model_prellin`, which an earlier section already used.
autoencoder_prellin, latent_model_prellin = model_autoencoder()

# Get summary
autoencoder_prellin.summary()

In [None]:
# Compile with a default Adam optimizer and a mean-squared-error objective.
optimizer = tf.keras.optimizers.Adam()
loss = 'mse'
autoencoder_prellin.compile(optimizer=optimizer, loss=loss)

# Fit the network to reproduce its own input (targets == inputs).
history_prellin = autoencoder_prellin.fit(
    x=X_t,
    y=X_t,
    validation_data=(X_v, X_v),
    epochs=120,
    batch_size=128,
    shuffle=True,
    callbacks=[early_stop, Checkpoint, lr],
)

# Persist the full model and the encoder-only model.
autoencoder_prellin.save('/content/drive/MyDrive/Exercises/Autoencoders/prel_lin_l1/enc/autoencoder')
latent_model_prellin.save('/content/drive/MyDrive/Exercises/Autoencoders/prel_lin_l1/enc/encoder')

In [None]:
# Per-epoch training and validation loss recorded in history_prellin.
fig, ax = plt.subplots(figsize=(10, 7))
ax.plot(history_prellin.history['loss'], label='Train')
ax.plot(history_prellin.history['val_loss'], label='Validation')
ax.set_xlabel('Epoch')
ax.set_ylabel('MSE')
ax.set_title('Autoencoder Reconstruction Loss', pad=13)
ax.legend(loc='upper right')

In [None]:
# Reconstruct the test images and display the first n of them side by side.
compressed_images = autoencoder_prellin.predict(test_images)
n = 5
fig, panels = plt.subplots(1, n, figsize=(9, 2))
for i, ax in enumerate(panels):
  ax.imshow(compressed_images[i].reshape(28, 28), cmap="gray")
  ax.set_axis_off()

plt.show()

In [None]:
# Project the test images into the 2-D latent space and colour by label.
latent_representation = latent_model_prellin.predict(test_images)

fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(x=latent_representation[:, 0],
                y=latent_representation[:, 1],
                hue=test_labels, palette='tab10', ax=ax)

ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Legend sits just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1),
          borderaxespad=0);

#### 4.3.1 Hidden Layers: PRelu - Latent Space: Linear + BatchNorm

In [None]:
def model_autoencoder(kernel="he_normal"):
  """Build a dense autoencoder whose hidden layers are Dense -> BatchNorm -> PReLU.

  The encoder narrows 784 -> 500 -> 300 -> 100 -> 2 (linear bottleneck) and
  the decoder mirrors it back to 784 sigmoid outputs.

  Args:
    kernel: initializer passed to every hidden Dense layer (the bottleneck
      and output layers keep the Keras default).

  Returns:
    (autoencoder, latent_model): the input -> reconstruction model and a
    model from the same input to the bottleneck, built on the same layers.
  """
  n_pixels = 28*28
  n_latent = 2
  hidden_widths = (500, 300, 100)

  inputs = Input(shape=(n_pixels,))

  # Encoder stack.
  x = inputs
  for width in hidden_widths:
    x = Dense(width, kernel_initializer=kernel)(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.PReLU()(x)

  # Two-unit linear bottleneck (no batch normalization here).
  code = Dense(n_latent, activation='linear')(x)

  # Decoder stack: the encoder widths in reverse.
  x = code
  for width in reversed(hidden_widths):
    x = Dense(width, kernel_initializer=kernel)(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.PReLU()(x)

  reconstruction = Dense(n_pixels, activation='sigmoid')(x)

  autoencoder = Model(inputs, reconstruction, name="Deep_Autoencoder")
  latent_model = Model(inputs, code)

  return autoencoder, latent_model

In [None]:
# Build the autoencoder and its encoder-only view.
# NOTE(review): relies on the most recently executed `model_autoencoder`
# definition (the name is redefined several times in this notebook) and
# rebinds `autoencoder_prellin` / `latent_model_prellin` from earlier sections.
autoencoder_prellin, latent_model_prellin = model_autoencoder()

# Get summary
autoencoder_prellin.summary()

In [None]:
# Compile with a default Adam optimizer and a mean-squared-error objective.
optimizer = tf.keras.optimizers.Adam()
loss = 'mse'
autoencoder_prellin.compile(optimizer=optimizer, loss=loss)

# Fit the network to reproduce its own input (targets == inputs).
history_prellin = autoencoder_prellin.fit(
    x=X_t,
    y=X_t,
    validation_data=(X_v, X_v),
    epochs=120,
    batch_size=128,
    shuffle=True,
    callbacks=[early_stop, Checkpoint, lr],
)

# Persist the full model and the encoder-only model.
autoencoder_prellin.save('/content/drive/MyDrive/Exercises/Autoencoders/prel_lin_bn/enc/autoencoder')
latent_model_prellin.save('/content/drive/MyDrive/Exercises/Autoencoders/prel_lin_bn/enc/encoder')

In [None]:
# Per-epoch training and validation loss recorded in history_prellin.
fig, ax = plt.subplots(figsize=(10, 7))
ax.plot(history_prellin.history['loss'], label='Train')
ax.plot(history_prellin.history['val_loss'], label='Validation')
ax.set_xlabel('Epoch')
ax.set_ylabel('MSE')
ax.set_title('Autoencoder Reconstruction Loss', pad=13)
ax.legend(loc='upper right')

In [None]:
# Reconstruct the test images and display the first n of them side by side.
compressed_images = autoencoder_prellin.predict(test_images)
n = 5
fig, panels = plt.subplots(1, n, figsize=(9, 2))
for i, ax in enumerate(panels):
  ax.imshow(compressed_images[i].reshape(28, 28), cmap="gray")
  ax.set_axis_off()

plt.show()

In [None]:
# Project the test images into the 2-D latent space and colour by label.
latent_representation = latent_model_prellin.predict(test_images)

fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(x=latent_representation[:, 0],
                y=latent_representation[:, 1],
                hue=test_labels, palette='tab10', ax=ax)

ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Legend sits just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1),
          borderaxespad=0);

#### 4.4 Hidden: Swish - Latent Linear

In [None]:
def model_autoencoder(kernel="he_normal"):
  """Build a dense autoencoder with Swish hidden activations and a linear bottleneck.

  The encoder narrows 784 -> 500 -> 300 -> 100 -> 2 and the decoder mirrors
  it back to 784 sigmoid outputs.

  Args:
    kernel: initializer passed to every hidden Dense layer (the bottleneck
      and output layers keep the Keras default).

  Returns:
    (autoencoder, latent_model): the input -> reconstruction model and a
    model from the same input to the bottleneck, built on the same layers.
  """
  n_pixels = 28*28
  n_latent = 2
  hidden_widths = (500, 300, 100)

  inputs = Input(shape=(n_pixels,))

  # Encoder stack: activation-free Dense, then Swish applied to its output.
  x = inputs
  for width in hidden_widths:
    x = Dense(width, kernel_initializer=kernel)(x)
    x = tf.keras.activations.swish(x)

  # Two-unit linear bottleneck.
  code = Dense(n_latent, activation='linear')(x)

  # Decoder stack: the encoder widths in reverse.
  x = code
  for width in reversed(hidden_widths):
    x = Dense(width, kernel_initializer=kernel)(x)
    x = tf.keras.activations.swish(x)

  reconstruction = Dense(n_pixels, activation='sigmoid')(x)

  autoencoder = Model(inputs, reconstruction, name="Deep_Autoencoder")
  latent_model = Model(inputs, code)

  return autoencoder, latent_model

In [None]:
# Build the Swish/linear autoencoder and its encoder-only view.
# NOTE(review): relies on the most recently executed `model_autoencoder`
# definition; the name is redefined several times in this notebook.
autoencoder_swilin, latent_model_swilin = model_autoencoder()

# Get summary
autoencoder_swilin.summary()

In [None]:
# Compile with a default Adam optimizer and a mean-squared-error objective.
optimizer = tf.keras.optimizers.Adam()
loss = 'mse'
autoencoder_swilin.compile(loss=loss, optimizer=optimizer)

# Train the autoencoder to reproduce its own input (targets == inputs).
history_swilin = autoencoder_swilin.fit(X_t, X_t, epochs=100, batch_size=128,
                                        callbacks=[early_stop, Checkpoint, lr],
                                        shuffle=True, validation_data=(X_v, X_v))

# Save under a Swish-specific directory. The previous target was 'prel_lin',
# which silently overwrote the PReLU/linear model saved by an earlier section.
autoencoder_swilin.save('/content/drive/MyDrive/Exercises/Autoencoders/swi_lin/enc/autoencoder')
latent_model_swilin.save('/content/drive/MyDrive/Exercises/Autoencoders/swi_lin/enc/encoder')

In [None]:
# Per-epoch training and validation loss recorded in history_swilin.
fig, ax = plt.subplots(figsize=(10, 7))
ax.plot(history_swilin.history['loss'], label='Train')
ax.plot(history_swilin.history['val_loss'], label='Validation')
ax.set_xlabel('Epoch')
ax.set_ylabel('MSE')
ax.set_title('Autoencoder Reconstruction Loss', pad=13)
ax.legend(loc='upper right')

In [None]:
# Reconstruct the test images and display the first n of them side by side.
compressed_images = autoencoder_swilin.predict(test_images)
n = 5
fig, panels = plt.subplots(1, n, figsize=(9, 2))
for i, ax in enumerate(panels):
  ax.imshow(compressed_images[i].reshape(28, 28), cmap="gray")
  ax.set_axis_off()

plt.show()

In [None]:
# Project the test images into the 2-D latent space and colour by label.
latent_representation = latent_model_swilin.predict(test_images)

fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(x=latent_representation[:, 0],
                y=latent_representation[:, 1],
                hue=test_labels, palette='tab10', ax=ax)

ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Legend sits just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1),
          borderaxespad=0);

In [None]:
# Validation-loss comparison between the ReLU/linear and Swish/linear runs.
fig, ax = plt.subplots(figsize=(10, 7))
ax.plot(history_rellin.history['val_loss'], label='Relu_Lin')
ax.plot(history_swilin.history['val_loss'], label='Swish_lin')
ax.set_xlabel('Epoch')
ax.set_ylabel('MSE')
ax.set_title('Autoencoder Reconstruction Loss', pad=13)
ax.legend(loc='upper right')

#### 4.5 Hidden: Swish - Latent: Sig

In [None]:
def model_autoencoder(kernel="he_normal"):
  """Build a dense autoencoder with Swish hidden activations and a sigmoid bottleneck.

  The encoder narrows 784 -> 500 -> 300 -> 100 -> 2 and the decoder mirrors
  it back to 784 sigmoid outputs.

  Args:
    kernel: initializer passed to every hidden Dense layer (the bottleneck
      and output layers keep the Keras default).

  Returns:
    (autoencoder, latent_model): the input -> reconstruction model and a
    model from the same input to the bottleneck, built on the same layers.
  """
  n_pixels = 28*28
  n_latent = 2
  hidden_widths = (500, 300, 100)

  inputs = Input(shape=(n_pixels,))

  # Encoder stack: activation-free Dense, then Swish applied to its output.
  x = inputs
  for width in hidden_widths:
    x = Dense(width, kernel_initializer=kernel)(x)
    x = tf.keras.activations.swish(x)

  # Two-unit bottleneck squashed by a sigmoid.
  code = Dense(n_latent, activation='sigmoid')(x)

  # Decoder stack: the encoder widths in reverse.
  x = code
  for width in reversed(hidden_widths):
    x = Dense(width, kernel_initializer=kernel)(x)
    x = tf.keras.activations.swish(x)

  reconstruction = Dense(n_pixels, activation='sigmoid')(x)

  autoencoder = Model(inputs, reconstruction, name="Deep_Autoencoder")
  latent_model = Model(inputs, code)

  return autoencoder, latent_model

In [None]:
# Build the Swish/sigmoid autoencoder and its encoder-only view.
# NOTE(review): relies on the most recently executed `model_autoencoder`
# definition; the name is redefined several times in this notebook.
autoencoder_swisig, latent_model_swisig = model_autoencoder()

# Get summary
autoencoder_swisig.summary()

In [None]:
# Compile with a default Adam optimizer and a mean-squared-error objective.
optimizer = tf.keras.optimizers.Adam()
loss = 'mse'
autoencoder_swisig.compile(optimizer=optimizer, loss=loss)

# Fit the network to reproduce its own input (targets == inputs).
history_swisig = autoencoder_swisig.fit(
    x=X_t,
    y=X_t,
    validation_data=(X_v, X_v),
    epochs=120,
    batch_size=128,
    shuffle=True,
    callbacks=[early_stop, Checkpoint, lr],
)

# Persist the full model and the encoder-only model.
autoencoder_swisig.save('/content/drive/MyDrive/Exercises/Autoencoders/swi_sig/enc/autoencoder')
latent_model_swisig.save('/content/drive/MyDrive/Exercises/Autoencoders/swi_sig/enc/encoder')

In [None]:
# Per-epoch training and validation loss recorded in history_swisig.
fig, ax = plt.subplots(figsize=(10, 7))
ax.plot(history_swisig.history['loss'], label='Train')
ax.plot(history_swisig.history['val_loss'], label='Validation')
ax.set_xlabel('Epoch')
ax.set_ylabel('MSE')
ax.set_title('Autoencoder Reconstruction Loss', pad=13)
ax.legend(loc='upper right')

In [None]:
# Reconstruct the test images and display the first n of them side by side.
compressed_images = autoencoder_swisig.predict(test_images)
n = 5
fig, panels = plt.subplots(1, n, figsize=(9, 2))
for i, ax in enumerate(panels):
  ax.imshow(compressed_images[i].reshape(28, 28), cmap="gray")
  ax.set_axis_off()

plt.show()

In [None]:
# Project the test images into the 2-D latent space and colour by label.
latent_representation = latent_model_swisig.predict(test_images)

fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(x=latent_representation[:, 0],
                y=latent_representation[:, 1],
                hue=test_labels, palette='tab10', ax=ax)

ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Legend sits just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1),
          borderaxespad=0);

#### 4.6 Hidden: Selu - Latent: Linear

In [None]:
def model_autoencoder(kernel="lecun_normal"):
  """Build a dense autoencoder with SELU hidden activations and a linear bottleneck.

  The encoder narrows 784 -> 500 -> 300 -> 100 -> 2 and the decoder mirrors
  it back to 784 sigmoid outputs.

  Args:
    kernel: initializer passed to every hidden Dense layer (the bottleneck
      and output layers keep the Keras default).

  Returns:
    (autoencoder, latent_model): the input -> reconstruction model and a
    model from the same input to the bottleneck, built on the same layers.
  """
  n_pixels = 28*28
  n_latent = 2
  hidden_widths = (500, 300, 100)

  inputs = Input(shape=(n_pixels,))

  # Encoder stack: activation-free Dense, then SELU applied to its output.
  x = inputs
  for width in hidden_widths:
    x = Dense(width, kernel_initializer=kernel)(x)
    x = tf.keras.activations.selu(x)

  # Two-unit linear bottleneck.
  code = Dense(n_latent, activation='linear')(x)

  # Decoder stack: the encoder widths in reverse.
  x = code
  for width in reversed(hidden_widths):
    x = Dense(width, kernel_initializer=kernel)(x)
    x = tf.keras.activations.selu(x)

  reconstruction = Dense(n_pixels, activation='sigmoid')(x)

  autoencoder = Model(inputs, reconstruction, name="Deep_Autoencoder")
  latent_model = Model(inputs, code)

  return autoencoder, latent_model

In [None]:
# Build the SELU/linear autoencoder and its encoder-only view.
# NOTE(review): relies on the most recently executed `model_autoencoder`
# definition; the name is redefined several times in this notebook.
autoencoder_selulin, latent_model_selulin = model_autoencoder()

# Get summary
autoencoder_selulin.summary()

In [None]:
# Compile with a default Adam optimizer and a mean-squared-error objective.
optimizer = tf.keras.optimizers.Adam()
loss = 'mse'
autoencoder_selulin.compile(optimizer=optimizer, loss=loss)

# Fit the network to reproduce its own input (targets == inputs).
history_selulin = autoencoder_selulin.fit(
    x=X_t,
    y=X_t,
    validation_data=(X_v, X_v),
    epochs=120,
    batch_size=128,
    shuffle=True,
    callbacks=[early_stop, Checkpoint, lr],
)

# Persist the full model and the encoder-only model.
autoencoder_selulin.save('/content/drive/MyDrive/Exercises/Autoencoders/selu_lin/enc/autoencoder')
latent_model_selulin.save('/content/drive/MyDrive/Exercises/Autoencoders/selu_lin/enc/encoder')

In [None]:
# Per-epoch training and validation loss recorded in history_selulin.
fig, ax = plt.subplots(figsize=(10, 7))
ax.plot(history_selulin.history['loss'], label='Train')
ax.plot(history_selulin.history['val_loss'], label='Validation')
ax.set_xlabel('Epoch')
ax.set_ylabel('MSE')
ax.set_title('Autoencoder Reconstruction Loss - Selu-Lin Combo', pad=13)
ax.legend(loc='upper right')

In [None]:
# Reconstruct the test images and display the first n of them side by side.
compressed_images = autoencoder_selulin.predict(test_images)
n = 5
fig, panels = plt.subplots(1, n, figsize=(9, 2))
for i, ax in enumerate(panels):
  ax.imshow(compressed_images[i].reshape(28, 28), cmap="gray")
  ax.set_axis_off()

plt.show()

In [None]:
# Project the test images into the 2-D latent space and colour by label.
latent_representation = latent_model_selulin.predict(test_images)

fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(x=latent_representation[:, 0],
                y=latent_representation[:, 1],
                hue=test_labels, palette='tab10', ax=ax)

ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Legend sits just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1),
          borderaxespad=0);

#### 4.7 Hidden: Selu - Latent: Linear + Regularization





In [None]:
def model_autoencoder(kernel="lecun_normal", l1=0.01):
  """Build a dense SELU autoencoder with an L1-regularized linear bottleneck.

  The encoder narrows 784 -> 500 -> 300 -> 100 -> 2 and the decoder mirrors
  it back to 784 sigmoid outputs.

  Args:
    kernel: initializer passed to every hidden Dense layer (the bottleneck
      and output layers keep the Keras default).
    l1: L1 penalty factor applied to the bottleneck layer's kernel.

  Returns:
    (autoencoder, latent_model): the input -> reconstruction model and a
    model from the same input to the bottleneck, built on the same layers.
  """
  n_pixels = 28*28
  n_latent = 2
  hidden_widths = (500, 300, 100)

  inputs = Input(shape=(n_pixels,))

  # Encoder stack: activation-free Dense, then SELU applied to its output.
  x = inputs
  for width in hidden_widths:
    x = Dense(width, kernel_initializer=kernel)(x)
    x = tf.keras.activations.selu(x)

  # Two-unit linear bottleneck carrying the L1 kernel penalty.
  bottleneck_penalty = tf.keras.regularizers.L1(l1=l1)
  code = Dense(n_latent, activation='linear', kernel_regularizer=bottleneck_penalty)(x)

  # Decoder stack: the encoder widths in reverse.
  x = code
  for width in reversed(hidden_widths):
    x = Dense(width, kernel_initializer=kernel)(x)
    x = tf.keras.activations.selu(x)

  reconstruction = Dense(n_pixels, activation='sigmoid')(x)

  autoencoder = Model(inputs, reconstruction, name="Deep_Autoencoder")
  latent_model = Model(inputs, code)

  return autoencoder, latent_model

In [None]:
# Build the regularized SELU/linear autoencoder with an explicit L1 factor
# (overrides the builder's default of 0.01).
# NOTE(review): relies on the most recently executed `model_autoencoder`
# definition; only the definition directly above accepts `l1`.
autoencoder_selulin_r, latent_model_selulin_r = model_autoencoder(l1=0.001)

# Get summary
autoencoder_selulin_r.summary()

In [None]:
# Compile with a default Adam optimizer and a mean-squared-error objective.
optimizer = tf.keras.optimizers.Adam()
loss = 'mse'
autoencoder_selulin_r.compile(optimizer=optimizer, loss=loss)

# Fit the network to reproduce its own input (targets == inputs).
history_selulin_r = autoencoder_selulin_r.fit(
    x=X_t,
    y=X_t,
    validation_data=(X_v, X_v),
    epochs=120,
    batch_size=128,
    shuffle=True,
    callbacks=[early_stop, Checkpoint, lr],
)

# Persist the full model and the encoder-only model.
autoencoder_selulin_r.save('/content/drive/MyDrive/Exercises/Autoencoders/selu_lin_reg/enc/autoencoder')
latent_model_selulin_r.save('/content/drive/MyDrive/Exercises/Autoencoders/selu_lin_reg/enc/encoder')

In [None]:
# Plot training and validation loss against the number of epochs for the
# regularized SELU/linear run. This previously plotted `history_selulin`,
# i.e. the curves of the unregularized model from the preceding section.
fig, ax = plt.subplots(figsize=(10, 7))
ax.plot(history_selulin_r.history['loss'], label='Train')
ax.plot(history_selulin_r.history['val_loss'], label='Validation')
ax.set_ylabel('MSE')
ax.set_xlabel('Epoch')
ax.set_title('Autoencoder Reconstruction Loss - Selu-Lin Combo with Regularization', pad=13)
ax.legend(loc='upper right')

In [None]:
# Reconstruct the test images and display the first n of them side by side.
compressed_images = autoencoder_selulin_r.predict(test_images)
n = 5
fig, panels = plt.subplots(1, n, figsize=(9, 2))
for i, ax in enumerate(panels):
  ax.imshow(compressed_images[i].reshape(28, 28), cmap="gray")
  ax.set_axis_off()

plt.show()

In [None]:
# Project the test images into the 2-D latent space and colour by label.
latent_representation = latent_model_selulin_r.predict(test_images)

fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(x=latent_representation[:, 0],
                y=latent_representation[:, 1],
                hue=test_labels, palette='tab10', ax=ax)

ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Legend sits just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1),
          borderaxespad=0);

#### 4.5 Hidden: Elu - Latent: Sig

In [None]:
def model_autoencoder_(kernel="he_normal"):
  """Build a dense autoencoder with ELU hidden activations and a sigmoid bottleneck.

  The encoder narrows 784 -> 500 -> 300 -> 100 -> 2 and the decoder mirrors
  it back to 784 sigmoid outputs.

  Args:
    kernel: initializer passed to every hidden Dense layer (the bottleneck
      and output layers keep the Keras default).

  Returns:
    (autoencoder, latent_model): the input -> reconstruction model and a
    model from the same input to the bottleneck, built on the same layers.
  """
  n_pixels = 28*28
  n_latent = 2
  hidden_widths = (500, 300, 100)

  inputs = Input(shape=(n_pixels,))

  # Encoder stack: activation-free Dense, then ELU applied to its output.
  x = inputs
  for width in hidden_widths:
    x = Dense(width, kernel_initializer=kernel)(x)
    x = tf.keras.activations.elu(x)

  # Two-unit bottleneck squashed by a sigmoid.
  code = Dense(n_latent, activation='sigmoid')(x)

  # Decoder stack: the encoder widths in reverse.
  x = code
  for width in reversed(hidden_widths):
    x = Dense(width, kernel_initializer=kernel)(x)
    x = tf.keras.activations.elu(x)

  reconstruction = Dense(n_pixels, activation='sigmoid')(x)

  autoencoder = Model(inputs, reconstruction, name="Deep_Autoencoder")
  latent_model = Model(inputs, code)

  return autoencoder, latent_model

In [None]:
# Build the ELU/sigmoid autoencoder and its encoder-only view.
# NOTE(review): relies on the most recently executed `model_autoencoder_`
# definition; the name is redefined several times in this notebook.
autoencoder_elusig, latent_model_elusig = model_autoencoder_()

# Get summary
autoencoder_elusig.summary()

In [None]:
# Compile with a default Adam optimizer and a mean-squared-error objective.
optimizer = tf.keras.optimizers.Adam()
loss = 'mse'
autoencoder_elusig.compile(optimizer=optimizer, loss=loss)

# Fit the network to reproduce its own input (targets == inputs).
history_elusig = autoencoder_elusig.fit(
    x=X_t,
    y=X_t,
    validation_data=(X_v, X_v),
    epochs=120,
    batch_size=128,
    shuffle=True,
    callbacks=[early_stop, Checkpoint, lr],
)

# Persist the full model and the encoder-only model.
autoencoder_elusig.save('/content/drive/MyDrive/Exercises/Autoencoders/elu_sig/enc/autoencoder')
latent_model_elusig.save('/content/drive/MyDrive/Exercises/Autoencoders/elu_sig/enc/encoder')

In [None]:
# Per-epoch training and validation loss recorded in history_elusig.
fig, ax = plt.subplots(figsize=(10, 7))
ax.plot(history_elusig.history['loss'], label='Train')
ax.plot(history_elusig.history['val_loss'], label='Validation')
ax.set_xlabel('Epoch')
ax.set_ylabel('MSE')
ax.set_title('Autoencoder Reconstruction Loss - Elu-Sig Combo', pad=13)
ax.legend(loc='upper right')

In [None]:
# Reconstruct the test images and display the first n of them side by side.
compressed_images = autoencoder_elusig.predict(test_images)
n = 5
fig, panels = plt.subplots(1, n, figsize=(9, 2))
for i, ax in enumerate(panels):
  ax.imshow(compressed_images[i].reshape(28, 28), cmap="gray")
  ax.set_axis_off()

plt.show()

In [None]:
# Project the test images into the 2-D latent space and colour by label.
latent_representation = latent_model_elusig.predict(test_images)

fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(x=latent_representation[:, 0],
                y=latent_representation[:, 1],
                hue=test_labels, palette='tab10', ax=ax)

ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Legend sits just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1),
          borderaxespad=0);

#### 4.5 Hidden: Elu - Latent: Lin

In [None]:
def model_autoencoder_(kernel="he_normal"):
  """Build a dense autoencoder with ELU hidden activations and a linear bottleneck.

  The encoder narrows 784 -> 500 -> 300 -> 100 -> 2 and the decoder mirrors
  it back to 784 sigmoid outputs.

  Args:
    kernel: initializer passed to every hidden Dense layer (the bottleneck
      and output layers keep the Keras default).

  Returns:
    (autoencoder, latent_model): the input -> reconstruction model and a
    model from the same input to the bottleneck, built on the same layers.
  """
  n_pixels = 28*28
  n_latent = 2
  hidden_widths = (500, 300, 100)

  inputs = Input(shape=(n_pixels,))

  # Encoder stack: activation-free Dense, then ELU applied to its output.
  x = inputs
  for width in hidden_widths:
    x = Dense(width, kernel_initializer=kernel)(x)
    x = tf.keras.activations.elu(x)

  # Two-unit linear bottleneck.
  code = Dense(n_latent, activation='linear')(x)

  # Decoder stack: the encoder widths in reverse.
  x = code
  for width in reversed(hidden_widths):
    x = Dense(width, kernel_initializer=kernel)(x)
    x = tf.keras.activations.elu(x)

  reconstruction = Dense(n_pixels, activation='sigmoid')(x)

  autoencoder = Model(inputs, reconstruction, name="Deep_Autoencoder")
  latent_model = Model(inputs, code)

  return autoencoder, latent_model

In [None]:
# Build the ELU/linear autoencoder and its encoder-only view.
# NOTE(review): relies on the most recently executed `model_autoencoder_`
# definition; the name is redefined several times in this notebook.
autoencoder_elulin, latent_model_elulin = model_autoencoder_()

# Get summary
autoencoder_elulin.summary()

In [None]:
# Compile with a default Adam optimizer and a mean-squared-error objective.
optimizer = tf.keras.optimizers.Adam()
loss = 'mse'
autoencoder_elulin.compile(optimizer=optimizer, loss=loss)

# Fit the network to reproduce its own input (targets == inputs).
history_elulin = autoencoder_elulin.fit(
    x=X_t,
    y=X_t,
    validation_data=(X_v, X_v),
    epochs=120,
    batch_size=128,
    shuffle=True,
    callbacks=[early_stop, Checkpoint, lr],
)

# Persist the full model and the encoder-only model.
autoencoder_elulin.save('/content/drive/MyDrive/Exercises/Autoencoders/elu_lin/enc/autoencoder')
latent_model_elulin.save('/content/drive/MyDrive/Exercises/Autoencoders/elu_lin/enc/encoder')

In [None]:
# Plot training and validation loss against the number of epochs for the
# ELU/linear run. The title previously read "Elu-Sig Combo", copied from the
# sigmoid-bottleneck section, which mislabelled this figure.
fig, ax = plt.subplots(figsize=(10, 7))
ax.plot(history_elulin.history['loss'], label='Train')
ax.plot(history_elulin.history['val_loss'], label='Validation')
ax.set_ylabel('MSE')
ax.set_xlabel('Epoch')
ax.set_title('Autoencoder Reconstruction Loss - Elu-Lin Combo', pad=13)
ax.legend(loc='upper right')

In [None]:
# Reconstruct the test images and display the first n of them side by side.
compressed_images = autoencoder_elulin.predict(test_images)
n = 5
fig, panels = plt.subplots(1, n, figsize=(9, 2))
for i, ax in enumerate(panels):
  ax.imshow(compressed_images[i].reshape(28, 28), cmap="gray")
  ax.set_axis_off()

plt.show()

In [None]:
# Project the test images into the 2-D latent space and colour by label.
latent_representation = latent_model_elulin.predict(test_images)

fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(x=latent_representation[:, 0],
                y=latent_representation[:, 1],
                hue=test_labels, palette='tab10', ax=ax)

ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Legend sits just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1),
          borderaxespad=0);

#### 4.6 Hidden: Elu - Latent: Lin + BN

In [None]:
def model_autoencoder_(kernel="he_normal"):
  """Build a dense autoencoder whose hidden layers are Dense -> BatchNorm -> ELU.

  The encoder narrows 784 -> 500 -> 300 -> 100 -> 2 (linear bottleneck) and
  the decoder mirrors it back to 784 sigmoid outputs.

  Args:
    kernel: initializer passed to every hidden Dense layer (the bottleneck
      and output layers keep the Keras default).

  Returns:
    (autoencoder, latent_model): the input -> reconstruction model and a
    model from the same input to the bottleneck, built on the same layers.
  """
  n_pixels = 28*28
  n_latent = 2
  hidden_widths = (500, 300, 100)

  inputs = Input(shape=(n_pixels,))

  # Encoder stack.
  x = inputs
  for width in hidden_widths:
    x = Dense(width, kernel_initializer=kernel)(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.activations.elu(x)

  # Two-unit linear bottleneck (no batch normalization here).
  code = Dense(n_latent, activation='linear')(x)

  # Decoder stack: the encoder widths in reverse.
  x = code
  for width in reversed(hidden_widths):
    x = Dense(width, kernel_initializer=kernel)(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.activations.elu(x)

  reconstruction = Dense(n_pixels, activation='sigmoid')(x)

  autoencoder = Model(inputs, reconstruction, name="Deep_Autoencoder")
  latent_model = Model(inputs, code)

  return autoencoder, latent_model

In [None]:
# Build the ELU/linear + BatchNorm autoencoder and its encoder-only view.
# NOTE(review): this rebinds `autoencoder_elulin` / `latent_model_elulin`,
# the same names the plain ELU/linear section uses, so that earlier model is
# no longer reachable after this cell runs. It also relies on the most
# recently executed `model_autoencoder_` definition.
autoencoder_elulin, latent_model_elulin = model_autoencoder_()

# Get summary
autoencoder_elulin.summary()

In [None]:
# Compile with a default Adam optimizer and a mean-squared-error objective.
optimizer = tf.keras.optimizers.Adam()
loss = 'mse'
autoencoder_elulin.compile(optimizer=optimizer, loss=loss)

# Fit the network to reproduce its own input (targets == inputs).
history_elulin = autoencoder_elulin.fit(
    x=X_t,
    y=X_t,
    validation_data=(X_v, X_v),
    epochs=120,
    batch_size=128,
    shuffle=True,
    callbacks=[early_stop, Checkpoint, lr],
)

# Persist the full model and the encoder-only model.
autoencoder_elulin.save('/content/drive/MyDrive/Exercises/Autoencoders/elu_lin_bn/enc/autoencoder')
latent_model_elulin.save('/content/drive/MyDrive/Exercises/Autoencoders/elu_lin_bn/enc/encoder')

In [None]:
# Plot training and validation loss against the number of epochs for the
# ELU/linear + BatchNorm run. The title previously read "Elu-Sig Combo",
# copied from the sigmoid-bottleneck section, which mislabelled this figure.
fig, ax = plt.subplots(figsize=(10, 7))
ax.plot(history_elulin.history['loss'], label='Train')
ax.plot(history_elulin.history['val_loss'], label='Validation')
ax.set_ylabel('MSE')
ax.set_xlabel('Epoch')
ax.set_title('Autoencoder Reconstruction Loss - Elu-Lin + BN Combo', pad=13)
ax.legend(loc='upper right')

In [None]:
# Reconstruct the test images and display the first n of them side by side.
compressed_images = autoencoder_elulin.predict(test_images)
n = 5
fig, panels = plt.subplots(1, n, figsize=(9, 2))
for i, ax in enumerate(panels):
  ax.imshow(compressed_images[i].reshape(28, 28), cmap="gray")
  ax.set_axis_off()

plt.show()

In [None]:
# Project the test images into the 2-D latent space and colour by label.
latent_representation = latent_model_elulin.predict(test_images)

fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(x=latent_representation[:, 0],
                y=latent_representation[:, 1],
                hue=test_labels, palette='tab10', ax=ax)

ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Legend sits just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1),
          borderaxespad=0);

#### 4.7 Hidden: Elu - Latent: Lin + Reg L1

In [None]:
def model_autoencoder_(kernel="he_normal", l1=0.01):
  """Build a dense ELU autoencoder with an L1-regularized linear bottleneck.

  The encoder narrows 784 -> 500 -> 300 -> 100 -> 2 and the decoder mirrors
  it back to 784 sigmoid outputs.

  Args:
    kernel: initializer passed to every hidden Dense layer (the bottleneck
      and output layers keep the Keras default).
    l1: L1 penalty factor applied to the bottleneck layer's kernel. The
      default reproduces the previously hard-coded value.

  Returns:
    (autoencoder, latent_model): the input -> reconstruction model and a
    model from the same input to the 2-unit bottleneck. Both are built on
    the same layers, so training the autoencoder also trains the encoder.
  """
  input_dim = 28*28
  latent_vec_dim = 2
  hidden_units = (500, 300, 100)

  input_layer = Input(shape=(input_dim,))

  # Encoder: activation-free Dense, then ELU applied to its output.
  x = input_layer
  for units in hidden_units:
    x = Dense(units, kernel_initializer=kernel)(x)
    x = tf.keras.activations.elu(x)

  # Linear 2-D bottleneck; the L1 term is added to the training loss.
  encoder = Dense(latent_vec_dim, activation='linear',
                  kernel_regularizer=tf.keras.regularizers.L1(l1=l1))(x)

  # Decoder: same widths in reverse order.
  x = encoder
  for units in reversed(hidden_units):
    x = Dense(units, kernel_initializer=kernel)(x)
    x = tf.keras.activations.elu(x)

  decoder = Dense(input_dim, activation='sigmoid')(x)

  # Connect both encoder and decoder
  autoencoder = Model(input_layer, decoder, name="Deep_Autoencoder")

  # Encoder-only view for inspecting the latent space
  latent_model = Model(input_layer, encoder)

  return autoencoder, latent_model

In [None]:
# Build the L1-regularized ELU/linear autoencoder and its encoder-only view.
# NOTE(review): relies on the most recently executed `model_autoencoder_`
# definition; the name is redefined several times in this notebook.
autoencoder_elulin_r, latent_model_elulin_r = model_autoencoder_()

# Get summary
autoencoder_elulin_r.summary()

In [None]:
# Compile with a default Adam optimizer and a mean-squared-error objective.
optimizer = tf.keras.optimizers.Adam()
loss = 'mse'
autoencoder_elulin_r.compile(loss=loss, optimizer=optimizer)

# Train the autoencoder to reproduce its own input (targets == inputs).
history_elulin_r = autoencoder_elulin_r.fit(X_t, X_t, epochs=120, batch_size=128,
                                            callbacks=[early_stop, Checkpoint, lr],
                                            shuffle=True, validation_data=(X_v, X_v))

# Save under an L1-specific directory (mirrors the 'elu_lin_rl2' directory
# used by the L2 variant). The previous target was 'elu_lin', which silently
# overwrote the unregularized ELU/linear model saved by an earlier section.
autoencoder_elulin_r.save('/content/drive/MyDrive/Exercises/Autoencoders/elu_lin_rl1/enc/autoencoder')
latent_model_elulin_r.save('/content/drive/MyDrive/Exercises/Autoencoders/elu_lin_rl1/enc/encoder')

In [None]:
# Plot training and validation loss against the number of epochs for the
# L1-regularized ELU/linear run. The title previously read "Elu-Sig Combo",
# copied from the sigmoid-bottleneck section, which mislabelled this figure.
fig, ax = plt.subplots(figsize=(10, 7))
ax.plot(history_elulin_r.history['loss'], label='Train')
ax.plot(history_elulin_r.history['val_loss'], label='Validation')
ax.set_ylabel('MSE')
ax.set_xlabel('Epoch')
ax.set_title('Autoencoder Reconstruction Loss - Elu-Lin + L1 Reg Combo', pad=13)
ax.legend(loc='upper right')

In [None]:
# Reconstruct the test images and display the first n of them side by side.
compressed_images = autoencoder_elulin_r.predict(test_images)
n = 5
fig, panels = plt.subplots(1, n, figsize=(9, 2))
for i, ax in enumerate(panels):
  ax.imshow(compressed_images[i].reshape(28, 28), cmap="gray")
  ax.set_axis_off()

plt.show()

In [None]:
# Project the test images into the 2-D latent space and colour by label.
latent_representation = latent_model_elulin_r.predict(test_images)

fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(x=latent_representation[:, 0],
                y=latent_representation[:, 1],
                hue=test_labels, palette='tab10', ax=ax)

ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Legend sits just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1),
          borderaxespad=0);

#### 4.8 Hidden: Elu - Latent: Lin + Reg L2

In [None]:
def model_autoencoder_(kernel="he_normal", input_dim=28*28, latent_vec_dim=2,
                       hidden_units=(500, 300, 100)):
  """Build a dense autoencoder: ELU hidden layers, linear latent layer with L2 penalty.

  The encoder stacks one Dense + ELU pair per entry of `hidden_units`, followed
  by a linear Dense latent layer whose kernel carries an L2(0.01) regularizer.
  The decoder mirrors the hidden widths in reverse order and ends in a sigmoid
  Dense layer of width `input_dim`.

  Args:
    kernel: kernel initializer used by every hidden Dense layer.
    input_dim: length of the flattened input vector (default 28*28).
    latent_vec_dim: width of the latent (bottleneck) layer (default 2).
    hidden_units: encoder hidden-layer widths, outermost first; the decoder
      uses the same widths reversed.

  Returns:
    (autoencoder, latent_model): the end-to-end model mapping input to
    reconstruction, and the model mapping input to the latent layer. Both
    share the same layers.
  """
  input_layer = Input(shape=(input_dim,))

  # Encoder: Dense -> ELU for each hidden width.
  x = input_layer
  for units in hidden_units:
    x = Dense(units, kernel_initializer=kernel)(x)
    x = tf.keras.activations.elu(x)

  # Linear bottleneck with an L2 penalty on its kernel.
  encoder = Dense(latent_vec_dim, activation='linear',
                  kernel_regularizer=tf.keras.regularizers.L2(l2=0.01))(x)

  # Decoder: mirror image of the encoder's hidden stack.
  x = encoder
  for units in reversed(hidden_units):
    x = Dense(units, kernel_initializer=kernel)(x)
    x = tf.keras.activations.elu(x)

  decoder = Dense(input_dim, activation='sigmoid')(x)

  # Connect both encoder and decoder
  autoencoder = Model(input_layer, decoder, name="Deep_Autoencoder")

  # Encoder-only model exposing the latent representation
  latent_model = Model(input_layer, encoder)

  return autoencoder, latent_model

In [None]:
# Instantiate the ELU / linear-latent / L2-regularized autoencoder and its encoder.
(autoencoder_elulin_r,
 latent_model_elulin_r) = model_autoencoder_()

# Show the layer stack and parameter counts of the full autoencoder.
autoencoder_elulin_r.summary()

In [None]:
# Training setup: Adam with default settings and a mean-squared-error loss.
loss = 'mse'
optimizer = tf.keras.optimizers.Adam()
autoencoder_elulin_r.compile(optimizer=optimizer, loss=loss)

# Fit the autoencoder to reproduce its own input (targets == inputs),
# using (X_v, X_v) as validation data.
# NOTE(review): early_stop, Checkpoint and lr are shared callback objects
# defined elsewhere - confirm reuse across models is intended.
history_elulin_r = autoencoder_elulin_r.fit(
    x=X_t,
    y=X_t,
    validation_data=(X_v, X_v),
    epochs=120,
    batch_size=128,
    shuffle=True,
    callbacks=[early_stop, Checkpoint, lr],
)

# Persist the full autoencoder and the encoder-only model.
autoencoder_elulin_r.save(f'/content/drive/MyDrive/Exercises/Autoencoders/elu_lin_rl2/enc/autoencoder')
latent_model_elulin_r.save(f'/content/drive/MyDrive/Exercises/Autoencoders/elu_lin_rl2/enc/encoder')

In [None]:
# Plot training and validation loss scores
# against the number of epochs.
# NOTE(review): the latent layer has a kernel regularizer, so the logged loss
# presumably includes the L2 penalty on top of the MSE - confirm before
# comparing these curves with the unregularized runs.
plt.figure(figsize=(10, 7))
plt.plot(history_elulin_r.history['loss'], label='Train')
plt.plot(history_elulin_r.history['val_loss'], label='Validation')
plt.ylabel('MSE')
plt.xlabel('Epoch')
# The title names the configuration trained in this section
# (ELU hidden layers, linear latent layer with L2 regularization).
plt.title('Autoencoder Reconstruction Loss - Elu-Lin + L2 Reg', pad=13)
plt.legend(loc='upper right');

In [None]:
# Run the test images through the autoencoder and show the first n outputs
# side by side as 28x28 grayscale images.
compressed_images = autoencoder_elulin_r.predict(test_images)
n = 5
plt.figure(figsize=(9, 2))
for i in range(n):
  ax = plt.subplot(1, n, i + 1)
  ax.imshow(np.reshape(compressed_images[i], (28, 28)), cmap="gray")
  ax.set_axis_off()

plt.show()

In [None]:
# Encode the test images and scatter the two latent coordinates,
# coloured by test_labels.
latent_representation = latent_model_elulin_r.predict(test_images)

plt.figure(figsize=(10, 7))

ax = sns.scatterplot(
    x=latent_representation[:, 0],
    y=latent_representation[:, 1],
    hue=test_labels,
    palette='tab10',
)
ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1), borderaxespad=0);

#### 4.9 Hidden: Gelu - Latent: Lin

In [None]:
def model_autoencoder_(kernel="he_normal", input_dim=28*28, latent_vec_dim=2,
                       hidden_units=(500, 300, 100)):
  """Build a dense autoencoder: GELU hidden layers, linear latent layer, linear output.

  The encoder stacks one Dense + GELU pair per entry of `hidden_units`,
  followed by an unregularized linear Dense latent layer. The decoder mirrors
  the hidden widths in reverse order and ends in a linear Dense layer of width
  `input_dim`.

  Args:
    kernel: kernel initializer used by every hidden Dense layer.
    input_dim: length of the flattened input vector (default 28*28).
    latent_vec_dim: width of the latent (bottleneck) layer (default 2).
    hidden_units: encoder hidden-layer widths, outermost first; the decoder
      uses the same widths reversed.

  Returns:
    (autoencoder, latent_model): the end-to-end model mapping input to
    reconstruction, and the model mapping input to the latent layer. Both
    share the same layers.
  """
  input_layer = Input(shape=(input_dim,))

  # Encoder: Dense -> GELU for each hidden width.
  x = input_layer
  for units in hidden_units:
    x = Dense(units, kernel_initializer=kernel)(x)
    x = tf.keras.activations.gelu(x)

  # Linear bottleneck (no regularizer in this variant).
  encoder = Dense(latent_vec_dim, activation='linear')(x)

  # Decoder: mirror image of the encoder's hidden stack.
  x = encoder
  for units in reversed(hidden_units):
    x = Dense(units, kernel_initializer=kernel)(x)
    x = tf.keras.activations.gelu(x)

  decoder = Dense(input_dim, activation='linear')(x)

  # Connect both encoder and decoder
  autoencoder = Model(input_layer, decoder, name="Deep_Autoencoder")

  # Encoder-only model exposing the latent representation
  latent_model = Model(input_layer, encoder)

  return autoencoder, latent_model

In [None]:
# Instantiate the GELU / linear-latent autoencoder and its encoder.
# NOTE: the variables keep the `elulin_r` suffix used by earlier sections even
# though this section's model uses GELU activations.
(autoencoder_elulin_r,
 latent_model_elulin_r) = model_autoencoder_()

# Show the layer stack and parameter counts of the full autoencoder.
autoencoder_elulin_r.summary()

In [None]:
# Training setup: Adam with default settings and a mean-squared-error loss.
loss = 'mse'
optimizer = tf.keras.optimizers.Adam()
autoencoder_elulin_r.compile(optimizer=optimizer, loss=loss)

# Fit the autoencoder to reproduce its own input (targets == inputs),
# using (X_v, X_v) as validation data.
# NOTE(review): early_stop, Checkpoint and lr are shared callback objects
# defined elsewhere - confirm reuse across models is intended.
history_elulin_r = autoencoder_elulin_r.fit(
    x=X_t,
    y=X_t,
    validation_data=(X_v, X_v),
    epochs=120,
    batch_size=128,
    shuffle=True,
    callbacks=[early_stop, Checkpoint, lr],
)

# Persist the full autoencoder and the encoder-only model.
autoencoder_elulin_r.save(f'/content/drive/MyDrive/Exercises/Autoencoders/gelu_lin/enc/autoencoder')
latent_model_elulin_r.save(f'/content/drive/MyDrive/Exercises/Autoencoders/gelu_lin/enc/encoder')

In [None]:
# Plot training and validation loss scores
# against the number of epochs.
plt.figure(figsize=(10, 7))
plt.plot(history_elulin_r.history['loss'], label='Train')
plt.plot(history_elulin_r.history['val_loss'], label='Validation')
plt.ylabel('MSE')
plt.xlabel('Epoch')
# The title names the configuration trained in this section
# (GELU hidden layers, linear latent layer).
plt.title('Autoencoder Reconstruction Loss - Gelu-Lin', pad=13)
plt.legend(loc='upper right');

In [None]:
# Run the test images through the autoencoder and show the first n outputs
# side by side as 28x28 grayscale images.
compressed_images = autoencoder_elulin_r.predict(test_images)
n = 15
plt.figure(figsize=(26, 2))
for i in range(n):
  ax = plt.subplot(1, n, i + 1)
  ax.imshow(np.reshape(compressed_images[i], (28, 28)), cmap="gray")
  ax.set_axis_off()

plt.show()

In [None]:
# Encode the test images and scatter the two latent coordinates,
# coloured by test_labels.
latent_representation = latent_model_elulin_r.predict(test_images)

plt.figure(figsize=(10, 7))

ax = sns.scatterplot(
    x=latent_representation[:, 0],
    y=latent_representation[:, 1],
    hue=test_labels,
    palette='tab10',
)
ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1), borderaxespad=0);

#### 4.10 Hidden: Gelu - Latent: Tanh

In [None]:
def model_autoencoder_(kernel="he_normal", input_dim=28*28, latent_vec_dim=2,
                       hidden_units=(500, 300, 100)):
  """Build a dense autoencoder: GELU hidden layers, tanh latent layer, linear output.

  The encoder stacks one Dense + GELU pair per entry of `hidden_units`,
  followed by a Dense latent layer with tanh activation. The decoder mirrors
  the hidden widths in reverse order and ends in a linear Dense layer of width
  `input_dim`.

  Args:
    kernel: kernel initializer used by every hidden Dense layer.
    input_dim: length of the flattened input vector (default 28*28).
    latent_vec_dim: width of the latent (bottleneck) layer (default 2).
    hidden_units: encoder hidden-layer widths, outermost first; the decoder
      uses the same widths reversed.

  Returns:
    (autoencoder, latent_model): the end-to-end model mapping input to
    reconstruction, and the model mapping input to the latent layer. Both
    share the same layers.
  """
  input_layer = Input(shape=(input_dim,))

  # Encoder: Dense -> GELU for each hidden width.
  x = input_layer
  for units in hidden_units:
    x = Dense(units, kernel_initializer=kernel)(x)
    x = tf.keras.activations.gelu(x)

  # Bottleneck with tanh activation (no regularizer in this variant).
  encoder = Dense(latent_vec_dim, activation='tanh')(x)

  # Decoder: mirror image of the encoder's hidden stack.
  x = encoder
  for units in reversed(hidden_units):
    x = Dense(units, kernel_initializer=kernel)(x)
    x = tf.keras.activations.gelu(x)

  decoder = Dense(input_dim, activation='linear')(x)

  # Connect both encoder and decoder
  autoencoder = Model(input_layer, decoder, name="Deep_Autoencoder")

  # Encoder-only model exposing the latent representation
  latent_model = Model(input_layer, encoder)

  return autoencoder, latent_model

In [None]:
# Instantiate the GELU / tanh-latent autoencoder and its encoder.
# NOTE: the variables keep the `elulin_r` suffix used by earlier sections even
# though this section's model uses GELU activations and a tanh latent layer.
(autoencoder_elulin_r,
 latent_model_elulin_r) = model_autoencoder_()

# Show the layer stack and parameter counts of the full autoencoder.
autoencoder_elulin_r.summary()

In [None]:
# Training setup: Adam with default settings and a mean-squared-error loss.
loss = 'mse'
optimizer = tf.keras.optimizers.Adam()
autoencoder_elulin_r.compile(optimizer=optimizer, loss=loss)

# Fit the autoencoder to reproduce its own input (targets == inputs),
# using (X_v, X_v) as validation data.
# NOTE(review): early_stop, Checkpoint and lr are shared callback objects
# defined elsewhere - confirm reuse across models is intended.
history_elulin_r = autoencoder_elulin_r.fit(
    x=X_t,
    y=X_t,
    validation_data=(X_v, X_v),
    epochs=120,
    batch_size=128,
    shuffle=True,
    callbacks=[early_stop, Checkpoint, lr],
)

# Persist the full autoencoder and the encoder-only model.
# NOTE(review): the directory is spelled 'gelu_tahn' (sic); left unchanged so
# that any artifacts already saved there still resolve.
autoencoder_elulin_r.save(f'/content/drive/MyDrive/Exercises/Autoencoders/gelu_tahn/enc/autoencoder')
latent_model_elulin_r.save(f'/content/drive/MyDrive/Exercises/Autoencoders/gelu_tahn/enc/encoder')

In [None]:
# Plot training and validation loss scores
# against the number of epochs.
plt.figure(figsize=(10, 7))
plt.plot(history_elulin_r.history['loss'], label='Train')
plt.plot(history_elulin_r.history['val_loss'], label='Validation')
plt.ylabel('MSE')
plt.xlabel('Epoch')
# The title names the configuration trained in this section
# (GELU hidden layers, tanh latent layer).
plt.title('Autoencoder Reconstruction Loss - Gelu-Tanh', pad=13)
plt.legend(loc='upper right');

In [None]:
# Run the test images through the autoencoder and show the first n outputs
# side by side as 28x28 grayscale images.
compressed_images = autoencoder_elulin_r.predict(test_images)
n = 15
plt.figure(figsize=(26, 2))
for i in range(n):
  ax = plt.subplot(1, n, i + 1)
  ax.imshow(np.reshape(compressed_images[i], (28, 28)), cmap="gray")
  ax.set_axis_off()

plt.show()

In [None]:
# Encode the test images and scatter the two latent coordinates,
# coloured by test_labels.
latent_representation = latent_model_elulin_r.predict(test_images)

plt.figure(figsize=(10, 7))

ax = sns.scatterplot(
    x=latent_representation[:, 0],
    y=latent_representation[:, 1],
    hue=test_labels,
    palette='tab10',
)
ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1), borderaxespad=0);

#### 4.11 Hidden: Gelu - Latent: Lin - L1 Reg

In [None]:
def model_autoencoder_(kernel="he_normal", input_dim=28*28, latent_vec_dim=2,
                       hidden_units=(500, 300, 100)):
  """Build a dense autoencoder: GELU hidden layers, linear latent layer with L1 penalty.

  The encoder stacks one Dense + GELU pair per entry of `hidden_units`,
  followed by a linear Dense latent layer whose kernel carries an L1(0.01)
  regularizer. The decoder mirrors the hidden widths in reverse order and ends
  in a linear Dense layer of width `input_dim`.

  Args:
    kernel: kernel initializer used by every hidden Dense layer.
    input_dim: length of the flattened input vector (default 28*28).
    latent_vec_dim: width of the latent (bottleneck) layer (default 2).
    hidden_units: encoder hidden-layer widths, outermost first; the decoder
      uses the same widths reversed.

  Returns:
    (autoencoder, latent_model): the end-to-end model mapping input to
    reconstruction, and the model mapping input to the latent layer. Both
    share the same layers.
  """
  input_layer = Input(shape=(input_dim,))

  # Encoder: Dense -> GELU for each hidden width.
  x = input_layer
  for units in hidden_units:
    x = Dense(units, kernel_initializer=kernel)(x)
    x = tf.keras.activations.gelu(x)

  # Linear bottleneck with an L1 penalty on its kernel.
  encoder = Dense(latent_vec_dim, activation='linear',
                  kernel_regularizer=tf.keras.regularizers.L1(l1=0.01))(x)

  # Decoder: mirror image of the encoder's hidden stack.
  x = encoder
  for units in reversed(hidden_units):
    x = Dense(units, kernel_initializer=kernel)(x)
    x = tf.keras.activations.gelu(x)

  decoder = Dense(input_dim, activation='linear')(x)

  # Connect both encoder and decoder
  autoencoder = Model(input_layer, decoder, name="Deep_Autoencoder")

  # Encoder-only model exposing the latent representation
  latent_model = Model(input_layer, encoder)

  return autoencoder, latent_model

In [None]:
# Instantiate the GELU / linear-latent / L1-regularized autoencoder and its encoder.
# NOTE: the variables keep the `elulin_r` suffix used by earlier sections even
# though this section's model uses GELU activations.
(autoencoder_elulin_r,
 latent_model_elulin_r) = model_autoencoder_()

# Show the layer stack and parameter counts of the full autoencoder.
autoencoder_elulin_r.summary()

In [None]:
# Training setup: Adam with default settings and a mean-squared-error loss.
loss = 'mse'
optimizer = tf.keras.optimizers.Adam()
autoencoder_elulin_r.compile(optimizer=optimizer, loss=loss)

# Fit the autoencoder to reproduce its own input (targets == inputs),
# using (X_v, X_v) as validation data.
# NOTE(review): early_stop, Checkpoint and lr are shared callback objects
# defined elsewhere - confirm reuse across models is intended.
history_elulin_r = autoencoder_elulin_r.fit(
    x=X_t,
    y=X_t,
    validation_data=(X_v, X_v),
    epochs=120,
    batch_size=128,
    shuffle=True,
    callbacks=[early_stop, Checkpoint, lr],
)

# Persist the full autoencoder and the encoder-only model.
autoencoder_elulin_r.save(f'/content/drive/MyDrive/Exercises/Autoencoders/gelu_lin_l1/enc/autoencoder')
latent_model_elulin_r.save(f'/content/drive/MyDrive/Exercises/Autoencoders/gelu_lin_l1/enc/encoder')

In [None]:
# Plot training and validation loss scores
# against the number of epochs.
# NOTE(review): the latent layer has a kernel regularizer, so the logged loss
# presumably includes the L1 penalty on top of the MSE - confirm before
# comparing these curves with the unregularized runs.
plt.figure(figsize=(10, 7))
plt.plot(history_elulin_r.history['loss'], label='Train')
plt.plot(history_elulin_r.history['val_loss'], label='Validation')
plt.ylabel('MSE')
plt.xlabel('Epoch')
# The title names the configuration trained in this section
# (GELU hidden layers, linear latent layer with L1 regularization).
plt.title('Autoencoder Reconstruction Loss - Gelu-Lin + L1 Reg', pad=13)
plt.legend(loc='upper right');

In [None]:
# Run the test images through the autoencoder and show the first n outputs
# side by side as 28x28 grayscale images.
compressed_images = autoencoder_elulin_r.predict(test_images)
n = 15
plt.figure(figsize=(26, 2))
for i in range(n):
  ax = plt.subplot(1, n, i + 1)
  ax.imshow(np.reshape(compressed_images[i], (28, 28)), cmap="gray")
  ax.set_axis_off()

plt.show()

In [None]:
# Encode the test images and scatter the two latent coordinates,
# coloured by test_labels.
latent_representation = latent_model_elulin_r.predict(test_images)

plt.figure(figsize=(10, 7))

ax = sns.scatterplot(
    x=latent_representation[:, 0],
    y=latent_representation[:, 1],
    hue=test_labels,
    palette='tab10',
)
ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1), borderaxespad=0);

## 4.0 Variational Autoencoders Relu-Activation:

In [None]:
class Sampling(layers.Layer):
    """Sample z = z_mean + exp(0.5 * z_log_var) * epsilon, with epsilon drawn from a normal."""

    def call(self, inputs):
        # `inputs` is the pair (z_mean, z_log_var).
        mean, log_var = inputs
        mean_shape = tf.shape(mean)
        # One noise value per (sample, latent dimension).
        noise = tf.keras.backend.random_normal(shape=(mean_shape[0], mean_shape[1]))
        std = tf.exp(0.5 * log_var)
        return mean + std * noise

In [None]:
input_dim = 28 * 28
latent_vec_dim = 2

input_layer = Input(shape=(input_dim,))

# VAE encoder: four ReLU Dense layers of decreasing width ...
enc_layer_1 = Dense(units=500, activation='relu')(input_layer)
enc_layer_2 = Dense(units=300, activation='relu')(enc_layer_1)
enc_layer_3 = Dense(units=100, activation='relu')(enc_layer_2)
enc_layer_4 = Dense(units=32, activation='relu')(enc_layer_3)

# ... followed by two parallel heads (z_mean, z_log_var) and the sampled z.
z_mean = layers.Dense(units=latent_vec_dim, name="z_mean")(enc_layer_4)
z_log_var = layers.Dense(units=latent_vec_dim, name="z_log_var")(enc_layer_4)
z = Sampling()([z_mean, z_log_var])

# The encoder returns all three tensors, in the order [z_mean, z_log_var, z].
encoder = keras.Model(
    inputs=input_layer,
    outputs=[z_mean, z_log_var, z],
    name="encoder",
)
encoder.summary()


In [None]:
# VAE decoder: maps a latent vector back to a flattened image through ReLU
# Dense layers of increasing width and a final sigmoid layer.
latent_inputs = keras.Input(shape=(latent_vec_dim,))
dec_layer_0 = Dense(units=32, activation='relu')(latent_inputs)
dec_layer_1 = Dense(units=100, activation='relu')(dec_layer_0)
dec_layer_2 = Dense(units=300, activation='relu')(dec_layer_1)
dec_layer_3 = Dense(units=500, activation='relu')(dec_layer_2)
dec_layer_4 = Dense(units=input_dim, activation='sigmoid')(dec_layer_3)

decoder = keras.Model(inputs=latent_inputs, outputs=dec_layer_4, name="decoder")
decoder.summary()

In [None]:
# End-to-end VAE: the decoder consumes the sampled z, which is the last of the
# three encoder outputs [z_mean, z_log_var, z].
outputs = decoder(encoder(input_layer)[-1])
vae = keras.Model(inputs=input_layer, outputs=outputs, name='vae_mlp')

What we've done so far allows us to instantiate 3 models:

* an end-to-end autoencoder mapping inputs to reconstructions
* an encoder mapping inputs to the latent space
* a generator that can take points on the latent space and will output the corresponding reconstructed samples.

We train the model using the end-to-end model, with a custom loss function: the sum of a reconstruction term, and the KL divergence regularization term.

In [None]:
# Reconstruction term: binary cross-entropy between input and output, scaled by input_dim.
reconstruction_loss = keras.losses.binary_crossentropy(input_layer, outputs) * input_dim

# KL regularization term: -0.5 * sum(1 + log_var - mean^2 - exp(log_var))
# over the latent axis.
kl_loss = K.sum(
    1 + z_log_var - K.square(z_mean) - K.exp(z_log_var),
    axis=-1,
) * -0.5

# The total loss is attached to the model itself, so compile() needs no `loss`.
vae_loss = K.mean(reconstruction_loss + kl_loss)
vae.add_loss(vae_loss)
vae.compile(optimizer='adam')

In [None]:
# Train the VAE on (X_t, X_t) with (X_v, X_v) as validation data.
# NOTE(review): early_stop, Checkpoint and lr are shared callback objects
# defined elsewhere - confirm reuse across models is intended.
history_vae = vae.fit(
    x=X_t,
    y=X_t,
    validation_data=(X_v, X_v),
    shuffle=True,
    callbacks=[early_stop, Checkpoint, lr],
    batch_size=128,
    epochs=120,
)

In [None]:
# encoder.predict returns [z_mean, z_log_var, z]; index 2 is the sampled z.
latent_representation = encoder.predict(test_images)

plt.figure(figsize=(10, 7))

ax = sns.scatterplot(
    x=latent_representation[2][:, 0],
    y=latent_representation[2][:, 1],
    hue=test_labels,
    palette='tab10',
)
ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1), borderaxespad=0);

In [None]:
# Scatter the second encoder output for the test images. The encoder returns
# [z_mean, z_log_var, z], so index 1 is z_log_var (the predicted
# log-variance), and the axes are labelled accordingly.
# NOTE(review): if the intent was to plot the posterior mean instead, use
# index 0 here.
plt.figure(figsize=(10, 7))

sns.scatterplot(x=latent_representation[1][:,0],
                y=latent_representation[1][:,1],
                hue=test_labels, palette='tab10')

plt.xlabel("z_log_var - first dimension")
plt.ylabel("z_log_var - second dimension")

plt.grid(linestyle='--', alpha=0.5)

plt.legend(bbox_to_anchor=(1.01, 1),
           borderaxespad=0);

In [None]:
# Plot training and validation loss scores
# against the number of epochs.
plt.figure(figsize=(10, 7))
plt.plot(history_vae.history['loss'], label='Train')
plt.plot(history_vae.history['val_loss'], label='Validation')
plt.ylabel('Customized Loss')
plt.xlabel('Epoch')
# The plotted quantity is the VAE loss (reconstruction + KL) and the model in
# this section is compiled with optimizer='adam', so the title says so.
plt.title('VAE Loss (Reconstruction + KL) - Relu, Adam', pad=13)
plt.legend(loc='upper right');

In [None]:
# Run the test images through the VAE and show the first n outputs side by
# side as 28x28 grayscale images, each titled with its entry in test_labels.
compressed_images = vae.predict(test_images)
n = 5
plt.figure(figsize=(9, 2))
for i in range(n):
  ax = plt.subplot(1, n, i + 1)
  ax.imshow(np.reshape(compressed_images[i], (28, 28)), cmap="gray")
  ax.set_title(test_labels[i])
  ax.set_axis_off()

plt.show()

In [None]:
# Display a 2D manifold of the digits
n = 20  # the figure is an n x n grid of decoded digits
digit_size = 28
# Sample n evenly spaced points in [-3, 3] along each latent axis
grid_x = np.linspace(-3, 3, n)
grid_y = np.linspace(-3, 3, n)

# Build every latent point up front and decode them with ONE predict call
# instead of n*n single-row calls. Tile (i, j) is decoded from the latent
# point [grid_y[j], grid_x[i]]: row i fixes the second latent coordinate,
# column j the first.
xi_grid, yi_grid = np.meshgrid(grid_y, grid_x)
z_samples = np.column_stack([xi_grid.ravel(), yi_grid.ravel()])
x_decoded = decoder.predict(z_samples)

# (n*n, pixels) -> (row, col, h, w) -> (row, h, col, w) -> one tiled image
figure = (x_decoded.reshape(n, n, digit_size, digit_size)
          .transpose(0, 2, 1, 3)
          .reshape(n * digit_size, n * digit_size))

plt.figure(figsize=(10, 10))
# Grayscale colormap, consistent with the other image plots in the notebook
plt.imshow(figure, cmap="gray")
plt.show()

In [None]:
# Persist the end-to-end VAE and, separately, its encoder.
vae.save(filepath=f'/content/drive/MyDrive/Exercises/Autoencoders/vae/autoencoder')
encoder.save(filepath=f'/content/drive/MyDrive/Exercises/Autoencoders/vae/encoder')

## 5.0 Variational Autoencoders Prelu-Activation:

In [None]:
class Sampling(layers.Layer):
    """Sample z = z_mean + exp(0.5 * z_log_var) * epsilon, with epsilon drawn from a normal."""

    def call(self, inputs):
        # `inputs` is the pair (z_mean, z_log_var).
        mean, log_var = inputs
        mean_shape = tf.shape(mean)
        # One noise value per (sample, latent dimension).
        noise = tf.keras.backend.random_normal(shape=(mean_shape[0], mean_shape[1]))
        std = tf.exp(0.5 * log_var)
        return mean + std * noise

In [None]:
input_dim = 28 * 28
latent_vec_dim = 2

input_layer = Input(shape=(input_dim,))

# VAE encoder: four Dense layers of decreasing width, each followed by its
# own PReLU activation layer.
enc_layer_1 = Dense(units=500, kernel_initializer='he_normal')(input_layer)
enc_layer_1 = tf.keras.layers.PReLU()(enc_layer_1)

enc_layer_2 = Dense(units=300, kernel_initializer='he_normal')(enc_layer_1)
enc_layer_2 = tf.keras.layers.PReLU()(enc_layer_2)

enc_layer_3 = Dense(units=100, kernel_initializer='he_normal')(enc_layer_2)
enc_layer_3 = tf.keras.layers.PReLU()(enc_layer_3)

enc_layer_4 = Dense(units=32, kernel_initializer='he_normal')(enc_layer_3)
enc_layer_4 = tf.keras.layers.PReLU()(enc_layer_4)

# Two parallel heads (z_mean, z_log_var) and the sampled z.
z_mean = layers.Dense(units=latent_vec_dim, name="z_mean")(enc_layer_4)
z_log_var = layers.Dense(units=latent_vec_dim, name="z_log_var")(enc_layer_4)
z = Sampling()([z_mean, z_log_var])

# The encoder returns all three tensors, in the order [z_mean, z_log_var, z].
encoder = keras.Model(
    inputs=input_layer,
    outputs=[z_mean, z_log_var, z],
    name="encoder",
)
encoder.summary()


In [None]:
# VAE decoder: maps a latent vector back to a flattened image through
# Dense + PReLU layers of increasing width and a final sigmoid layer.
latent_inputs = keras.Input(shape=(latent_vec_dim,))

dec_layer_0 = Dense(units=32, kernel_initializer='he_normal')(latent_inputs)
dec_layer_0 = tf.keras.layers.PReLU()(dec_layer_0)

dec_layer_1 = Dense(units=100, kernel_initializer='he_normal')(dec_layer_0)
dec_layer_1 = tf.keras.layers.PReLU()(dec_layer_1)

dec_layer_2 = Dense(units=300, kernel_initializer='he_normal')(dec_layer_1)
dec_layer_2 = tf.keras.layers.PReLU()(dec_layer_2)

dec_layer_3 = Dense(units=500, kernel_initializer='he_normal')(dec_layer_2)
dec_layer_3 = tf.keras.layers.PReLU()(dec_layer_3)

dec_layer_4 = Dense(units=input_dim, activation='sigmoid')(dec_layer_3)

decoder = keras.Model(inputs=latent_inputs, outputs=dec_layer_4, name="decoder")
decoder.summary()

In [None]:
# End-to-end VAE: the decoder consumes the sampled z, which is the last of the
# three encoder outputs [z_mean, z_log_var, z].
outputs = decoder(encoder(input_layer)[-1])
vae = keras.Model(inputs=input_layer, outputs=outputs, name='vae_mlp')

What we've done so far allows us to instantiate 3 models:

* an end-to-end autoencoder mapping inputs to reconstructions
* an encoder mapping inputs to the latent space
* a generator that can take points on the latent space and will output the corresponding reconstructed samples.

We train the model using the end-to-end model, with a custom loss function: the sum of a reconstruction term, and the KL divergence regularization term.

In [None]:
# Reconstruction term: binary cross-entropy between input and output, scaled by input_dim.
reconstruction_loss = keras.losses.binary_crossentropy(input_layer, outputs) * input_dim

# KL regularization term: -0.5 * sum(1 + log_var - mean^2 - exp(log_var))
# over the latent axis.
kl_loss = K.sum(
    1 + z_log_var - K.square(z_mean) - K.exp(z_log_var),
    axis=-1,
) * -0.5

# The total loss is attached to the model itself, so compile() needs no `loss`.
vae_loss = K.mean(reconstruction_loss + kl_loss)
vae.add_loss(vae_loss)
vae.compile(optimizer='adam')

In [None]:
# Train the VAE on (X_t, X_t) with (X_v, X_v) as validation data.
# NOTE(review): early_stop, Checkpoint and lr are shared callback objects
# defined elsewhere - confirm reuse across models is intended.
history_vae = vae.fit(
    x=X_t,
    y=X_t,
    validation_data=(X_v, X_v),
    shuffle=True,
    callbacks=[early_stop, Checkpoint, lr],
    batch_size=128,
    epochs=120,
)

In [None]:
# encoder.predict returns [z_mean, z_log_var, z]; index 2 is the sampled z.
latent_representation = encoder.predict(test_images)

plt.figure(figsize=(15, 9))

ax = sns.scatterplot(
    x=latent_representation[2][:, 0],
    y=latent_representation[2][:, 1],
    hue=test_labels,
    palette='tab10',
)
ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1), borderaxespad=0);

In [None]:
# Scatter the second encoder output for the test images. The encoder returns
# [z_mean, z_log_var, z], so index 1 is z_log_var (the predicted
# log-variance), and the axes are labelled accordingly.
# NOTE(review): if the intent was to plot the posterior mean instead, use
# index 0 here.
plt.figure(figsize=(10, 7))

sns.scatterplot(x=latent_representation[1][:,0],
                y=latent_representation[1][:,1],
                hue=test_labels, palette='tab10')

plt.xlabel("z_log_var - first dimension")
plt.ylabel("z_log_var - second dimension")

plt.grid(linestyle='--', alpha=0.5)

plt.legend(bbox_to_anchor=(1.01, 1),
           borderaxespad=0);

In [None]:
# Plot training and validation loss scores
# against the number of epochs.
plt.figure(figsize=(10, 7))
plt.plot(history_vae.history['loss'], label='Train')
plt.plot(history_vae.history['val_loss'], label='Validation')
plt.ylabel('Customized Loss')
plt.xlabel('Epoch')
# The plotted quantity is the VAE loss (reconstruction + KL) and the model in
# this section is compiled with optimizer='adam', so the title says so.
plt.title('VAE Loss (Reconstruction + KL) - Prelu, Adam', pad=13)
plt.legend(loc='upper right');

In [None]:
# Run the test images through the VAE and show the first n outputs side by
# side as 28x28 grayscale images, each titled with its entry in test_labels.
compressed_images = vae.predict(test_images)
n = 5
plt.figure(figsize=(9, 2))
for i in range(n):
  ax = plt.subplot(1, n, i + 1)
  ax.imshow(np.reshape(compressed_images[i], (28, 28)), cmap="gray")
  ax.set_title(test_labels[i])
  ax.set_axis_off()

plt.show()

In [None]:
# Display a 2D manifold of the digits
n = 20  # the figure is an n x n grid of decoded digits
digit_size = 28
# Sample n evenly spaced points in [-3, 3] along each latent axis
grid_x = np.linspace(-3, 3, n)
grid_y = np.linspace(-3, 3, n)

# Build every latent point up front and decode them with ONE predict call
# instead of n*n single-row calls. Tile (i, j) is decoded from the latent
# point [grid_y[j], grid_x[i]]: row i fixes the second latent coordinate,
# column j the first.
xi_grid, yi_grid = np.meshgrid(grid_y, grid_x)
z_samples = np.column_stack([xi_grid.ravel(), yi_grid.ravel()])
x_decoded = decoder.predict(z_samples)

# (n*n, pixels) -> (row, col, h, w) -> (row, h, col, w) -> one tiled image
figure = (x_decoded.reshape(n, n, digit_size, digit_size)
          .transpose(0, 2, 1, 3)
          .reshape(n * digit_size, n * digit_size))

plt.figure(figsize=(10, 10))
plt.imshow(figure, cmap="gray")
plt.show()

In [None]:
# Persist the end-to-end VAE and, separately, its encoder.
vae.save(filepath=f'/content/drive/MyDrive/Exercises/Autoencoders/vae_prelu/autoencoder')
encoder.save(filepath=f'/content/drive/MyDrive/Exercises/Autoencoders/vae_prelu/encoder')

## 6.0 Variational Autoencoders Elu-Activation:

In [None]:
class Sampling(layers.Layer):
    """Sample z = z_mean + exp(0.5 * z_log_var) * epsilon, with epsilon drawn from a normal."""

    def call(self, inputs):
        # `inputs` is the pair (z_mean, z_log_var).
        mean, log_var = inputs
        mean_shape = tf.shape(mean)
        # One noise value per (sample, latent dimension).
        noise = tf.keras.backend.random_normal(shape=(mean_shape[0], mean_shape[1]))
        std = tf.exp(0.5 * log_var)
        return mean + std * noise

In [None]:
input_dim = 28 * 28
latent_vec_dim = 2

input_layer = Input(shape=(input_dim,))

# VAE encoder: four Dense layers of decreasing width, each followed by an
# ELU activation (alpha left at its default of 1.0).
enc_layer_1 = Dense(units=500, kernel_initializer='he_normal')(input_layer)
enc_layer_1 = tf.keras.activations.elu(enc_layer_1, alpha=1.0)

enc_layer_2 = Dense(units=300, kernel_initializer='he_normal')(enc_layer_1)
enc_layer_2 = tf.keras.activations.elu(enc_layer_2, alpha=1.0)

enc_layer_3 = Dense(units=100, kernel_initializer='he_normal')(enc_layer_2)
enc_layer_3 = tf.keras.activations.elu(enc_layer_3, alpha=1.0)

enc_layer_4 = Dense(units=32, kernel_initializer='he_normal')(enc_layer_3)
enc_layer_4 = tf.keras.activations.elu(enc_layer_4, alpha=1.0)

# Two parallel heads (z_mean, z_log_var) and the sampled z.
z_mean = layers.Dense(units=latent_vec_dim, name="z_mean")(enc_layer_4)
z_log_var = layers.Dense(units=latent_vec_dim, name="z_log_var")(enc_layer_4)
z = Sampling()([z_mean, z_log_var])

# The encoder returns all three tensors, in the order [z_mean, z_log_var, z].
encoder = keras.Model(
    inputs=input_layer,
    outputs=[z_mean, z_log_var, z],
    name="encoder",
)
encoder.summary()


In [None]:
# VAE decoder: maps a latent vector back to a flattened image through
# Dense + ELU layers of increasing width and a final sigmoid layer.
latent_inputs = keras.Input(shape=(latent_vec_dim,))

dec_layer_0 = Dense(units=32, kernel_initializer='he_normal')(latent_inputs)
dec_layer_0 = tf.keras.activations.elu(dec_layer_0, alpha=1.0)

dec_layer_1 = Dense(units=100, kernel_initializer='he_normal')(dec_layer_0)
dec_layer_1 = tf.keras.activations.elu(dec_layer_1, alpha=1.0)

dec_layer_2 = Dense(units=300, kernel_initializer='he_normal')(dec_layer_1)
dec_layer_2 = tf.keras.activations.elu(dec_layer_2, alpha=1.0)

dec_layer_3 = Dense(units=500, kernel_initializer='he_normal')(dec_layer_2)
dec_layer_3 = tf.keras.activations.elu(dec_layer_3, alpha=1.0)

dec_layer_4 = Dense(units=input_dim, activation='sigmoid')(dec_layer_3)

decoder = keras.Model(inputs=latent_inputs, outputs=dec_layer_4, name="decoder")
decoder.summary()

In [None]:
# End-to-end VAE: the decoder consumes the sampled z, which is the last of the
# three encoder outputs [z_mean, z_log_var, z].
outputs = decoder(encoder(input_layer)[-1])
vae = keras.Model(inputs=input_layer, outputs=outputs, name='vae_mlp')

What we've done so far allows us to instantiate 3 models:

* an end-to-end autoencoder mapping inputs to reconstructions
* an encoder mapping inputs to the latent space
* a generator that can take points on the latent space and will output the corresponding reconstructed samples.

We train the model using the end-to-end model, with a custom loss function: the sum of a reconstruction term, and the KL divergence regularization term.

In [None]:
# Reconstruction term: binary cross-entropy between input and output, scaled by input_dim.
reconstruction_loss = keras.losses.binary_crossentropy(input_layer, outputs) * input_dim

# KL regularization term: -0.5 * sum(1 + log_var - mean^2 - exp(log_var))
# over the latent axis.
kl_loss = K.sum(
    1 + z_log_var - K.square(z_mean) - K.exp(z_log_var),
    axis=-1,
) * -0.5

# The total loss is attached to the model itself, so compile() needs no `loss`.
vae_loss = K.mean(reconstruction_loss + kl_loss)
vae.add_loss(vae_loss)
vae.compile(optimizer='adam')

In [None]:
# Train the VAE on (X_t, X_t) with (X_v, X_v) as validation data.
# NOTE(review): early_stop, Checkpoint and lr are shared callback objects
# defined elsewhere - confirm reuse across models is intended.
history_vae = vae.fit(
    x=X_t,
    y=X_t,
    validation_data=(X_v, X_v),
    shuffle=True,
    callbacks=[early_stop, Checkpoint, lr],
    batch_size=128,
    epochs=120,
)

In [None]:
# encoder.predict returns [z_mean, z_log_var, z]; index 2 is the sampled z.
latent_representation = encoder.predict(test_images)

plt.figure(figsize=(15, 9))

ax = sns.scatterplot(
    x=latent_representation[2][:, 0],
    y=latent_representation[2][:, 1],
    hue=test_labels,
    palette='tab10',
)
ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1), borderaxespad=0);

In [None]:
# Scatter the second encoder output for the test images. The encoder returns
# [z_mean, z_log_var, z], so index 1 is z_log_var (the predicted
# log-variance), and the axes are labelled accordingly.
# NOTE(review): if the intent was to plot the posterior mean instead, use
# index 0 here.
plt.figure(figsize=(10, 7))

sns.scatterplot(x=latent_representation[1][:,0],
                y=latent_representation[1][:,1],
                hue=test_labels, palette='tab10')

plt.xlabel("z_log_var - first dimension")
plt.ylabel("z_log_var - second dimension")

plt.grid(linestyle='--', alpha=0.5)

plt.legend(bbox_to_anchor=(1.01, 1),
           borderaxespad=0);

In [None]:
# Plot training and validation loss scores
# against the number of epochs.
plt.figure(figsize=(10, 7))
plt.plot(history_vae.history['loss'], label='Train')
plt.plot(history_vae.history['val_loss'], label='Validation')
plt.ylabel('Customized Loss')
plt.xlabel('Epoch')
# The plotted quantity is the VAE loss (reconstruction + KL) and the model in
# this section is compiled with optimizer='adam', so the title says so.
plt.title('VAE Loss (Reconstruction + KL) - Elu, Adam', pad=13)
plt.legend(loc='upper right');

In [None]:
# Run the test images through the VAE and show the first n outputs side by
# side as 28x28 grayscale images, each titled with its entry in test_labels.
compressed_images = vae.predict(test_images)
n = 5
plt.figure(figsize=(9, 2))
for i in range(n):
  ax = plt.subplot(1, n, i + 1)
  ax.imshow(np.reshape(compressed_images[i], (28, 28)), cmap="gray")
  ax.set_title(test_labels[i])
  ax.set_axis_off()

plt.show()

In [None]:
# Display a 2D manifold of the digits
n = 20  # the figure is an n x n grid of decoded digits
digit_size = 28
# Sample n evenly spaced points in [-3, 3] along each latent axis
grid_x = np.linspace(-3, 3, n)
grid_y = np.linspace(-3, 3, n)

# Build every latent point up front and decode them with ONE predict call
# instead of n*n single-row calls. Tile (i, j) is decoded from the latent
# point [grid_y[j], grid_x[i]]: row i fixes the second latent coordinate,
# column j the first.
xi_grid, yi_grid = np.meshgrid(grid_y, grid_x)
z_samples = np.column_stack([xi_grid.ravel(), yi_grid.ravel()])
x_decoded = decoder.predict(z_samples)

# (n*n, pixels) -> (row, col, h, w) -> (row, h, col, w) -> one tiled image
figure = (x_decoded.reshape(n, n, digit_size, digit_size)
          .transpose(0, 2, 1, 3)
          .reshape(n * digit_size, n * digit_size))

plt.figure(figsize=(10, 10))
plt.imshow(figure, cmap="gray")
plt.show()

In [None]:
# Persist the end-to-end VAE and, separately, its encoder.
vae.save(filepath=f'/content/drive/MyDrive/Exercises/Autoencoders/vae_elu/autoencoder')
encoder.save(filepath=f'/content/drive/MyDrive/Exercises/Autoencoders/vae_elu/encoder')

## 7.0 Hidden Layers: PRelu - Latent Space: Linear + BatchNorm: MULTIPLE OUTPUT

In [None]:
from tensorflow.keras.utils import to_categorical

# One-hot encode the label vectors for the classification head.
# num_classes is pinned to the 10 digit classes so that every split gets the
# same width even if one of them happens to lack the highest label.
y_t_ = to_categorical(y_t, num_classes=10)
y_v_ = to_categorical(y_v, num_classes=10)
y_test = to_categorical(test_labels, num_classes=10)

# Report the shapes of the one-hot arrays that were just created.
print(y_t_.shape, y_v_.shape, y_test.shape)

In [None]:
def model_autoencoder_mult(kernel="he_normal"):
  """Build the PReLU + BatchNorm autoencoder with an extra classification head.

  The encoder maps a flattened 28x28 image through 500 -> 300 -> 100 hidden
  units (each Dense -> BatchNormalization -> PReLU) into a 2-unit linear
  latent layer. Two branches start from that latent layer: a mirrored
  100 -> 300 -> 500 decoder ending in a sigmoid reconstruction layer
  ("Ric_Output"), and a 10-way softmax layer ("Class_Output").

  Args:
    kernel: kernel initializer for the hidden Dense layers. The latent layer
      and the two output layers keep the Keras default initializer.

  Returns:
    A tuple (autoencoder_mult, latent_model): the full model whose outputs
    are [reconstruction, class probabilities], and a model that maps the same
    input to the latent layer.
  """
  input_dim = 28*28
  latent_vec_dim = 2

  input_layer = Input(shape=(input_dim,))

  # Encoder: three Dense -> BatchNorm -> PReLU stages of shrinking width.
  enc_layer_1 = Dense(500, kernel_initializer=kernel)(input_layer)
  enc_layer_1 = tf.keras.layers.BatchNormalization()(enc_layer_1)
  enc_layer_1 = tf.keras.layers.PReLU()(enc_layer_1)

  enc_layer_2 = Dense(300, kernel_initializer=kernel)(enc_layer_1)
  enc_layer_2 = tf.keras.layers.BatchNormalization()(enc_layer_2)
  enc_layer_2 = tf.keras.layers.PReLU()(enc_layer_2)

  enc_layer_3 = Dense(100, kernel_initializer=kernel)(enc_layer_2)
  enc_layer_3 = tf.keras.layers.BatchNormalization()(enc_layer_3)
  enc_layer_3 = tf.keras.layers.PReLU()(enc_layer_3)

  # Linear bottleneck shared by the decoder and the classifier.
  enc_layer_4 = Dense(latent_vec_dim, activation='linear')(enc_layer_3)

  # Explicit layer names are kept free of spaces so they stay valid
  # TensorFlow scope names and match the other model_autoencoder_mult
  # variants in this notebook ("Class_Output", "Ric_Output").
  output_1 = Dense(10, activation='softmax', name="Class_Output")(enc_layer_4)

  encoder = enc_layer_4

  # Decoder: mirror image of the encoder.
  dec_layer_1 = Dense(100, kernel_initializer=kernel)(encoder)
  dec_layer_1 = tf.keras.layers.BatchNormalization()(dec_layer_1)
  dec_layer_1 = tf.keras.layers.PReLU()(dec_layer_1)

  dec_layer_2 = Dense(300, kernel_initializer=kernel)(dec_layer_1)
  dec_layer_2 = tf.keras.layers.BatchNormalization()(dec_layer_2)
  dec_layer_2 = tf.keras.layers.PReLU()(dec_layer_2)

  dec_layer_3 = Dense(500, kernel_initializer=kernel)(dec_layer_2)
  dec_layer_3 = tf.keras.layers.BatchNormalization()(dec_layer_3)
  dec_layer_3 = tf.keras.layers.PReLU()(dec_layer_3)

  dec_layer_4 = Dense(input_dim, activation='sigmoid', name="Ric_Output")(dec_layer_3)
  decoder = dec_layer_4

  # Full model: output 0 is the reconstruction, output 1 the class scores.
  autoencoder_mult = Model(inputs = input_layer, outputs = [decoder, output_1], name="Deep_Autoencoder")

  # Encoder-only model exposing the latent representation.
  latent_model = Model(input_layer, encoder)

  return autoencoder_mult, latent_model

In [None]:
# Build a fresh multi-output autoencoder together with its encoder-only model.
autoencoder_prellin, latent_model_prellin = model_autoencoder_mult()

# Print the layer-by-layer summary of the full model.
autoencoder_prellin.summary()

In [None]:
# Compile the two-headed model: MSE on the reconstruction output and
# categorical cross-entropy on the class output, weighted 0.75 / 0.25.
optimizer = tf.keras.optimizers.Adam()
loss_0 = 'mse'
loss_1 = 'categorical_crossentropy'
autoencoder_prellin.compile(loss=[loss_0, loss_1], loss_weights=[0.75, 0.25], optimizer=optimizer)

# Train the autoencoder with MNIST data.
# The class head is a 10-way softmax trained with categorical cross-entropy,
# so it must be fed the one-hot targets (y_t_, y_v_) rather than y_t / y_v.
history_prellin = autoencoder_prellin.fit(X_t, [X_t, y_t_], epochs=120, batch_size=128,
                                          callbacks=[early_stop, Checkpoint, lr],
                                          shuffle=True, validation_data=(X_v, [X_v, y_v_]))

# Save the trained full model and the encoder-only model.
autoencoder_prellin.save(f'/content/drive/MyDrive/Exercises/Autoencoders/prel_lin_bn_mo/enc/autoencoder')
latent_model_prellin.save(f'/content/drive/MyDrive/Exercises/Autoencoders/prel_lin_bn_mo/enc/encoder')

In [None]:
# Plot the training and validation loss against the number of epochs.
# For this two-output model, history['loss'] / history['val_loss'] hold the
# total objective (the weighted sum of the reconstruction and classification
# losses), not the reconstruction MSE alone, so the labels say so.
plt.figure(figsize=(10, 7))
plt.plot(history_prellin.history['loss'], label='Train')
plt.plot(history_prellin.history['val_loss'], label='Validation')
plt.ylabel('Total weighted loss')
plt.xlabel('Epoch')
plt.title('Multi-output Autoencoder - Total Loss', pad=13)
plt.legend(loc='upper right');

In [None]:
# The model returns [reconstruction, class scores]; keep the reconstruction.
compressed_images = autoencoder_prellin.predict(test_images)[0]

# Show the first n reconstructions side by side as 28x28 grayscale tiles.
n = 15
fig, axes = plt.subplots(1, n, figsize=(22, 2))
for i, ax in enumerate(axes):
  ax.imshow(compressed_images[i].reshape(28, 28), cmap="gray")
  ax.axis('off')

plt.show()

In [None]:
# Encode the test images into the two latent coordinates.
latent_representation = latent_model_prellin.predict(test_images)

# One point per image, coloured by its digit label.
fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(
    x=latent_representation[:, 0],
    y=latent_representation[:, 1],
    hue=test_labels,
    palette='tab10',
    ax=ax,
)
ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Park the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1), borderaxespad=0);

In [None]:
# Encode the training images into the two latent coordinates.
latent_representation = latent_model_prellin.predict(X_t)

# Colour by digit. y_t is expected to hold integer class labels (it is the
# input of to_categorical in this notebook), on which argmax over axis 1
# would fail; argmax is only applied if the labels turn out to be one-hot.
train_digits = np.argmax(y_t, axis=1) if np.ndim(y_t) > 1 else y_t

plt.figure(figsize=(10, 7))

sns.scatterplot(x=latent_representation[:,0],
                y=latent_representation[:,1],
                hue=train_digits, palette='tab10')

plt.xlabel("Encoder first dimension")
plt.ylabel("Encoder second dimension")

plt.grid(linestyle='--', alpha=0.5)

# Park the legend just outside the right edge of the axes.
plt.legend(bbox_to_anchor=(1.01, 1),
           borderaxespad=0);

In [None]:
# Encode the validation images into the two latent coordinates.
latent_representation = latent_model_prellin.predict(X_v)

# Colour by digit. y_v is expected to hold integer class labels (it is the
# input of to_categorical in this notebook), on which argmax over axis 1
# would fail; argmax is only applied if the labels turn out to be one-hot.
val_digits = np.argmax(y_v, axis=1) if np.ndim(y_v) > 1 else y_v

plt.figure(figsize=(10, 7))

sns.scatterplot(x=latent_representation[:,0],
                y=latent_representation[:,1],
                hue=val_digits, palette='tab10')

plt.xlabel("Encoder first dimension")
plt.ylabel("Encoder second dimension")

plt.grid(linestyle='--', alpha=0.5)

# Park the legend just outside the right edge of the axes.
plt.legend(bbox_to_anchor=(1.01, 1),
           borderaxespad=0);

## 7.1 Hidden Layers: PReLU - Latent Space: Linear + BatchNorm + L2 Regularization (l2=1.0): MULTIPLE OUTPUT

In [None]:
from tensorflow.keras.utils import to_categorical

# One-hot encode the digit labels for the softmax classification head.
y_t_ = to_categorical(y_t)
y_v_ = to_categorical(y_v)
y_test = to_categorical(test_labels)

# Report the shapes of the one-hot arrays that were just created.
print(y_t_.shape, y_v_.shape, y_test.shape)

In [None]:
def model_autoencoder_mult(kernel="he_normal"):
  """Build the PReLU + BatchNorm multi-output autoencoder with an L2 latent penalty.

  Encoder: 784 -> 500 -> 300 -> 100 (Dense -> BatchNormalization -> PReLU
  each) followed by a 2-unit linear latent layer whose kernel carries an
  L2(l2=1.0) regularizer. From the latent layer, one branch is a 10-way
  softmax ("Class_Output") and the other a mirrored decoder ending in a
  sigmoid layer of 784 units ("Ric_Output").

  Args:
    kernel: kernel initializer for the hidden Dense layers. The latent layer
      and the two output layers keep the Keras default initializer.

  Returns:
    A tuple (autoencoder_mult, latent_model): the full model with outputs
    [reconstruction, class probabilities], and the input -> latent model.
  """
  input_dim = 28*28
  latent_vec_dim = 2

  def hidden_block(tensor, width):
    # One hidden stage: Dense -> BatchNormalization -> PReLU.
    tensor = Dense(width, kernel_initializer=kernel)(tensor)
    tensor = tf.keras.layers.BatchNormalization()(tensor)
    return tf.keras.layers.PReLU()(tensor)

  input_layer = Input(shape=(input_dim,))

  # Encoder stack, narrowing towards the bottleneck.
  hidden = input_layer
  for width in (500, 300, 100):
    hidden = hidden_block(hidden, width)

  # Linear, L2-regularized bottleneck shared by both heads.
  encoder = Dense(
      latent_vec_dim,
      activation='linear',
      kernel_regularizer=tf.keras.regularizers.L2(l2=1.0),
  )(hidden)

  # Classification head reads the latent code directly.
  output_1 = Dense(10, activation='softmax', name="Class_Output")(encoder)

  # Decoder stack, mirroring the encoder.
  hidden = encoder
  for width in (100, 300, 500):
    hidden = hidden_block(hidden, width)
  decoder = Dense(input_dim, activation='sigmoid', name="Ric_Output")(hidden)

  # Output 0 is the reconstruction, output 1 the class scores.
  autoencoder_mult = tf.keras.Model(
      inputs=input_layer, outputs=[decoder, output_1], name="Deep_Autoencoder")

  # Encoder-only model exposing the latent representation.
  latent_model = tf.keras.Model(input_layer, encoder)

  return autoencoder_mult, latent_model

In [None]:
# Build a fresh multi-output autoencoder together with its encoder-only model.
autoencoder_prellin, latent_model_prellin = model_autoencoder_mult()

# Print the layer-by-layer summary of the full model.
autoencoder_prellin.summary()

In [None]:
# Two-headed objective: MSE for the reconstruction output and categorical
# cross-entropy for the class output, blended with weights 0.75 / 0.25.
loss_0 = 'mse'
loss_1 = 'categorical_crossentropy'
optimizer = tf.keras.optimizers.Adam()
autoencoder_prellin.compile(
    optimizer=optimizer,
    loss=[loss_0, loss_1],
    loss_weights=[0.75, 0.25],
)

# Fit on (X_t -> [X_t, one-hot labels]) and validate on the X_v counterpart.
history_prellin = autoencoder_prellin.fit(
    X_t,
    [X_t, y_t_],
    validation_data=(X_v, [X_v, y_v_]),
    epochs=120,
    batch_size=128,
    shuffle=True,
    callbacks=[early_stop, Checkpoint, lr],
)

# Save the trained full model and the encoder-only model.
autoencoder_prellin.save(f'/content/drive/MyDrive/Exercises/Autoencoders/prel_lin_bn_mo_reg_l2_v2/enc/autoencoder')
latent_model_prellin.save(f'/content/drive/MyDrive/Exercises/Autoencoders/prel_lin_bn_mo_reg_l2_v2/enc/encoder')

In [None]:
# Plot the training and validation loss against the number of epochs.
# For this two-output model, history['loss'] / history['val_loss'] hold the
# total objective (the weighted sum of the reconstruction and classification
# losses), not the reconstruction MSE alone, so the labels say so.
plt.figure(figsize=(10, 7))
plt.plot(history_prellin.history['loss'], label='Train')
plt.plot(history_prellin.history['val_loss'], label='Validation')
plt.ylabel('Total weighted loss')
plt.xlabel('Epoch')
plt.title('Multi-output Autoencoder - Total Loss', pad=13)
plt.legend(loc='upper right');

In [None]:
# The model returns [reconstruction, class scores]; keep the reconstruction.
compressed_images = autoencoder_prellin.predict(test_images)[0]

# Show the first n reconstructions side by side as 28x28 grayscale tiles.
n = 15
fig, axes = plt.subplots(1, n, figsize=(22, 2))
for i, ax in enumerate(axes):
  ax.imshow(compressed_images[i].reshape(28, 28), cmap="gray")
  ax.axis('off')

plt.show()

In [None]:
# Encode the test images into the two latent coordinates.
latent_representation = latent_model_prellin.predict(test_images)

# One point per image, coloured by its digit label.
fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(
    x=latent_representation[:, 0],
    y=latent_representation[:, 1],
    hue=test_labels,
    palette='tab10',
    ax=ax,
)
ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Park the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1), borderaxespad=0);

In [None]:
# Encode the training images into the two latent coordinates.
latent_representation = latent_model_prellin.predict(X_t)

# One point per image, coloured by its label in y_t.
fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(
    x=latent_representation[:, 0],
    y=latent_representation[:, 1],
    hue=y_t,
    palette='tab10',
    ax=ax,
)
ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Park the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1), borderaxespad=0);

In [None]:
# Encode the validation images into the two latent coordinates.
latent_representation = latent_model_prellin.predict(X_v)

# One point per image, coloured by its label in y_v.
fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(
    x=latent_representation[:, 0],
    y=latent_representation[:, 1],
    hue=y_v,
    palette='tab10',
    ax=ax,
)
ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Park the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1), borderaxespad=0);

## 7.2 Hidden Layers: PReLU - Latent Space: Linear + BatchNorm + L1 Regularization (l1=0.01): MULTIPLE OUTPUT (test 2)

In [None]:
from tensorflow.keras.utils import to_categorical

# One-hot encode the digit labels for the softmax classification head.
y_t_ = to_categorical(y_t)
y_v_ = to_categorical(y_v)
y_test = to_categorical(test_labels)

# Report the shapes of the one-hot arrays that were just created.
print(y_t_.shape, y_v_.shape, y_test.shape)

In [None]:
def model_autoencoder_mult(kernel="he_normal"):
  """Build the PReLU + BatchNorm multi-output autoencoder with an L1 latent penalty.

  Encoder: 784 -> 500 -> 300 -> 100 (Dense -> BatchNormalization -> PReLU
  each) followed by a 2-unit linear latent layer whose kernel carries an
  L1(l1=0.01) regularizer. From the latent layer, one branch is a 10-way
  softmax ("Class_Output") and the other a mirrored decoder ending in a
  sigmoid layer of 784 units ("Ric_Output").

  Args:
    kernel: kernel initializer for the hidden Dense layers. The latent layer
      and the two output layers keep the Keras default initializer.

  Returns:
    A tuple (autoencoder_mult, latent_model): the full model with outputs
    [reconstruction, class probabilities], and the input -> latent model.
  """
  input_dim = 28*28
  latent_vec_dim = 2

  def hidden_block(tensor, width):
    # One hidden stage: Dense -> BatchNormalization -> PReLU.
    tensor = Dense(width, kernel_initializer=kernel)(tensor)
    tensor = tf.keras.layers.BatchNormalization()(tensor)
    return tf.keras.layers.PReLU()(tensor)

  input_layer = Input(shape=(input_dim,))

  # Encoder stack, narrowing towards the bottleneck.
  hidden = input_layer
  for width in (500, 300, 100):
    hidden = hidden_block(hidden, width)

  # Linear, L1-regularized bottleneck shared by both heads.
  encoder = Dense(
      latent_vec_dim,
      activation='linear',
      kernel_regularizer=tf.keras.regularizers.L1(l1=0.01),
  )(hidden)

  # Classification head reads the latent code directly.
  output_1 = Dense(10, activation='softmax', name="Class_Output")(encoder)

  # Decoder stack, mirroring the encoder.
  hidden = encoder
  for width in (100, 300, 500):
    hidden = hidden_block(hidden, width)
  decoder = Dense(input_dim, activation='sigmoid', name="Ric_Output")(hidden)

  # Output 0 is the reconstruction, output 1 the class scores.
  autoencoder_mult = Model(
      inputs=input_layer, outputs=[decoder, output_1], name="Deep_Autoencoder")

  # Encoder-only model exposing the latent representation.
  latent_model = Model(input_layer, encoder)

  return autoencoder_mult, latent_model

In [None]:
# Build a fresh multi-output autoencoder together with its encoder-only model.
autoencoder_prellin, latent_model_prellin = model_autoencoder_mult()

# Print the layer-by-layer summary of the full model.
autoencoder_prellin.summary()

In [None]:
# Two-headed objective: MSE for the reconstruction output and categorical
# cross-entropy for the class output, blended with weights 0.75 / 0.25.
loss_0 = 'mse'
loss_1 = 'categorical_crossentropy'
optimizer = tf.keras.optimizers.Adam()
autoencoder_prellin.compile(
    optimizer=optimizer,
    loss=[loss_0, loss_1],
    loss_weights=[0.75, 0.25],
)

# Fit on (X_t -> [X_t, one-hot labels]) and validate on the X_v counterpart.
history_prellin = autoencoder_prellin.fit(
    X_t,
    [X_t, y_t_],
    validation_data=(X_v, [X_v, y_v_]),
    epochs=120,
    batch_size=128,
    shuffle=True,
    callbacks=[early_stop, Checkpoint, lr],
)

# Save the trained full model and the encoder-only model.
autoencoder_prellin.save(f'/content/drive/MyDrive/Exercises/Autoencoders/prel_lin_bn_mo_reg_l1/enc/autoencoder')
latent_model_prellin.save(f'/content/drive/MyDrive/Exercises/Autoencoders/prel_lin_bn_mo_reg_l1/enc/encoder')

In [None]:
# Plot the training and validation loss against the number of epochs.
# For this two-output model, history['loss'] / history['val_loss'] hold the
# total objective (the weighted sum of the reconstruction and classification
# losses), not the reconstruction MSE alone, so the labels say so.
plt.figure(figsize=(10, 7))
plt.plot(history_prellin.history['loss'], label='Train')
plt.plot(history_prellin.history['val_loss'], label='Validation')
plt.ylabel('Total weighted loss')
plt.xlabel('Epoch')
plt.title('Multi-output Autoencoder - Total Loss', pad=13)
plt.legend(loc='upper right');

In [None]:
# The model returns [reconstruction, class scores]; keep the reconstruction.
compressed_images = autoencoder_prellin.predict(test_images)[0]

# Show the first n reconstructions side by side as 28x28 grayscale tiles.
n = 15
fig, axes = plt.subplots(1, n, figsize=(22, 2))
for i, ax in enumerate(axes):
  ax.imshow(compressed_images[i].reshape(28, 28), cmap="gray")
  ax.axis('off')

plt.show()

In [None]:
# Encode the test images into the two latent coordinates.
latent_representation = latent_model_prellin.predict(test_images)

# One point per image, coloured by its digit label.
fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(
    x=latent_representation[:, 0],
    y=latent_representation[:, 1],
    hue=test_labels,
    palette='tab10',
    ax=ax,
)
ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Park the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1), borderaxespad=0);

In [None]:
# Encode the training images into the two latent coordinates.
latent_representation = latent_model_prellin.predict(X_t)

# One point per image, coloured by its label in y_t.
fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(
    x=latent_representation[:, 0],
    y=latent_representation[:, 1],
    hue=y_t,
    palette='tab10',
    ax=ax,
)
ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Park the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1), borderaxespad=0);

In [None]:
# Encode the validation images into the two latent coordinates.
latent_representation = latent_model_prellin.predict(X_v)

# One point per image, coloured by its label in y_v.
fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(
    x=latent_representation[:, 0],
    y=latent_representation[:, 1],
    hue=y_v,
    palette='tab10',
    ax=ax,
)
ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Park the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1), borderaxespad=0);

## 7.3 Hidden Layers: SELU - Latent Space: Linear + BatchNorm + L2 Regularization (l2=0.01): MULTIPLE OUTPUT

In [None]:
from tensorflow.keras.utils import to_categorical

# One-hot encode the digit labels for the softmax classification head.
y_t_ = to_categorical(y_t)
y_v_ = to_categorical(y_v)
y_test = to_categorical(test_labels)

# Report the shapes of the one-hot arrays that were just created.
print(y_t_.shape, y_v_.shape, y_test.shape)

In [None]:
def model_autoencoder_mult(kernel="lecun_normal"):
  """Build the SELU + BatchNorm multi-output autoencoder with an L2 latent penalty.

  Encoder: 784 -> 500 -> 300 -> 100 (Dense -> BatchNormalization -> selu
  each) followed by a 2-unit linear latent layer whose kernel carries an
  L2(l2=0.01) regularizer. From the latent layer, one branch is a 10-way
  softmax ("Class_Output") and the other a mirrored decoder ending in a
  sigmoid layer of 784 units ("Ric_Output").

  Args:
    kernel: kernel initializer for the hidden Dense layers. The latent layer
      and the two output layers keep the Keras default initializer.

  Returns:
    A tuple (autoencoder_mult, latent_model): the full model with outputs
    [reconstruction, class probabilities], and the input -> latent model.
  """
  input_dim = 28*28
  latent_vec_dim = 2

  def selu_block(tensor, width):
    # One hidden stage: Dense -> BatchNormalization -> selu activation.
    tensor = Dense(width, kernel_initializer=kernel)(tensor)
    tensor = tf.keras.layers.BatchNormalization()(tensor)
    return tf.keras.activations.selu(tensor)

  input_layer = Input(shape=(input_dim,))

  # Encoder stack, narrowing towards the bottleneck.
  hidden = input_layer
  for width in (500, 300, 100):
    hidden = selu_block(hidden, width)

  # Linear, L2-regularized bottleneck shared by both heads. No
  # BatchNormalization is applied to the latent layer itself.
  encoder = Dense(
      latent_vec_dim,
      activation='linear',
      kernel_regularizer=tf.keras.regularizers.L2(l2=0.01),
  )(hidden)

  # Classification head reads the latent code directly.
  output_1 = Dense(10, activation='softmax', name="Class_Output")(encoder)

  # Decoder stack, mirroring the encoder.
  hidden = encoder
  for width in (100, 300, 500):
    hidden = selu_block(hidden, width)
  decoder = Dense(input_dim, activation='sigmoid', name="Ric_Output")(hidden)

  # Output 0 is the reconstruction, output 1 the class scores.
  autoencoder_mult = Model(
      inputs=input_layer, outputs=[decoder, output_1], name="Deep_Autoencoder")

  # Encoder-only model exposing the latent representation.
  latent_model = Model(input_layer, encoder)

  return autoencoder_mult, latent_model

In [None]:
# Build a fresh multi-output autoencoder together with its encoder-only model.
autoencoder_prellin, latent_model_prellin = model_autoencoder_mult()

# Print the layer-by-layer summary of the full model.
autoencoder_prellin.summary()

In [None]:
# Two-headed objective: MSE for the reconstruction output and categorical
# cross-entropy for the class output, blended with weights 0.75 / 0.25.
loss_0 = 'mse'
loss_1 = 'categorical_crossentropy'
optimizer = tf.keras.optimizers.Adam()
autoencoder_prellin.compile(
    optimizer=optimizer,
    loss=[loss_0, loss_1],
    loss_weights=[0.75, 0.25],
)

# Fit on (X_t -> [X_t, one-hot labels]) and validate on the X_v counterpart.
history_prellin = autoencoder_prellin.fit(
    X_t,
    [X_t, y_t_],
    validation_data=(X_v, [X_v, y_v_]),
    epochs=120,
    batch_size=128,
    shuffle=True,
    callbacks=[early_stop, Checkpoint, lr],
)

# Save the trained full model and the encoder-only model.
autoencoder_prellin.save(f'/content/drive/MyDrive/Exercises/Autoencoders/selu_lin_bn_mo_bn/enc/autoencoder')
latent_model_prellin.save(f'/content/drive/MyDrive/Exercises/Autoencoders/selu_lin_bn_mo_bn/enc/encoder')

In [None]:
# Plot the training and validation loss against the number of epochs.
# For this two-output model, history['loss'] / history['val_loss'] hold the
# total objective (the weighted sum of the reconstruction and classification
# losses), not the reconstruction MSE alone, so the labels say so.
plt.figure(figsize=(10, 7))
plt.plot(history_prellin.history['loss'], label='Train')
plt.plot(history_prellin.history['val_loss'], label='Validation')
plt.ylabel('Total weighted loss')
plt.xlabel('Epoch')
plt.title('Multi-output Autoencoder - Total Loss', pad=13)
plt.legend(loc='upper right');

In [None]:
# The model returns [reconstruction, class scores]; keep the reconstruction.
compressed_images = autoencoder_prellin.predict(test_images)[0]

# Show the first n reconstructions side by side as 28x28 grayscale tiles.
n = 15
fig, axes = plt.subplots(1, n, figsize=(22, 2))
for i, ax in enumerate(axes):
  ax.imshow(compressed_images[i].reshape(28, 28), cmap="gray")
  ax.axis('off')

plt.show()

In [None]:
# Encode the test images into the two latent coordinates.
latent_representation = latent_model_prellin.predict(test_images)

# One point per image, coloured by its digit label.
fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(
    x=latent_representation[:, 0],
    y=latent_representation[:, 1],
    hue=test_labels,
    palette='tab10',
    ax=ax,
)
ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Park the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1), borderaxespad=0);

In [None]:
# Encode the training images into the two latent coordinates.
latent_representation = latent_model_prellin.predict(X_t)

# One point per image, coloured by its label in y_t.
fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(
    x=latent_representation[:, 0],
    y=latent_representation[:, 1],
    hue=y_t,
    palette='tab10',
    ax=ax,
)
ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Park the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1), borderaxespad=0);

In [None]:
# Encode the validation images into the two latent coordinates.
latent_representation = latent_model_prellin.predict(X_v)

# One point per image, coloured by its label in y_v.
fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(
    x=latent_representation[:, 0],
    y=latent_representation[:, 1],
    hue=y_v,
    palette='tab10',
    ax=ax,
)
ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Park the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1), borderaxespad=0);

## 7.4 Hidden Layers: Selu - Latent Space: Linear + BatchNorm + Regularization L1: MULTIPLE OUTPUT

In [None]:
from tensorflow.keras.utils import to_categorical

# One-hot encode the digit labels for the softmax classification head.
y_t_ = to_categorical(y_t)
y_v_ = to_categorical(y_v)
y_test = to_categorical(test_labels)

# Report the shapes of the one-hot arrays that were just created.
print(y_t_.shape, y_v_.shape, y_test.shape)

In [None]:
def model_autoencoder_mult(kernel="lecun_normal"):
  """Build the SELU + BatchNorm multi-output autoencoder with an L1 latent penalty.

  Encoder: 784 -> 500 -> 300 -> 100 (Dense -> BatchNormalization -> selu
  each) followed by a 2-unit linear latent layer whose kernel carries an
  L1(l1=0.01) regularizer. From the latent layer, one branch is a 10-way
  softmax ("Class_Output") and the other a mirrored decoder ending in a
  sigmoid layer of 784 units ("Ric_Output").

  Args:
    kernel: kernel initializer for the hidden Dense layers. The latent layer
      and the two output layers keep the Keras default initializer.

  Returns:
    A tuple (autoencoder_mult, latent_model): the full model with outputs
    [reconstruction, class probabilities], and the input -> latent model.
  """
  input_dim = 28*28
  latent_vec_dim = 2

  def selu_block(tensor, width):
    # One hidden stage: Dense -> BatchNormalization -> selu activation.
    tensor = Dense(width, kernel_initializer=kernel)(tensor)
    tensor = tf.keras.layers.BatchNormalization()(tensor)
    return tf.keras.activations.selu(tensor)

  input_layer = Input(shape=(input_dim,))

  # Encoder stack, narrowing towards the bottleneck.
  hidden = input_layer
  for width in (500, 300, 100):
    hidden = selu_block(hidden, width)

  # Linear, L1-regularized bottleneck shared by both heads. No
  # BatchNormalization is applied to the latent layer itself.
  encoder = Dense(
      latent_vec_dim,
      activation='linear',
      kernel_regularizer=tf.keras.regularizers.L1(l1=0.01),
  )(hidden)

  # Classification head reads the latent code directly.
  output_1 = Dense(10, activation='softmax', name="Class_Output")(encoder)

  # Decoder stack, mirroring the encoder.
  hidden = encoder
  for width in (100, 300, 500):
    hidden = selu_block(hidden, width)
  decoder = Dense(input_dim, activation='sigmoid', name="Ric_Output")(hidden)

  # Output 0 is the reconstruction, output 1 the class scores.
  autoencoder_mult = Model(
      inputs=input_layer, outputs=[decoder, output_1], name="Deep_Autoencoder")

  # Encoder-only model exposing the latent representation.
  latent_model = Model(input_layer, encoder)

  return autoencoder_mult, latent_model

In [None]:
# Build a fresh multi-output autoencoder together with its encoder-only model.
autoencoder_prellin, latent_model_prellin = model_autoencoder_mult()

# Print the layer-by-layer summary of the full model.
autoencoder_prellin.summary()

In [None]:
# Two-headed objective: MSE for the reconstruction output and categorical
# cross-entropy for the class output, blended with weights 0.75 / 0.25.
loss_0 = 'mse'
loss_1 = 'categorical_crossentropy'
optimizer = tf.keras.optimizers.Adam()
autoencoder_prellin.compile(
    optimizer=optimizer,
    loss=[loss_0, loss_1],
    loss_weights=[0.75, 0.25],
)

# Fit on (X_t -> [X_t, one-hot labels]) and validate on the X_v counterpart.
history_prellin = autoencoder_prellin.fit(
    X_t,
    [X_t, y_t_],
    validation_data=(X_v, [X_v, y_v_]),
    epochs=120,
    batch_size=128,
    shuffle=True,
    callbacks=[early_stop, Checkpoint, lr],
)

# Save the trained full model and the encoder-only model.
autoencoder_prellin.save(f'/content/drive/MyDrive/Exercises/Autoencoders/selu_lin_bn_mo_l1/enc/autoencoder')
latent_model_prellin.save(f'/content/drive/MyDrive/Exercises/Autoencoders/selu_lin_bn_mo_l1/enc/encoder')

In [None]:
# Plot the training and validation loss against the number of epochs.
# For this two-output model, history['loss'] / history['val_loss'] hold the
# total objective (the weighted sum of the reconstruction and classification
# losses), not the reconstruction MSE alone, so the labels say so.
plt.figure(figsize=(10, 7))
plt.plot(history_prellin.history['loss'], label='Train')
plt.plot(history_prellin.history['val_loss'], label='Validation')
plt.ylabel('Total weighted loss')
plt.xlabel('Epoch')
plt.title('Multi-output Autoencoder - Total Loss', pad=13)
plt.legend(loc='upper right');

In [None]:
# The model returns [reconstruction, class scores]; keep the reconstruction.
compressed_images = autoencoder_prellin.predict(test_images)[0]

# Show the first n reconstructions side by side as 28x28 grayscale tiles.
n = 15
fig, axes = plt.subplots(1, n, figsize=(22, 2))
for i, ax in enumerate(axes):
  ax.imshow(compressed_images[i].reshape(28, 28), cmap="gray")
  ax.axis('off')

plt.show()

In [None]:
# Encode the test images into the two latent coordinates.
latent_representation = latent_model_prellin.predict(test_images)

# One point per image, coloured by its digit label.
fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(
    x=latent_representation[:, 0],
    y=latent_representation[:, 1],
    hue=test_labels,
    palette='tab10',
    ax=ax,
)
ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Park the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1), borderaxespad=0);

In [None]:
# Encode the training images into the two latent coordinates.
latent_representation = latent_model_prellin.predict(X_t)

# One point per image, coloured by its label in y_t.
fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(
    x=latent_representation[:, 0],
    y=latent_representation[:, 1],
    hue=y_t,
    palette='tab10',
    ax=ax,
)
ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Park the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1), borderaxespad=0);

In [None]:
# Encode the validation images into the two latent coordinates.
latent_representation = latent_model_prellin.predict(X_v)

# One point per image, coloured by its label in y_v.
fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(
    x=latent_representation[:, 0],
    y=latent_representation[:, 1],
    hue=y_v,
    palette='tab10',
    ax=ax,
)
ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Park the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1), borderaxespad=0);

## 7.5 Hidden Layers: Elu - Latent Space: Linear: MULTIPLE OUTPUT

In [None]:
from tensorflow.keras.utils import to_categorical

# One-hot encode the digit labels for the softmax classification head.
y_t_ = to_categorical(y_t)
y_v_ = to_categorical(y_v)
y_test = to_categorical(test_labels)

# Report the shapes of the one-hot arrays that were just created.
print(y_t_.shape, y_v_.shape, y_test.shape)

In [None]:
def model_autoencoder_mult(kernel="he_normal"):
  """Build the ELU multi-output autoencoder (no BatchNorm, no regularizer).

  Encoder: 784 -> 500 -> 300 -> 100 (Dense -> elu each) followed by a 2-unit
  linear latent layer. From the latent layer, one branch is a 10-way softmax
  ("Class_Output") and the other a mirrored decoder ending in a sigmoid layer
  of 784 units ("Ric_Output").

  BatchNormalization is deliberately absent from every stage of this variant
  (the author's leftover note suggests it worsened results), and the latent
  layer has no kernel regularizer.

  Args:
    kernel: kernel initializer for the hidden Dense layers. The latent layer
      and the two output layers keep the Keras default initializer.

  Returns:
    A tuple (autoencoder_mult, latent_model): the full model with outputs
    [reconstruction, class probabilities], and the input -> latent model.
  """
  input_dim = 28*28
  latent_vec_dim = 2

  def elu_block(tensor, width):
    # One hidden stage: Dense followed by the elu activation.
    tensor = Dense(width, kernel_initializer=kernel)(tensor)
    return tf.keras.activations.elu(tensor)

  input_layer = Input(shape=(input_dim,))

  # Encoder stack, narrowing towards the bottleneck.
  hidden = input_layer
  for width in (500, 300, 100):
    hidden = elu_block(hidden, width)

  # Plain linear bottleneck shared by both heads.
  encoder = Dense(latent_vec_dim, activation='linear')(hidden)

  # Classification head reads the latent code directly.
  output_1 = Dense(10, activation='softmax', name="Class_Output")(encoder)

  # Decoder stack, mirroring the encoder.
  hidden = encoder
  for width in (100, 300, 500):
    hidden = elu_block(hidden, width)
  decoder = Dense(input_dim, activation='sigmoid', name="Ric_Output")(hidden)

  # Output 0 is the reconstruction, output 1 the class scores.
  autoencoder_mult = Model(
      inputs=input_layer, outputs=[decoder, output_1], name="Deep_Autoencoder")

  # Encoder-only model exposing the latent representation.
  latent_model = Model(input_layer, encoder)

  return autoencoder_mult, latent_model

In [None]:
# Build a fresh multi-output autoencoder together with its encoder-only model.
autoencoder_prellin, latent_model_prellin = model_autoencoder_mult()

# Print the layer-by-layer summary of the full model.
autoencoder_prellin.summary()

In [None]:
# Two-headed objective: MSE for the reconstruction output and categorical
# cross-entropy for the class output, blended with weights 0.75 / 0.25.
loss_0 = 'mse'
loss_1 = 'categorical_crossentropy'
optimizer = tf.keras.optimizers.Adam()
autoencoder_prellin.compile(
    optimizer=optimizer,
    loss=[loss_0, loss_1],
    loss_weights=[0.75, 0.25],
)

# Fit on (X_t -> [X_t, one-hot labels]) and validate on the X_v counterpart.
history_prellin = autoencoder_prellin.fit(
    X_t,
    [X_t, y_t_],
    validation_data=(X_v, [X_v, y_v_]),
    epochs=120,
    batch_size=128,
    shuffle=True,
    callbacks=[early_stop, Checkpoint, lr],
)

# Save the trained full model and the encoder-only model.
autoencoder_prellin.save(f'/content/drive/MyDrive/Exercises/Autoencoders/elu_lin_mo/enc/autoencoder')
latent_model_prellin.save(f'/content/drive/MyDrive/Exercises/Autoencoders/elu_lin_mo/enc/encoder')

In [None]:
# Plot the training and validation loss against the number of epochs.
# For this two-output model, history['loss'] / history['val_loss'] hold the
# total objective (the weighted sum of the reconstruction and classification
# losses), not the reconstruction MSE alone, so the labels say so.
plt.figure(figsize=(10, 7))
plt.plot(history_prellin.history['loss'], label='Train')
plt.plot(history_prellin.history['val_loss'], label='Validation')
plt.ylabel('Total weighted loss')
plt.xlabel('Epoch')
plt.title('Multi-output Autoencoder - Total Loss', pad=13)
plt.legend(loc='upper right');

In [None]:
# The model returns [reconstruction, class scores]; keep the reconstruction.
compressed_images = autoencoder_prellin.predict(test_images)[0]

# Show the first n reconstructions side by side as 28x28 grayscale tiles.
n = 15
fig, axes = plt.subplots(1, n, figsize=(22, 2))
for i, ax in enumerate(axes):
  ax.imshow(compressed_images[i].reshape(28, 28), cmap="gray")
  ax.axis('off')

plt.show()

In [None]:
# Encode the test images into the two latent coordinates.
latent_representation = latent_model_prellin.predict(test_images)

# One point per image, coloured by its digit label.
fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(
    x=latent_representation[:, 0],
    y=latent_representation[:, 1],
    hue=test_labels,
    palette='tab10',
    ax=ax,
)
ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Park the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1), borderaxespad=0);

In [None]:
# Encode the training images into the two latent coordinates.
latent_representation = latent_model_prellin.predict(X_t)

# One point per image, coloured by its label in y_t.
fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(
    x=latent_representation[:, 0],
    y=latent_representation[:, 1],
    hue=y_t,
    palette='tab10',
    ax=ax,
)
ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Park the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1), borderaxespad=0);

In [None]:
# Encode the validation images into the two latent coordinates.
latent_representation = latent_model_prellin.predict(X_v)

# One point per image, coloured by its label in y_v.
fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(
    x=latent_representation[:, 0],
    y=latent_representation[:, 1],
    hue=y_v,
    palette='tab10',
    ax=ax,
)
ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Park the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1), borderaxespad=0);

## 7.6 Hidden Layers: Elu - Latent Space: Linear: MULTIPLE OUTPUT and L1 reg

In [None]:
from tensorflow.keras.utils import to_categorical

# One-hot encode the digit labels for the softmax classification head.
y_t_ = to_categorical(y_t)
y_v_ = to_categorical(y_v)
y_test = to_categorical(test_labels)

# Report the shapes of the one-hot arrays that were just created.
print(y_t_.shape, y_v_.shape, y_test.shape)

In [None]:
def model_autoencoder_mult(kernel="he_normal", input_dim=28*28, latent_vec_dim=2,
                           hidden_units=(500, 300, 100), l1=0.01):
  """Build a two-output dense autoencoder: ELU hidden layers, L1-regularised linear bottleneck.

  The encoder stacks Dense -> ELU blocks and ends in a linear Dense bottleneck
  whose kernel carries an L1 penalty. Two heads read the bottleneck: a
  10-way softmax classifier ("Class_Output") and a mirrored decoder that ends
  in a sigmoid layer ("Ric_Output").

  Args:
    kernel: kernel initializer used by the hidden Dense layers.
    input_dim: length of the flattened input vector.
    latent_vec_dim: number of units in the bottleneck layer.
    hidden_units: encoder hidden widths, outermost first; the decoder
      uses the same widths in reverse order.
    l1: L1 penalty factor applied to the bottleneck kernel.

  Returns:
    Tuple (autoencoder_mult, latent_model). The first model outputs
    [reconstruction, class probabilities]; the second shares the same
    layers and maps inputs to the bottleneck activations.
  """
  input_layer = Input(shape=(input_dim,))

  # Encoder: one Dense -> ELU block per hidden width. Batch normalisation is
  # left out of this variant (an earlier note suggests it hurt results —
  # TODO confirm).
  x = input_layer
  for units in hidden_units:
    x = Dense(units, kernel_initializer=kernel)(x)
    x = tf.keras.activations.elu(x)

  # Linear bottleneck with an L1 penalty on its weights.
  encoder = Dense(latent_vec_dim, activation='linear',
                  kernel_regularizer=tf.keras.regularizers.L1(l1=l1))(x)

  # Classification head reads the bottleneck directly.
  output_1 = Dense(10, activation='softmax', name="Class_Output")(encoder)

  # Decoder: mirror of the encoder, finishing with a sigmoid reconstruction.
  x = encoder
  for units in reversed(hidden_units):
    x = Dense(units, kernel_initializer=kernel)(x)
    x = tf.keras.activations.elu(x)
  decoder = Dense(input_dim, activation='sigmoid', name="Ric_Output")(x)

  # Full model (reconstruction + classification) and encoder-only view.
  autoencoder_mult = Model(inputs=input_layer, outputs=[decoder, output_1], name="Deep_Autoencoder")
  latent_model = Model(input_layer, encoder)

  return autoencoder_mult, latent_model

In [None]:
# Instantiate the ELU / L1 two-output model and its encoder-only companion,
# spelling out the default initializer.
autoencoder_prellin, latent_model_prellin = model_autoencoder_mult(kernel="he_normal")

# Print the layer-by-layer architecture of the full model.
autoencoder_prellin.summary()

In [None]:
# Joint objective: MSE for the reconstruction output (weight 0.75) and
# categorical cross-entropy for the class output (weight 0.25), with Adam.
loss_0 = 'mse'
loss_1 = 'categorical_crossentropy'
optimizer = tf.keras.optimizers.Adam()

autoencoder_prellin.compile(
    optimizer=optimizer,
    loss=[loss_0, loss_1],
    loss_weights=[0.75, 0.25],
)

# Train on X_t: the reconstruction target is the input itself and the
# classification target is the one-hot label array.
# NOTE(review): early_stop, Checkpoint and lr are defined elsewhere in the
# notebook — confirm they are safe to reuse across experiments.
history_prellin = autoencoder_prellin.fit(
    X_t, [X_t, y_t_],
    validation_data=(X_v, [X_v, y_v_]),
    epochs=120,
    batch_size=128,
    shuffle=True,
    callbacks=[early_stop, Checkpoint, lr],
)

# Persist the full model and the encoder-only model.
autoencoder_prellin.save('/content/drive/MyDrive/Exercises/Autoencoders/elu_lin_l1_mo/enc/autoencoder')
latent_model_prellin.save('/content/drive/MyDrive/Exercises/Autoencoders/elu_lin_l1_mo/enc/encoder')

In [None]:
# Plot training and validation loss against the number of epochs.
# history['loss'] / ['val_loss'] hold the total objective minimised by fit():
# the 0.75/0.25-weighted sum of the reconstruction and classification losses
# (plus any layer regularisation penalty), not the reconstruction MSE alone,
# so the axis and title are labelled accordingly.
plt.figure(figsize=(10, 7))
plt.plot(history_prellin.history['loss'], label='Train')
plt.plot(history_prellin.history['val_loss'], label='Validation')
plt.ylabel('Total weighted loss')
plt.xlabel('Epoch')
plt.title('Autoencoder Total Loss (reconstruction + classification)', pad=13)
plt.legend(loc='upper right');

In [None]:
# Preview the first n reconstructed test images in a single row.
n = 15
# The model returns [reconstruction, class probabilities]; keep the first.
compressed_images = autoencoder_prellin.predict(test_images)[0]
fig, axes = plt.subplots(1, n, figsize=(22, 2))
for i, ax in enumerate(axes):
  # Each reconstruction is reshaped back to a 28x28 picture for display.
  ax.imshow(compressed_images[i].reshape(28, 28), cmap="gray")
  ax.axis('off')

plt.show()

In [None]:
# Encode the test images and scatter the two latent coordinates,
# coloured by `test_labels`.
latent_representation = latent_model_prellin.predict(test_images)

fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(
    x=latent_representation[:, 0],
    y=latent_representation[:, 1],
    hue=test_labels,
    palette='tab10',
    ax=ax,
)
ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Anchor the legend just outside the right-hand edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1),
          borderaxespad=0);

In [None]:
# Encode the training split (X_t) and scatter the two latent coordinates,
# coloured by `y_t`.
latent_representation = latent_model_prellin.predict(X_t)

fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(
    x=latent_representation[:, 0],
    y=latent_representation[:, 1],
    hue=y_t,
    palette='tab10',
    ax=ax,
)
ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Anchor the legend just outside the right-hand edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1),
          borderaxespad=0);

In [None]:
# Encode the validation split (X_v) and scatter the two latent coordinates,
# coloured by `y_v`.
latent_representation = latent_model_prellin.predict(X_v)

fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(
    x=latent_representation[:, 0],
    y=latent_representation[:, 1],
    hue=y_v,
    palette='tab10',
    ax=ax,
)
ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Anchor the legend just outside the right-hand edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1),
          borderaxespad=0);

## 7.7 Hidden Layers: Relu + Batch Normalization - Latent Space: Linear: MULTIPLE OUTPUT

In [None]:
from tensorflow.keras.utils import to_categorical

# One-hot encode the label arrays; these encoded copies are the targets
# passed to the classification head in the training cell of this section.
y_t_ = to_categorical(y_t)
y_v_ = to_categorical(y_v)
y_test = to_categorical(test_labels)

# Report the shapes of the encoded arrays created above (not the raw
# y_t / y_v label vectors) so the one-hot width can be checked.
print(y_t_.shape, y_v_.shape, y_test.shape)

In [None]:
def model_autoencoder_mult(kernel="he_normal", input_dim=28*28, latent_vec_dim=2,
                           hidden_units=(500, 300, 100)):
  """Build a two-output dense autoencoder: BatchNorm + ReLU hidden layers, linear bottleneck.

  The encoder stacks Dense -> BatchNormalization -> ReLU blocks and ends in
  an unregularised linear Dense bottleneck. Two heads read the bottleneck: a
  10-way softmax classifier ("Class_Output") and a mirrored decoder that ends
  in a sigmoid layer ("Ric_Output").

  Args:
    kernel: kernel initializer used by the hidden Dense layers.
    input_dim: length of the flattened input vector.
    latent_vec_dim: number of units in the bottleneck layer.
    hidden_units: encoder hidden widths, outermost first; the decoder
      uses the same widths in reverse order.

  Returns:
    Tuple (autoencoder_mult, latent_model). The first model outputs
    [reconstruction, class probabilities]; the second shares the same
    layers and maps inputs to the bottleneck activations.
  """
  input_layer = Input(shape=(input_dim,))

  # Encoder: Dense -> BatchNorm -> ReLU per hidden width.
  x = input_layer
  for units in hidden_units:
    x = Dense(units, kernel_initializer=kernel)(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.activations.relu(x)

  # Linear bottleneck (no normalisation, no regularisation).
  encoder = Dense(latent_vec_dim, activation='linear')(x)

  # Classification head reads the bottleneck directly.
  output_1 = Dense(10, activation='softmax', name="Class_Output")(encoder)

  # Decoder: mirror of the encoder, finishing with a sigmoid reconstruction.
  x = encoder
  for units in reversed(hidden_units):
    x = Dense(units, kernel_initializer=kernel)(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.activations.relu(x)
  decoder = Dense(input_dim, activation='sigmoid', name="Ric_Output")(x)

  # Full model (reconstruction + classification) and encoder-only view.
  autoencoder_mult = Model(inputs=input_layer, outputs=[decoder, output_1], name="Deep_Autoencoder")
  latent_model = Model(input_layer, encoder)

  return autoencoder_mult, latent_model

In [None]:
# Instantiate the ReLU / BatchNorm two-output model and its encoder-only
# companion, spelling out the default initializer.
autoencoder_prellin, latent_model_prellin = model_autoencoder_mult(kernel="he_normal")

# Print the layer-by-layer architecture of the full model.
autoencoder_prellin.summary()

In [None]:
# Joint objective: MSE for the reconstruction output (weight 0.75) and
# categorical cross-entropy for the class output (weight 0.25), with Adam.
loss_0 = 'mse'
loss_1 = 'categorical_crossentropy'
optimizer = tf.keras.optimizers.Adam()

autoencoder_prellin.compile(
    optimizer=optimizer,
    loss=[loss_0, loss_1],
    loss_weights=[0.75, 0.25],
)

# Train on X_t: the reconstruction target is the input itself and the
# classification target is the one-hot label array.
# NOTE(review): early_stop, Checkpoint and lr are defined elsewhere in the
# notebook — confirm they are safe to reuse across experiments.
history_prellin = autoencoder_prellin.fit(
    X_t, [X_t, y_t_],
    validation_data=(X_v, [X_v, y_v_]),
    epochs=120,
    batch_size=128,
    shuffle=True,
    callbacks=[early_stop, Checkpoint, lr],
)

# Persist the full model and the encoder-only model.
autoencoder_prellin.save('/content/drive/MyDrive/Exercises/Autoencoders/relu_lin_bn_mo/enc/autoencoder')
latent_model_prellin.save('/content/drive/MyDrive/Exercises/Autoencoders/relu_lin_bn_mo/enc/encoder')

In [None]:
# Plot training and validation loss against the number of epochs.
# history['loss'] / ['val_loss'] hold the total objective minimised by fit():
# the 0.75/0.25-weighted sum of the reconstruction and classification losses
# (plus any layer regularisation penalty), not the reconstruction MSE alone,
# so the axis and title are labelled accordingly.
plt.figure(figsize=(10, 7))
plt.plot(history_prellin.history['loss'], label='Train')
plt.plot(history_prellin.history['val_loss'], label='Validation')
plt.ylabel('Total weighted loss')
plt.xlabel('Epoch')
plt.title('Autoencoder Total Loss (reconstruction + classification)', pad=13)
plt.legend(loc='upper right');

In [None]:
# Preview the first n reconstructed test images in a single row.
n = 15
# The model returns [reconstruction, class probabilities]; keep the first.
compressed_images = autoencoder_prellin.predict(test_images)[0]
fig, axes = plt.subplots(1, n, figsize=(22, 2))
for i, ax in enumerate(axes):
  # Each reconstruction is reshaped back to a 28x28 picture for display.
  ax.imshow(compressed_images[i].reshape(28, 28), cmap="gray")
  ax.axis('off')

plt.show()

In [None]:
# Encode the test images and scatter the two latent coordinates,
# coloured by `test_labels`.
latent_representation = latent_model_prellin.predict(test_images)

fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(
    x=latent_representation[:, 0],
    y=latent_representation[:, 1],
    hue=test_labels,
    palette='tab10',
    ax=ax,
)
ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Anchor the legend just outside the right-hand edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1),
          borderaxespad=0);

In [None]:
# Encode the training split (X_t) and scatter the two latent coordinates,
# coloured by `y_t`.
latent_representation = latent_model_prellin.predict(X_t)

fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(
    x=latent_representation[:, 0],
    y=latent_representation[:, 1],
    hue=y_t,
    palette='tab10',
    ax=ax,
)
ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Anchor the legend just outside the right-hand edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1),
          borderaxespad=0);

In [None]:
# Encode the validation split (X_v) and scatter the two latent coordinates,
# coloured by `y_v`.
latent_representation = latent_model_prellin.predict(X_v)

fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(
    x=latent_representation[:, 0],
    y=latent_representation[:, 1],
    hue=y_v,
    palette='tab10',
    ax=ax,
)
ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Anchor the legend just outside the right-hand edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1),
          borderaxespad=0);

## 7.8 Hidden Layers: Relu + Batch Normalization - Latent Space: Linear: MULTIPLE OUTPUT and L1 reg

In [None]:
from tensorflow.keras.utils import to_categorical

# One-hot encode the label arrays; these encoded copies are the targets
# passed to the classification head in the training cell of this section.
y_t_ = to_categorical(y_t)
y_v_ = to_categorical(y_v)
y_test = to_categorical(test_labels)

# Report the shapes of the encoded arrays created above (not the raw
# y_t / y_v label vectors) so the one-hot width can be checked.
print(y_t_.shape, y_v_.shape, y_test.shape)

In [None]:
def model_autoencoder_mult(kernel="he_normal", input_dim=28*28, latent_vec_dim=2,
                           hidden_units=(500, 300, 100), l1=0.01):
  """Build a two-output dense autoencoder: BatchNorm + ReLU hidden layers, L1-regularised linear bottleneck.

  The encoder stacks Dense -> BatchNormalization -> ReLU blocks and ends in
  a linear Dense bottleneck whose kernel carries an L1 penalty. Two heads
  read the bottleneck: a 10-way softmax classifier ("Class_Output") and a
  mirrored decoder that ends in a sigmoid layer ("Ric_Output").

  Args:
    kernel: kernel initializer used by the hidden Dense layers.
    input_dim: length of the flattened input vector.
    latent_vec_dim: number of units in the bottleneck layer.
    hidden_units: encoder hidden widths, outermost first; the decoder
      uses the same widths in reverse order.
    l1: L1 penalty factor applied to the bottleneck kernel.

  Returns:
    Tuple (autoencoder_mult, latent_model). The first model outputs
    [reconstruction, class probabilities]; the second shares the same
    layers and maps inputs to the bottleneck activations.
  """
  input_layer = Input(shape=(input_dim,))

  # Encoder: Dense -> BatchNorm -> ReLU per hidden width.
  x = input_layer
  for units in hidden_units:
    x = Dense(units, kernel_initializer=kernel)(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.activations.relu(x)

  # Linear bottleneck with an L1 penalty on its weights.
  encoder = Dense(latent_vec_dim, activation='linear',
                  kernel_regularizer=tf.keras.regularizers.L1(l1=l1))(x)

  # Classification head reads the bottleneck directly.
  output_1 = Dense(10, activation='softmax', name="Class_Output")(encoder)

  # Decoder: mirror of the encoder, finishing with a sigmoid reconstruction.
  x = encoder
  for units in reversed(hidden_units):
    x = Dense(units, kernel_initializer=kernel)(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.activations.relu(x)
  decoder = Dense(input_dim, activation='sigmoid', name="Ric_Output")(x)

  # Full model (reconstruction + classification) and encoder-only view.
  autoencoder_mult = Model(inputs=input_layer, outputs=[decoder, output_1], name="Deep_Autoencoder")
  latent_model = Model(input_layer, encoder)

  return autoencoder_mult, latent_model

In [None]:
# Instantiate the ReLU / BatchNorm / L1 two-output model and its
# encoder-only companion, spelling out the default initializer.
autoencoder_prellin, latent_model_prellin = model_autoencoder_mult(kernel="he_normal")

# Print the layer-by-layer architecture of the full model.
autoencoder_prellin.summary()

In [None]:
# Joint objective: MSE for the reconstruction output (weight 0.75) and
# categorical cross-entropy for the class output (weight 0.25), with Adam.
optimizer = tf.keras.optimizers.Adam()
loss_0 = 'mse'
loss_1 = 'categorical_crossentropy'

autoencoder_prellin.compile(loss=[loss_0, loss_1], loss_weights=[0.75, 0.25], optimizer=optimizer)

# Train on X_t: the reconstruction target is the input itself and the
# classification target is the one-hot label array.
# NOTE(review): early_stop, Checkpoint and lr are defined elsewhere in the
# notebook — confirm they are safe to reuse across experiments.
history_prellin = autoencoder_prellin.fit(X_t, [X_t, y_t_], epochs=120, batch_size=128, callbacks=[early_stop, Checkpoint, lr],
                                          shuffle=True, validation_data=(X_v, [X_v, y_v_]))

# Save under a directory specific to this L1-regularised experiment. The
# previous path ('relu_lin_bn_mo') is the one used by the unregularised
# ReLU + BatchNorm experiment, so saving there overwrote that model.
autoencoder_prellin.save('/content/drive/MyDrive/Exercises/Autoencoders/relu_lin_bn_l1_mo/enc/autoencoder')
latent_model_prellin.save('/content/drive/MyDrive/Exercises/Autoencoders/relu_lin_bn_l1_mo/enc/encoder')

In [None]:
# Plot training and validation loss against the number of epochs.
# history['loss'] / ['val_loss'] hold the total objective minimised by fit():
# the 0.75/0.25-weighted sum of the reconstruction and classification losses
# (plus any layer regularisation penalty), not the reconstruction MSE alone,
# so the axis and title are labelled accordingly.
plt.figure(figsize=(10, 7))
plt.plot(history_prellin.history['loss'], label='Train')
plt.plot(history_prellin.history['val_loss'], label='Validation')
plt.ylabel('Total weighted loss')
plt.xlabel('Epoch')
plt.title('Autoencoder Total Loss (reconstruction + classification)', pad=13)
plt.legend(loc='upper right');

In [None]:
# Preview the first n reconstructed test images in a single row.
n = 15
# The model returns [reconstruction, class probabilities]; keep the first.
compressed_images = autoencoder_prellin.predict(test_images)[0]
fig, axes = plt.subplots(1, n, figsize=(22, 2))
for i, ax in enumerate(axes):
  # Each reconstruction is reshaped back to a 28x28 picture for display.
  ax.imshow(compressed_images[i].reshape(28, 28), cmap="gray")
  ax.axis('off')

plt.show()

In [None]:
# Encode the test images and scatter the two latent coordinates,
# coloured by `test_labels`.
latent_representation = latent_model_prellin.predict(test_images)

fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(
    x=latent_representation[:, 0],
    y=latent_representation[:, 1],
    hue=test_labels,
    palette='tab10',
    ax=ax,
)
ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Anchor the legend just outside the right-hand edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1),
          borderaxespad=0);

In [None]:
# Encode the training split (X_t) and scatter the two latent coordinates,
# coloured by `y_t`.
latent_representation = latent_model_prellin.predict(X_t)

fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(
    x=latent_representation[:, 0],
    y=latent_representation[:, 1],
    hue=y_t,
    palette='tab10',
    ax=ax,
)
ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Anchor the legend just outside the right-hand edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1),
          borderaxespad=0);

In [None]:
# Encode the validation split (X_v) and scatter the two latent coordinates,
# coloured by `y_v`.
latent_representation = latent_model_prellin.predict(X_v)

fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(
    x=latent_representation[:, 0],
    y=latent_representation[:, 1],
    hue=y_v,
    palette='tab10',
    ax=ax,
)
ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Anchor the legend just outside the right-hand edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1),
          borderaxespad=0);

## 7.9 Hidden Layers: Relu + Batch Normalization - Latent Space: Linear: MULTIPLE OUTPUT and L2 reg (l2=0.1)

In [None]:
from tensorflow.keras.utils import to_categorical

# One-hot encode the label arrays; these encoded copies are the targets
# passed to the classification head in the training cell of this section.
y_t_ = to_categorical(y_t)
y_v_ = to_categorical(y_v)
y_test = to_categorical(test_labels)

# Report the shapes of the encoded arrays created above (not the raw
# y_t / y_v label vectors) so the one-hot width can be checked.
print(y_t_.shape, y_v_.shape, y_test.shape)

In [None]:
def model_autoencoder_mult(kernel="he_normal", input_dim=28*28, latent_vec_dim=2,
                           hidden_units=(500, 300, 100), l2=0.1):
  """Build a two-output dense autoencoder: BatchNorm + ReLU hidden layers, L2-regularised linear bottleneck.

  The encoder stacks Dense -> BatchNormalization -> ReLU blocks and ends in
  a linear Dense bottleneck whose kernel carries an L2 penalty. Two heads
  read the bottleneck: a 10-way softmax classifier ("Class_Output") and a
  mirrored decoder that ends in a sigmoid layer ("Ric_Output").

  Args:
    kernel: kernel initializer used by the hidden Dense layers.
    input_dim: length of the flattened input vector.
    latent_vec_dim: number of units in the bottleneck layer.
    hidden_units: encoder hidden widths, outermost first; the decoder
      uses the same widths in reverse order.
    l2: L2 penalty factor applied to the bottleneck kernel.

  Returns:
    Tuple (autoencoder_mult, latent_model). The first model outputs
    [reconstruction, class probabilities]; the second shares the same
    layers and maps inputs to the bottleneck activations.
  """
  input_layer = Input(shape=(input_dim,))

  # Encoder: Dense -> BatchNorm -> ReLU per hidden width.
  x = input_layer
  for units in hidden_units:
    x = Dense(units, kernel_initializer=kernel)(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.activations.relu(x)

  # Linear bottleneck with an L2 penalty on its weights.
  encoder = Dense(latent_vec_dim, activation='linear',
                  kernel_regularizer=tf.keras.regularizers.L2(l2=l2))(x)

  # Classification head reads the bottleneck directly.
  output_1 = Dense(10, activation='softmax', name="Class_Output")(encoder)

  # Decoder: mirror of the encoder, finishing with a sigmoid reconstruction.
  x = encoder
  for units in reversed(hidden_units):
    x = Dense(units, kernel_initializer=kernel)(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.activations.relu(x)
  decoder = Dense(input_dim, activation='sigmoid', name="Ric_Output")(x)

  # Full model (reconstruction + classification) and encoder-only view.
  autoencoder_mult = Model(inputs=input_layer, outputs=[decoder, output_1], name="Deep_Autoencoder")
  latent_model = Model(input_layer, encoder)

  return autoencoder_mult, latent_model

In [None]:
# Instantiate the ReLU / BatchNorm / L2 two-output model and its
# encoder-only companion, spelling out the default initializer.
autoencoder_prellin, latent_model_prellin = model_autoencoder_mult(kernel="he_normal")

# Print the layer-by-layer architecture of the full model.
autoencoder_prellin.summary()

In [None]:
# Joint objective: MSE for the reconstruction output (weight 0.75) and
# categorical cross-entropy for the class output (weight 0.25), with Adam.
loss_0 = 'mse'
loss_1 = 'categorical_crossentropy'
optimizer = tf.keras.optimizers.Adam()

autoencoder_prellin.compile(
    optimizer=optimizer,
    loss=[loss_0, loss_1],
    loss_weights=[0.75, 0.25],
)

# Train on X_t: the reconstruction target is the input itself and the
# classification target is the one-hot label array.
# NOTE(review): early_stop, Checkpoint and lr are defined elsewhere in the
# notebook — confirm they are safe to reuse across experiments.
history_prellin = autoencoder_prellin.fit(
    X_t, [X_t, y_t_],
    validation_data=(X_v, [X_v, y_v_]),
    epochs=120,
    batch_size=128,
    shuffle=True,
    callbacks=[early_stop, Checkpoint, lr],
)

# Persist the full model and the encoder-only model.
autoencoder_prellin.save('/content/drive/MyDrive/Exercises/Autoencoders/relu_lin_bn_l2_mo/enc/autoencoder')
latent_model_prellin.save('/content/drive/MyDrive/Exercises/Autoencoders/relu_lin_bn_l2_mo/enc/encoder')

In [None]:
# Plot training and validation loss against the number of epochs.
# history['loss'] / ['val_loss'] hold the total objective minimised by fit():
# the 0.75/0.25-weighted sum of the reconstruction and classification losses
# (plus any layer regularisation penalty), not the reconstruction MSE alone,
# so the axis and title are labelled accordingly.
plt.figure(figsize=(10, 7))
plt.plot(history_prellin.history['loss'], label='Train')
plt.plot(history_prellin.history['val_loss'], label='Validation')
plt.ylabel('Total weighted loss')
plt.xlabel('Epoch')
plt.title('Autoencoder Total Loss (reconstruction + classification)', pad=13)
plt.legend(loc='upper right');

In [None]:
# Preview the first n reconstructed test images in a single row.
n = 15
# The model returns [reconstruction, class probabilities]; keep the first.
compressed_images = autoencoder_prellin.predict(test_images)[0]
fig, axes = plt.subplots(1, n, figsize=(22, 2))
for i, ax in enumerate(axes):
  # Each reconstruction is reshaped back to a 28x28 picture for display.
  ax.imshow(compressed_images[i].reshape(28, 28), cmap="gray")
  ax.axis('off')

plt.show()

In [None]:
# Encode the test images and scatter the two latent coordinates,
# coloured by `test_labels`.
latent_representation = latent_model_prellin.predict(test_images)

fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(
    x=latent_representation[:, 0],
    y=latent_representation[:, 1],
    hue=test_labels,
    palette='tab10',
    ax=ax,
)
ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Anchor the legend just outside the right-hand edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1),
          borderaxespad=0);

In [None]:
# Encode the training split (X_t) and scatter the two latent coordinates,
# coloured by `y_t`.
latent_representation = latent_model_prellin.predict(X_t)

fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(
    x=latent_representation[:, 0],
    y=latent_representation[:, 1],
    hue=y_t,
    palette='tab10',
    ax=ax,
)
ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Anchor the legend just outside the right-hand edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1),
          borderaxespad=0);

In [None]:
# Encode the validation split (X_v) and scatter the two latent coordinates,
# coloured by `y_v`.
latent_representation = latent_model_prellin.predict(X_v)

fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(
    x=latent_representation[:, 0],
    y=latent_representation[:, 1],
    hue=y_v,
    palette='tab10',
    ax=ax,
)
ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Anchor the legend just outside the right-hand edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1),
          borderaxespad=0);

## 7.10 Hidden Layers: Relu + Batch Normalization - Latent Space: Linear: MULTIPLE OUTPUT and L2 reg v2 (l2=1.0)

In [None]:
from tensorflow.keras.utils import to_categorical

# One-hot encode the label arrays; these encoded copies are the targets
# passed to the classification head in the training cell of this section.
y_t_ = to_categorical(y_t)
y_v_ = to_categorical(y_v)
y_test = to_categorical(test_labels)

# Report the shapes of the encoded arrays created above (not the raw
# y_t / y_v label vectors) so the one-hot width can be checked.
print(y_t_.shape, y_v_.shape, y_test.shape)

In [None]:
def model_autoencoder_mult(kernel="he_normal", input_dim=28*28, latent_vec_dim=2,
                           hidden_units=(500, 300, 100), l2=1.0):
  """Build a two-output dense autoencoder: BatchNorm + ReLU hidden layers, strongly L2-regularised linear bottleneck.

  The encoder stacks Dense -> BatchNormalization -> ReLU blocks and ends in
  a linear Dense bottleneck whose kernel carries an L2 penalty (default
  factor 1.0). Two heads read the bottleneck: a 10-way softmax classifier
  ("Class_Output") and a mirrored decoder that ends in a sigmoid layer
  ("Ric_Output").

  Args:
    kernel: kernel initializer used by the hidden Dense layers.
    input_dim: length of the flattened input vector.
    latent_vec_dim: number of units in the bottleneck layer.
    hidden_units: encoder hidden widths, outermost first; the decoder
      uses the same widths in reverse order.
    l2: L2 penalty factor applied to the bottleneck kernel.

  Returns:
    Tuple (autoencoder_mult, latent_model). The first model outputs
    [reconstruction, class probabilities]; the second shares the same
    layers and maps inputs to the bottleneck activations.
  """
  input_layer = Input(shape=(input_dim,))

  # Encoder: Dense -> BatchNorm -> ReLU per hidden width.
  x = input_layer
  for units in hidden_units:
    x = Dense(units, kernel_initializer=kernel)(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.activations.relu(x)

  # Linear bottleneck with an L2 penalty on its weights.
  encoder = Dense(latent_vec_dim, activation='linear',
                  kernel_regularizer=tf.keras.regularizers.L2(l2=l2))(x)

  # Classification head reads the bottleneck directly.
  output_1 = Dense(10, activation='softmax', name="Class_Output")(encoder)

  # Decoder: mirror of the encoder, finishing with a sigmoid reconstruction.
  x = encoder
  for units in reversed(hidden_units):
    x = Dense(units, kernel_initializer=kernel)(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.activations.relu(x)
  decoder = Dense(input_dim, activation='sigmoid', name="Ric_Output")(x)

  # Full model (reconstruction + classification) and encoder-only view.
  autoencoder_mult = Model(inputs=input_layer, outputs=[decoder, output_1], name="Deep_Autoencoder")
  latent_model = Model(input_layer, encoder)

  return autoencoder_mult, latent_model

In [None]:
# Instantiate the ReLU / BatchNorm / L2 (v2) two-output model and its
# encoder-only companion, spelling out the default initializer.
autoencoder_prellin, latent_model_prellin = model_autoencoder_mult(kernel="he_normal")

# Print the layer-by-layer architecture of the full model.
autoencoder_prellin.summary()

In [None]:
# Joint objective: MSE for the reconstruction output (weight 0.75) and
# categorical cross-entropy for the class output (weight 0.25), with Adam.
loss_0 = 'mse'
loss_1 = 'categorical_crossentropy'
optimizer = tf.keras.optimizers.Adam()

autoencoder_prellin.compile(
    optimizer=optimizer,
    loss=[loss_0, loss_1],
    loss_weights=[0.75, 0.25],
)

# Train on X_t: the reconstruction target is the input itself and the
# classification target is the one-hot label array.
# NOTE(review): early_stop, Checkpoint and lr are defined elsewhere in the
# notebook — confirm they are safe to reuse across experiments.
history_prellin = autoencoder_prellin.fit(
    X_t, [X_t, y_t_],
    validation_data=(X_v, [X_v, y_v_]),
    epochs=120,
    batch_size=128,
    shuffle=True,
    callbacks=[early_stop, Checkpoint, lr],
)

# Persist the full model and the encoder-only model.
autoencoder_prellin.save('/content/drive/MyDrive/Exercises/Autoencoders/relu_lin_bn_l2_mo_v2/enc/autoencoder')
latent_model_prellin.save('/content/drive/MyDrive/Exercises/Autoencoders/relu_lin_bn_l2_mo_v2/enc/encoder')

In [None]:
# Plot training and validation loss against the number of epochs.
# history['loss'] / ['val_loss'] hold the total objective minimised by fit():
# the 0.75/0.25-weighted sum of the reconstruction and classification losses
# (plus any layer regularisation penalty), not the reconstruction MSE alone,
# so the axis and title are labelled accordingly.
plt.figure(figsize=(10, 7))
plt.plot(history_prellin.history['loss'], label='Train')
plt.plot(history_prellin.history['val_loss'], label='Validation')
plt.ylabel('Total weighted loss')
plt.xlabel('Epoch')
plt.title('Autoencoder Total Loss (reconstruction + classification)', pad=13)
plt.legend(loc='upper right');

In [None]:
# Preview the first n reconstructed test images in a single row.
n = 15
# The model returns [reconstruction, class probabilities]; keep the first.
compressed_images = autoencoder_prellin.predict(test_images)[0]
fig, axes = plt.subplots(1, n, figsize=(22, 2))
for i, ax in enumerate(axes):
  # Each reconstruction is reshaped back to a 28x28 picture for display.
  ax.imshow(compressed_images[i].reshape(28, 28), cmap="gray")
  ax.axis('off')

plt.show()

In [None]:
# Encode the test images and scatter the two latent coordinates,
# coloured by `test_labels`.
latent_representation = latent_model_prellin.predict(test_images)

fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(
    x=latent_representation[:, 0],
    y=latent_representation[:, 1],
    hue=test_labels,
    palette='tab10',
    ax=ax,
)
ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Anchor the legend just outside the right-hand edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1),
          borderaxespad=0);

In [None]:
# Encode the training split (X_t) and scatter the two latent coordinates,
# coloured by `y_t`.
latent_representation = latent_model_prellin.predict(X_t)

fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(
    x=latent_representation[:, 0],
    y=latent_representation[:, 1],
    hue=y_t,
    palette='tab10',
    ax=ax,
)
ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Anchor the legend just outside the right-hand edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1),
          borderaxespad=0);

In [None]:
# Encode the validation split (X_v) and scatter the two latent coordinates,
# coloured by `y_v`.
latent_representation = latent_model_prellin.predict(X_v)

fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(
    x=latent_representation[:, 0],
    y=latent_representation[:, 1],
    hue=y_v,
    palette='tab10',
    ax=ax,
)
ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Anchor the legend just outside the right-hand edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1),
          borderaxespad=0);

## 7.11 Hidden Layers: Gelu - Latent Space: Linear: MULTIPLE OUTPUT

In [None]:
from tensorflow.keras.utils import to_categorical

# One-hot encode the label arrays; these encoded copies are the targets
# passed to the classification head in the training cell of this section.
y_t_ = to_categorical(y_t)
y_v_ = to_categorical(y_v)
y_test = to_categorical(test_labels)

# Report the shapes of the encoded arrays created above (not the raw
# y_t / y_v label vectors) so the one-hot width can be checked.
print(y_t_.shape, y_v_.shape, y_test.shape)

In [None]:
def model_autoencoder_mult(kernel="he_normal", input_dim=28*28, latent_vec_dim=2,
                           hidden_units=(500, 300, 100)):
  """Build a two-output dense autoencoder: GELU hidden layers, linear bottleneck.

  The encoder stacks Dense -> GELU blocks and ends in an unregularised
  linear Dense bottleneck. Two heads read the bottleneck: a 10-way softmax
  classifier ("Class_Output") and a mirrored decoder that ends in a sigmoid
  layer ("Ric_Output").

  Args:
    kernel: kernel initializer used by the hidden Dense layers.
    input_dim: length of the flattened input vector.
    latent_vec_dim: number of units in the bottleneck layer.
    hidden_units: encoder hidden widths, outermost first; the decoder
      uses the same widths in reverse order.

  Returns:
    Tuple (autoencoder_mult, latent_model). The first model outputs
    [reconstruction, class probabilities]; the second shares the same
    layers and maps inputs to the bottleneck activations.
  """
  input_layer = Input(shape=(input_dim,))

  # Encoder: one Dense -> GELU block per hidden width. Batch normalisation is
  # left out of this variant (an earlier note suggests it hurt results —
  # TODO confirm).
  x = input_layer
  for units in hidden_units:
    x = Dense(units, kernel_initializer=kernel)(x)
    x = tf.keras.activations.gelu(x)

  # Linear bottleneck (no normalisation, no regularisation).
  encoder = Dense(latent_vec_dim, activation='linear')(x)

  # Classification head reads the bottleneck directly.
  output_1 = Dense(10, activation='softmax', name="Class_Output")(encoder)

  # Decoder: mirror of the encoder, finishing with a sigmoid reconstruction.
  x = encoder
  for units in reversed(hidden_units):
    x = Dense(units, kernel_initializer=kernel)(x)
    x = tf.keras.activations.gelu(x)
  decoder = Dense(input_dim, activation='sigmoid', name="Ric_Output")(x)

  # Full model (reconstruction + classification) and encoder-only view.
  autoencoder_mult = Model(inputs=input_layer, outputs=[decoder, output_1], name="Deep_Autoencoder")
  latent_model = Model(input_layer, encoder)

  return autoencoder_mult, latent_model

In [None]:
# Instantiate the GELU two-output model and its encoder-only companion,
# spelling out the default initializer.
autoencoder_prellin, latent_model_prellin = model_autoencoder_mult(kernel="he_normal")

# Print the layer-by-layer architecture of the full model.
autoencoder_prellin.summary()

In [None]:
# Joint objective: MSE for the reconstruction output (weight 0.75) and
# categorical cross-entropy for the class output (weight 0.25), with Adam.
loss_0 = 'mse'
loss_1 = 'categorical_crossentropy'
optimizer = tf.keras.optimizers.Adam()

autoencoder_prellin.compile(
    optimizer=optimizer,
    loss=[loss_0, loss_1],
    loss_weights=[0.75, 0.25],
)

# Train on X_t: the reconstruction target is the input itself and the
# classification target is the one-hot label array.
# NOTE(review): early_stop, Checkpoint and lr are defined elsewhere in the
# notebook — confirm they are safe to reuse across experiments.
history_prellin = autoencoder_prellin.fit(
    X_t, [X_t, y_t_],
    validation_data=(X_v, [X_v, y_v_]),
    epochs=120,
    batch_size=128,
    shuffle=True,
    callbacks=[early_stop, Checkpoint, lr],
)

# Persist the full model and the encoder-only model.
autoencoder_prellin.save('/content/drive/MyDrive/Exercises/Autoencoders/gelu_lin_mo/enc/autoencoder')
latent_model_prellin.save('/content/drive/MyDrive/Exercises/Autoencoders/gelu_lin_mo/enc/encoder')

In [None]:
# Plot training and validation loss against the number of epochs.
# history['loss'] / ['val_loss'] hold the total objective minimised by fit():
# the 0.75/0.25-weighted sum of the reconstruction and classification losses
# (plus any layer regularisation penalty), not the reconstruction MSE alone,
# so the axis and title are labelled accordingly.
plt.figure(figsize=(10, 7))
plt.plot(history_prellin.history['loss'], label='Train')
plt.plot(history_prellin.history['val_loss'], label='Validation')
plt.ylabel('Total weighted loss')
plt.xlabel('Epoch')
plt.title('Autoencoder Total Loss (reconstruction + classification)', pad=13)
plt.legend(loc='upper right');

In [None]:
# Preview the first n reconstructed test images in a single row.
n = 15
# The model returns [reconstruction, class probabilities]; keep the first.
compressed_images = autoencoder_prellin.predict(test_images)[0]
fig, axes = plt.subplots(1, n, figsize=(22, 2))
for i, ax in enumerate(axes):
  # Each reconstruction is reshaped back to a 28x28 picture for display.
  ax.imshow(compressed_images[i].reshape(28, 28), cmap="gray")
  ax.axis('off')

plt.show()

In [None]:
# Encode the test images and scatter the two latent coordinates,
# coloured by `test_labels`.
latent_representation = latent_model_prellin.predict(test_images)

fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(
    x=latent_representation[:, 0],
    y=latent_representation[:, 1],
    hue=test_labels,
    palette='tab10',
    ax=ax,
)
ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Anchor the legend just outside the right-hand edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1),
          borderaxespad=0);

In [None]:
# Encode the training split (X_t) and scatter the two latent coordinates,
# coloured by `y_t`.
latent_representation = latent_model_prellin.predict(X_t)

fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(
    x=latent_representation[:, 0],
    y=latent_representation[:, 1],
    hue=y_t,
    palette='tab10',
    ax=ax,
)
ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Anchor the legend just outside the right-hand edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1),
          borderaxespad=0);

In [None]:
# Encode the validation split (X_v) and scatter the two latent coordinates,
# coloured by `y_v`.
latent_representation = latent_model_prellin.predict(X_v)

fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(
    x=latent_representation[:, 0],
    y=latent_representation[:, 1],
    hue=y_v,
    palette='tab10',
    ax=ax,
)
ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)

# Anchor the legend just outside the right-hand edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1),
          borderaxespad=0);

## 7.12 Hidden Layers: Gelu - Latent Space: Linear: MULTIPLE OUTPUT + L1 reg

In [None]:
from tensorflow.keras.utils import to_categorical

# One-hot encode the training, validation and test label arrays.
y_t_ = to_categorical(y_t)
y_v_ = to_categorical(y_v)
y_test = to_categorical(test_labels)

# Report the shapes of the encoded arrays created above (not the raw
# y_t / y_v label vectors) so the one-hot width can be checked.
print(y_t_.shape, y_v_.shape, y_test.shape)

In [None]:
def model_autoencoder_mult(kernel="he_normal", input_dim=28*28, latent_vec_dim=2,
                           n_classes=10, latent_l1=0.1):
  """Build a dense autoencoder with an auxiliary classifier on the latent code.

  Encoder: Dense(500) -> Dense(300) -> Dense(100), each followed by GELU, then
  a linear Dense(latent_vec_dim) bottleneck whose kernel carries an L1 penalty.
  Two heads read the bottleneck: a mirrored GELU decoder ending in the sigmoid
  "Ric_Output" layer, and the softmax "Class_Output" layer.

  Args:
    kernel: kernel initializer used by the hidden Dense layers.
    input_dim: length of the flat input / reconstruction vector.
    latent_vec_dim: number of units in the bottleneck.
    n_classes: number of units in the softmax classification head.
    latent_l1: L1 regularisation factor applied to the bottleneck kernel.

  Returns:
    Tuple (autoencoder_mult, latent_model): the two-output model
    [reconstruction, class probabilities] and the input -> bottleneck model.
    Both share the same layers.
  """
  hidden_units = (500, 300, 100)

  input_layer = Input(shape=(input_dim,))

  # Encoder trunk. BatchNormalization after each Dense was tried by the author
  # and left disabled (the original note reads "bn worsen the ...").
  hidden = input_layer
  for units in hidden_units:
    hidden = Dense(units, kernel_initializer=kernel)(hidden)
    hidden = tf.keras.activations.gelu(hidden)

  # Linear bottleneck with an L1 penalty on its weights.
  encoder = Dense(latent_vec_dim, activation='linear',
                  kernel_regularizer=tf.keras.regularizers.L1(l1=latent_l1))(hidden)

  # Auxiliary classifier reading the bottleneck directly.
  class_output = Dense(n_classes, activation='softmax', name="Class_Output")(encoder)

  # Decoder trunk mirrors the encoder widths in reverse order.
  hidden = encoder
  for units in reversed(hidden_units):
    hidden = Dense(units, kernel_initializer=kernel)(hidden)
    hidden = tf.keras.activations.gelu(hidden)

  decoder = Dense(input_dim, activation='sigmoid', name="Ric_Output")(hidden)

  # Output order matters: callers pass losses/targets as [reconstruction, class].
  autoencoder_mult = Model(inputs=input_layer, outputs=[decoder, class_output],
                           name="Deep_Autoencoder")

  # Encoder-only view used to inspect the latent representation.
  latent_model = Model(input_layer, encoder)

  return autoencoder_mult, latent_model

In [None]:
# Build the two-output autoencoder together with its encoder-only model,
# then print the layer summary of the full model.
(autoencoder_prellin,
 latent_model_prellin) = model_autoencoder_mult(kernel="he_normal")
autoencoder_prellin.summary()

In [None]:
# Compile: MSE for the reconstruction output and categorical cross-entropy for
# the class output (same order as the model's outputs), weighted 0.75 / 0.25.
loss_0 = 'mse'
loss_1 = 'categorical_crossentropy'
optimizer = tf.keras.optimizers.Adam()

autoencoder_prellin.compile(
    optimizer=optimizer,
    loss=[loss_0, loss_1],
    loss_weights=[0.75, 0.25],
)

# Fit: the input doubles as the reconstruction target, the one-hot labels
# feed the classification output.
history_prellin = autoencoder_prellin.fit(
    X_t, [X_t, y_t_],
    validation_data=(X_v, [X_v, y_v_]),
    epochs=120,
    batch_size=128,
    shuffle=True,
    callbacks=[early_stop, Checkpoint, lr],
)

# Persist the full model and the encoder-only model.
autoencoder_prellin.save('/content/drive/MyDrive/Exercises/Autoencoders/gelu_lin_mo_l1/enc/autoencoder')
latent_model_prellin.save('/content/drive/MyDrive/Exercises/Autoencoders/gelu_lin_mo_l1/enc/encoder')

In [None]:
# Plot training and validation loss against the number of epochs.
# history['loss'] / history['val_loss'] hold the total objective Keras
# minimised: the weighted sum of the reconstruction MSE and the classification
# cross-entropy, plus the bottleneck regularisation penalty. It is not the MSE
# alone, so the axis and title are labelled as the total loss.
plt.figure(figsize=(10, 7))
plt.plot(history_prellin.history['loss'], label='Train')
plt.plot(history_prellin.history['val_loss'], label='Validation')
plt.ylabel('Total weighted loss')
plt.xlabel('Epoch')
plt.title('Autoencoder Training Loss (reconstruction + classification)', pad=13)
plt.legend(loc='upper right');

In [None]:
# Reconstruct the test images (output 0 is the reconstruction head) and show
# the first n in a single row.
compressed_images = autoencoder_prellin.predict(test_images)[0]
n = 15
fig, axes = plt.subplots(1, n, figsize=(22, 2))
for ax, image in zip(axes, compressed_images):
  # Each reconstruction is a flat vector; fold it back to 28x28 for display.
  ax.imshow(image.reshape(28, 28), cmap="gray")
  ax.axis('off')

plt.show()

In [None]:
# Encode the test images and scatter the two latent coordinates,
# coloured by test label.
latent_representation = latent_model_prellin.predict(test_images)

fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(
    x=latent_representation[:, 0],
    y=latent_representation[:, 1],
    hue=test_labels,
    palette='tab10',
    ax=ax,
)
ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)
# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1), borderaxespad=0);

In [None]:
# Encode the training inputs X_t and scatter the two latent coordinates,
# coloured by y_t.
latent_representation = latent_model_prellin.predict(X_t)

fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(
    x=latent_representation[:, 0],
    y=latent_representation[:, 1],
    hue=y_t,
    palette='tab10',
    ax=ax,
)
ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)
# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1), borderaxespad=0);

In [None]:
# Encode the validation inputs X_v and scatter the two latent coordinates,
# coloured by y_v.
latent_representation = latent_model_prellin.predict(X_v)

fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(
    x=latent_representation[:, 0],
    y=latent_representation[:, 1],
    hue=y_v,
    palette='tab10',
    ax=ax,
)
ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)
# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1), borderaxespad=0);

## 7.1 Hidden Layers: Gelu - Latent Space: Linear: MULTIPLE OUTPUT + L2 reg

In [None]:
from tensorflow.keras.utils import to_categorical

# One-hot encode the integer labels for the softmax classification head.
# The width is pinned to 10 (the size of the "Class_Output" layer) so every
# split gets the same number of columns even if a class is missing from it;
# without num_classes, to_categorical infers the width from the largest label.
NUM_CLASSES = 10
y_t_ = to_categorical(y_t, num_classes=NUM_CLASSES)
y_v_ = to_categorical(y_v, num_classes=NUM_CLASSES)
y_test = to_categorical(test_labels, num_classes=NUM_CLASSES)

# Shapes of the one-hot targets that are actually fed to the model.
print(y_t_.shape, y_v_.shape, y_test.shape)

In [None]:
def model_autoencoder_mult(kernel="he_normal", input_dim=28*28, latent_vec_dim=2,
                           n_classes=10, latent_l2=0.01):
  """Build a dense autoencoder with an auxiliary classifier on the latent code.

  Encoder: Dense(500) -> Dense(300) -> Dense(100), each followed by GELU, then
  a linear Dense(latent_vec_dim) bottleneck whose kernel carries an L2 penalty.
  Two heads read the bottleneck: a mirrored GELU decoder ending in the sigmoid
  "Ric_Output" layer, and the softmax "Class_Output" layer.

  Args:
    kernel: kernel initializer used by the hidden Dense layers.
    input_dim: length of the flat input / reconstruction vector.
    latent_vec_dim: number of units in the bottleneck.
    n_classes: number of units in the softmax classification head.
    latent_l2: L2 regularisation factor applied to the bottleneck kernel.

  Returns:
    Tuple (autoencoder_mult, latent_model): the two-output model
    [reconstruction, class probabilities] and the input -> bottleneck model.
    Both share the same layers.
  """
  hidden_units = (500, 300, 100)

  input_layer = Input(shape=(input_dim,))

  # Encoder trunk. BatchNormalization after each Dense was tried by the author
  # and left disabled (the original note reads "bn worsen the ...").
  hidden = input_layer
  for units in hidden_units:
    hidden = Dense(units, kernel_initializer=kernel)(hidden)
    hidden = tf.keras.activations.gelu(hidden)

  # Linear bottleneck with an L2 penalty on its weights.
  encoder = Dense(latent_vec_dim, activation='linear',
                  kernel_regularizer=tf.keras.regularizers.L2(l2=latent_l2))(hidden)

  # Auxiliary classifier reading the bottleneck directly.
  class_output = Dense(n_classes, activation='softmax', name="Class_Output")(encoder)

  # Decoder trunk mirrors the encoder widths in reverse order.
  hidden = encoder
  for units in reversed(hidden_units):
    hidden = Dense(units, kernel_initializer=kernel)(hidden)
    hidden = tf.keras.activations.gelu(hidden)

  decoder = Dense(input_dim, activation='sigmoid', name="Ric_Output")(hidden)

  # Output order matters: callers pass losses/targets as [reconstruction, class].
  autoencoder_mult = Model(inputs=input_layer, outputs=[decoder, class_output],
                           name="Deep_Autoencoder")

  # Encoder-only view used to inspect the latent representation.
  latent_model = Model(input_layer, encoder)

  return autoencoder_mult, latent_model

In [None]:
# Build the two-output autoencoder together with its encoder-only model,
# then print the layer summary of the full model.
(autoencoder_prellin,
 latent_model_prellin) = model_autoencoder_mult(kernel="he_normal")
autoencoder_prellin.summary()

In [None]:
# Compile: MSE for the reconstruction output and categorical cross-entropy for
# the class output (same order as the model's outputs), weighted 0.75 / 0.25.
loss_0 = 'mse'
loss_1 = 'categorical_crossentropy'
optimizer = tf.keras.optimizers.Adam()

autoencoder_prellin.compile(
    optimizer=optimizer,
    loss=[loss_0, loss_1],
    loss_weights=[0.75, 0.25],
)

# Fit: the input doubles as the reconstruction target, the one-hot labels
# feed the classification output.
history_prellin = autoencoder_prellin.fit(
    X_t, [X_t, y_t_],
    validation_data=(X_v, [X_v, y_v_]),
    epochs=120,
    batch_size=128,
    shuffle=True,
    callbacks=[early_stop, Checkpoint, lr],
)

# Persist the full model and the encoder-only model.
autoencoder_prellin.save('/content/drive/MyDrive/Exercises/Autoencoders/gelu_lin_mo_l2/enc/autoencoder')
latent_model_prellin.save('/content/drive/MyDrive/Exercises/Autoencoders/gelu_lin_mo_l2/enc/encoder')

In [None]:
# Plot training and validation loss against the number of epochs.
# history['loss'] / history['val_loss'] hold the total objective Keras
# minimised: the weighted sum of the reconstruction MSE and the classification
# cross-entropy, plus the bottleneck regularisation penalty. It is not the MSE
# alone, so the axis and title are labelled as the total loss.
plt.figure(figsize=(10, 7))
plt.plot(history_prellin.history['loss'], label='Train')
plt.plot(history_prellin.history['val_loss'], label='Validation')
plt.ylabel('Total weighted loss')
plt.xlabel('Epoch')
plt.title('Autoencoder Training Loss (reconstruction + classification)', pad=13)
plt.legend(loc='upper right');

In [None]:
# Reconstruct the test images (output 0 is the reconstruction head) and show
# the first n in a single row.
compressed_images = autoencoder_prellin.predict(test_images)[0]
n = 18
fig, axes = plt.subplots(1, n, figsize=(25, 2))
for ax, image in zip(axes, compressed_images):
  # Each reconstruction is a flat vector; fold it back to 28x28 for display.
  ax.imshow(image.reshape(28, 28), cmap="gray")
  ax.axis('off')

plt.show()

In [None]:
# Encode the test images and scatter the two latent coordinates,
# coloured by test label.
latent_representation = latent_model_prellin.predict(test_images)

fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(
    x=latent_representation[:, 0],
    y=latent_representation[:, 1],
    hue=test_labels,
    palette='tab10',
    ax=ax,
)
ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)
# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1), borderaxespad=0);

In [None]:
# Encode the training inputs X_t and scatter the two latent coordinates,
# coloured by y_t.
latent_representation = latent_model_prellin.predict(X_t)

fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(
    x=latent_representation[:, 0],
    y=latent_representation[:, 1],
    hue=y_t,
    palette='tab10',
    ax=ax,
)
ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)
# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1), borderaxespad=0);

In [None]:
# Encode the validation inputs X_v and scatter the two latent coordinates,
# coloured by y_v.
latent_representation = latent_model_prellin.predict(X_v)

fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(
    x=latent_representation[:, 0],
    y=latent_representation[:, 1],
    hue=y_v,
    palette='tab10',
    ax=ax,
)
ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)
# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1), borderaxespad=0);

## 7.10 VAE: MULTIPLE OUTPUT

In [None]:
from tensorflow.keras.utils import to_categorical

# One-hot encode the integer labels for the softmax classification head.
# The width is pinned to 10 (the size of the "Class_Output" layer) so every
# split gets the same number of columns even if a class is missing from it;
# without num_classes, to_categorical infers the width from the largest label.
NUM_CLASSES = 10
y_t_ = to_categorical(y_t, num_classes=NUM_CLASSES)
y_v_ = to_categorical(y_v, num_classes=NUM_CLASSES)
y_test = to_categorical(test_labels, num_classes=NUM_CLASSES)

# Shapes of the one-hot targets that are actually fed to the model.
print(y_t_.shape, y_v_.shape, y_test.shape)

In [None]:
class Sampling(layers.Layer):
    """Reparameterisation layer: z = z_mean + exp(0.5 * z_log_var) * noise.

    Takes the pair (z_mean, z_log_var) and returns a sample z, where the noise
    is drawn from a standard normal with one value per sample and latent
    dimension.
    """

    def call(self, inputs):
        mean, log_var = inputs
        mean_shape = tf.shape(mean)
        # Noise has the same (batch, dim) extent as the mean tensor.
        noise = tf.keras.backend.random_normal(shape=(mean_shape[0], mean_shape[1]))
        # exp(0.5 * log-variance) is the standard deviation.
        std = tf.exp(0.5 * log_var)
        return mean + std * noise

In [None]:
# Flat input length and size of the latent code.
input_dim = 28*28
latent_vec_dim = 2

input_layer = Input(shape=(input_dim,))

# Encoder trunk: Dense + PReLU blocks of decreasing width.
encoder_hidden = input_layer
for width in (500, 300, 100, 32):
    encoder_hidden = Dense(width, kernel_initializer='he_normal')(encoder_hidden)
    encoder_hidden = tf.keras.layers.PReLU()(encoder_hidden)

# Two parallel linear heads produce the mean and log-variance; Sampling
# combines them with random noise into the latent sample z.
z_mean = layers.Dense(latent_vec_dim, name="z_mean")(encoder_hidden)
z_log_var = layers.Dense(latent_vec_dim, name="z_log_var")(encoder_hidden)
z = Sampling()([z_mean, z_log_var])

# The encoder model exposes all three tensors, in this order.
encoder = keras.Model(input_layer, [z_mean, z_log_var, z], name="encoder")
encoder.summary()


In [None]:
# Decoder: maps a latent vector to a reconstruction and to class probabilities.
latent_inputs = keras.Input(shape=(latent_vec_dim,))

# Reconstruction trunk: Dense + PReLU blocks of increasing width.
decoder_hidden = latent_inputs
for width in (32, 100, 300, 500):
    decoder_hidden = Dense(width, kernel_initializer='he_normal')(decoder_hidden)
    decoder_hidden = tf.keras.layers.PReLU()(decoder_hidden)

# Head 1: sigmoid reconstruction of the flat input.
output_1 = Dense(input_dim, activation='sigmoid')(decoder_hidden)
# Head 2: softmax classifier attached directly to the latent vector.
output_2 = Dense(10, activation='softmax', name="Class_Output")(latent_inputs)

decoder = keras.Model(latent_inputs, [output_1, output_2], name="decoder")
decoder.summary()

In [None]:
# Chain encoder and decoder into the end-to-end VAE. The encoder returns
# [z_mean, z_log_var, z]; only the sampled z (index 2) is fed to the decoder.
encoded = encoder(input_layer)
outputs = decoder(encoded[2])
vae = keras.Model(input_layer, outputs, name='vae_mlp')

What we've done so far allows us to instantiate 3 models:

* an end-to-end autoencoder mapping inputs to reconstructions
* an encoder mapping inputs to the latent space
* a generator that can take points on the latent space and will output the corresponding reconstructed samples.

We train the model using the end-to-end model, with a custom loss function: the sum of a reconstruction term, and the KL divergence regularization term.

In [None]:
#reconstruction_loss = keras.losses.binary_crossentropy(input_layer, outputs[0])
#reconstruction_loss *= input_dim
#kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
#kl_loss = K.sum(kl_loss, axis=-1)
#kl_loss *= -0.5
#vae_loss = K.mean(reconstruction_loss + kl_loss)
#vae.add_loss(vae_loss)
#vae.compile(optimizer='adam')

def vae_loss(x, z_decoded):
    """Binary cross-entropy reconstruction loss plus a scaled KL-style penalty.

    Args:
        x: target values for the reconstruction output.
        z_decoded: values predicted by the reconstruction output.

    Returns:
        Mean over the batch of (cross-entropy + penalty), where the penalty is
        -5e-4 * mean(1 + z_log_var - z_mean**2 - exp(z_log_var)) over the
        last axis.

    NOTE(review): z_mean and z_log_var are not arguments; they are the
    encoder tensors read from notebook scope. Whether Keras accepts such
    symbolic tensors inside a loss function depends on the TensorFlow
    version / execution mode -- confirm; `Model.add_loss` is the usual
    alternative.
    """
    # Reconstruction term.
    reconstruction = keras.metrics.binary_crossentropy(x, z_decoded)
    # KL-divergence term, averaged over the latent dimensions and scaled.
    kl_inner = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
    kl_penalty = -5e-4 * K.mean(kl_inner, axis=-1)
    return K.mean(reconstruction + kl_penalty)

In [None]:
# Compile the VAE: the custom vae_loss on the reconstruction output and
# categorical cross-entropy on the class output, weighted 0.75 / 0.25.
loss_0 = vae_loss
loss_1 = 'categorical_crossentropy'
optimizer = tf.keras.optimizers.Adam()

vae.compile(
    optimizer=optimizer,
    loss=[loss_0, loss_1],
    loss_weights=[0.75, 0.25],
)

In [None]:
# Train the VAE: the input is also the reconstruction target, and the one-hot
# labels are the target of the class output.
history_vae = vae.fit(
    X_t, [X_t, y_t_],
    validation_data=(X_v, [X_v, y_v_]),
    batch_size=128,
    epochs=120,
    shuffle=True,
    callbacks=[early_stop, Checkpoint, lr],
)

In [None]:
# Run the encoder on the test images. The result is [z_mean, z_log_var, z];
# this plot shows the sampled z (index 2), coloured by test label.
latent_representation = encoder.predict(test_images)

fig, ax = plt.subplots(figsize=(15, 9))
sns.scatterplot(
    x=latent_representation[2][:, 0],
    y=latent_representation[2][:, 1],
    hue=test_labels,
    palette='tab10',
    ax=ax,
)
ax.set_xlabel("Encoder first dimension")
ax.set_ylabel("Encoder second dimension")
ax.grid(linestyle='--', alpha=0.5)
# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1), borderaxespad=0);

In [None]:
# latent_representation is [z_mean, z_log_var, z]; index 1 is the predicted
# log-variance, so the axes are labelled as log-variances rather than as
# latent coordinates.
# NOTE(review): if the deterministic embedding was intended here, plot
# latent_representation[0] (z_mean) instead -- confirm.
log_var_test = latent_representation[1]

fig, ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(
    x=log_var_test[:, 0],
    y=log_var_test[:, 1],
    hue=test_labels,
    palette='tab10',
    ax=ax,
)
ax.set_xlabel("z_log_var first dimension")
ax.set_ylabel("z_log_var second dimension")
ax.grid(linestyle='--', alpha=0.5)
# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1), borderaxespad=0);

In [None]:
# Plot training and validation loss against the number of epochs.
# history['loss'] is the total weighted objective (custom VAE loss on the
# reconstruction output plus cross-entropy on the class output), and the model
# was compiled with Adam, so the title names both accordingly.
plt.figure(figsize=(10, 7))
plt.plot(history_vae.history['loss'], label='Train')
plt.plot(history_vae.history['val_loss'], label='Validation')
plt.ylabel('Customized Loss')
plt.xlabel('Epoch')
plt.title('VAE Total Loss - Adam', pad=13)
plt.legend(loc='upper right');

In [None]:
# vae has two outputs, so predict() returns [reconstructions, class
# probabilities]. Take element 0; without it the loop below would index the
# output list instead of individual images and the reshape would fail.
compressed_images = vae.predict(test_images)[0]
n = 5
plt.figure(figsize=(9, 2))
for i in range(n):
  ax = plt.subplot(1, n, i+1)
  plt.imshow(compressed_images[i].reshape(28, 28), cmap="gray")

  # Title each reconstruction with the true label of its source image.
  plt.title(test_labels[i])
  ax.axis('off')

plt.show()

In [None]:
# Display a 2D manifold of the digits
n = 20  # the mosaic holds n x n decoded digits
digit_size = 28
# Sample n evenly spaced points in [-3, 3] along each latent axis.
grid_x = np.linspace(-3, 3, n)
grid_y = np.linspace(-3, 3, n)

# Build every latent point up front, row-major over the mosaic: mosaic row i
# uses grid_x[i] as the second latent coordinate and mosaic column j uses
# grid_y[j] as the first.
z_grid = np.array([[xi, yi] for yi in grid_x for xi in grid_y])

# Decode the whole grid with a single predict call instead of one call per
# point; output 0 of the decoder is the reconstruction head.
digits = decoder.predict(z_grid)[0].reshape(n, n, digit_size, digit_size)

# Tile the digits: (row, col, h, w) -> (row, h, col, w) -> one 2-D image.
figure = digits.transpose(0, 2, 1, 3).reshape(digit_size * n, digit_size * n)

plt.figure(figsize=(10, 10))
plt.imshow(figure, cmap="gray")
plt.show()

In [None]:
# Persist the end-to-end VAE and the encoder model.
vae.save('/content/drive/MyDrive/Exercises/Autoencoders/vae_prelu/autoencoder')
encoder.save('/content/drive/MyDrive/Exercises/Autoencoders/vae_prelu/encoder')

In [None]:
# Reconstruct the test images with the VAE (output 0 is the reconstruction)
# and show the first n in a single row.
compressed_images = vae.predict(test_images)[0]
n = 15
fig, axes = plt.subplots(1, n, figsize=(22, 2))
for ax, image in zip(axes, compressed_images):
  # Each reconstruction is a flat vector; fold it back to 28x28 for display.
  ax.imshow(image.reshape(28, 28), cmap="gray")
  ax.axis('off')

plt.show()

# END