<a href="https://colab.research.google.com/github/iPoetDev/ibm-skills-ai-colab-sessions/blob/main/Session3_VAE.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
from tensorflow.keras import layers
import numpy as np
import matplotlib.pyplot as plt

# Load the MNIST dataset

In [None]:

# Fetch MNIST (images of handwritten digits). The label arrays are thrown
# away: an autoencoder is trained to reproduce its own input.
(x_train, _), (x_test, _) = tf.keras.datasets.mnist.load_data()

# Raw pixel values lie between 0 and 255; rescale them to float32 in [0, 1]
# so they are comparable with the decoder's sigmoid output.
x_train = x_train.astype(np.float32) / 255.0
x_test = x_test.astype(np.float32) / 255.0

# Append an explicit channel axis: (num_images, 28, 28) -> (num_images, 28, 28, 1).
# The images are greyscale, hence a single channel; -1 lets NumPy infer the
# number of images.
x_train = x_train.reshape((-1, 28, 28, 1))
x_test = x_test.reshape((-1, 28, 28, 1))

# Encoder

In [None]:
# Encoder
# Maps a 28x28x1 image to the two vectors (z_mean, z_log_var) that describe
# its position in a latent space of size latent_dim.
latent_dim = 2

# The input shape must match the reshaped MNIST images.
encoder_inputs = tf.keras.Input(shape=(28, 28, 1))

# Two strided convolutions extract image features. Each uses 3x3 filters and
# ReLU; with strides=2 and padding='same' every layer halves the spatial
# size (28 -> 14 -> 7) while the filter count grows from 32 to 64.
x = layers.Conv2D(
    filters=32,
    kernel_size=3,
    strides=2,
    padding='same',
    activation='relu',
)(encoder_inputs)
x = layers.Conv2D(
    filters=64,
    kernel_size=3,
    strides=2,
    padding='same',
    activation='relu',
)(x)

# Collapse the feature maps to one vector per image, then compress it.
x = layers.Flatten()(x)
x = layers.Dense(units=16, activation='relu')(x)

# Two linear heads of width latent_dim: the mean and the log-variance of the
# latent distribution for each input image.
z_mean = layers.Dense(units=latent_dim)(x)
z_log_var = layers.Dense(units=latent_dim)(x)



# VAE Sampling

In [None]:
# Sampling function for the VAE
# Instead of passing z_mean straight to the decoder, the VAE perturbs it with
# random noise whose spread is controlled by z_log_var. This is what lets the
# model produce diverse and realistic outputs.
def sampling(args):
    """Draw one latent sample per row of ``z_mean``.

    Args:
        args: A pair ``(z_mean, z_log_var)`` of 2-D tensors; the noise is
            drawn with the first two dimensions of ``z_mean``.

    Returns:
        ``z_mean + exp(0.5 * z_log_var) * epsilon``, where ``epsilon`` is
        drawn from ``tf.keras.backend.random_normal`` with its default
        mean and standard deviation.
    """
    mean, log_var = args
    n_rows = tf.shape(mean)[0]
    n_cols = tf.shape(mean)[1]
    noise = tf.keras.backend.random_normal(shape=(n_rows, n_cols))
    # exp(0.5 * log variance) is the standard deviation.
    std_dev = tf.keras.backend.exp(0.5 * log_var)
    return mean + std_dev * noise


# Wrap the function in a Lambda layer so the sampled z becomes part of the
# encoder graph.
z = layers.Lambda(sampling)([z_mean, z_log_var])



# Decoder

In [None]:
# Decoder
# Mirror image of the encoder: takes a latent vector of size latent_dim and
# expands it step by step back into a 28x28x1 image.
decoder_inputs = tf.keras.Input(shape=(latent_dim,))

# Project the latent vector to 7*7*64 values and view them as a 7x7 feature
# map with 64 channels.
x = layers.Dense(units=7 * 7 * 64, activation='relu')(decoder_inputs)
x = layers.Reshape(target_shape=(7, 7, 64))(x)

# Two stride-2 transposed convolutions upsample 7 -> 14 -> 28.
x = layers.Conv2DTranspose(
    filters=64,
    kernel_size=3,
    strides=2,
    padding='same',
    activation='relu',
)(x)
x = layers.Conv2DTranspose(
    filters=32,
    kernel_size=3,
    strides=2,
    padding='same',
    activation='relu',
)(x)

# Final layer: a single output channel with sigmoid, so every pixel lies in
# (0, 1) like the normalized training images.
decoder_outputs = layers.Conv2DTranspose(
    filters=1,
    kernel_size=3,
    padding='same',
    activation='sigmoid',
)(x)



# VAE Model

In [None]:
# VAE Model
# Three models share the layers defined above:
#   encoder: image  -> [z_mean, z_log_var, z]
#   decoder: latent -> reconstructed image
#   vae:     image  -> reconstructed image (encoder followed by decoder)
encoder = tf.keras.Model(
    inputs=encoder_inputs,
    outputs=[z_mean, z_log_var, z],
    name='encoder',
)
decoder = tf.keras.Model(
    inputs=decoder_inputs,
    outputs=decoder_outputs,
    name='decoder',
)

# Index 2 of the encoder's outputs is the sampled z; only that is decoded.
outputs = decoder(encoder(encoder_inputs)[2])
vae = tf.keras.Model(inputs=encoder_inputs, outputs=outputs, name='vae')

# VAE Loss

In [None]:
# VAE loss
# The training objective is the sum of two terms and is attached to the model
# with add_loss, so compile() is given only an optimizer.
#
# 1. Reconstruction loss: how closely the decoded image matches the input.
#    flatten() turns each tensor into one long 1-D vector, so
#    binary_crossentropy averages over every pixel it is given; multiplying
#    by 28 * 28 rescales that per-pixel average to a per-image total.
reconstruction_loss = tf.keras.losses.binary_crossentropy(
    tf.keras.backend.flatten(encoder_inputs),
    tf.keras.backend.flatten(outputs),
) * (28 * 28)

# 2. KL divergence: keeps the latent space well-behaved and regularized.
#    Summed over the latent dimensions (axis=-1), giving one value per sample.
kl_loss = tf.keras.backend.sum(
    1 + z_log_var - tf.keras.backend.square(z_mean) - tf.keras.backend.exp(z_log_var),
    axis=-1,
) * -0.5

# Combine both terms, average them, and register the result on the model.
vae_loss = tf.keras.backend.mean(reconstruction_loss + kl_loss)
vae.add_loss(vae_loss)
vae.compile(optimizer='adam')



# Train the model

In [None]:
# Train the model
# No target array is passed (and the validation tuple carries None as its
# targets) because the loss was attached to the model with add_loss.
vae.fit(
    x=x_train,
    batch_size=128,
    epochs=30,
    validation_data=(x_test, None),
)



# Display the generated images - function

In [None]:
# Display generated images
def plot_latent_space(decoder, n=30, figsize=15, scale=1.0, digit_size=28):
    """Decode an n-by-n grid of 2-D latent points and show them as one image.

    Args:
        decoder: Model whose ``predict`` accepts an array of shape
            ``(batch, 2)`` of latent points and returns one image per row
            that can be reshaped to ``digit_size`` x ``digit_size``.
        n: Number of grid points along each latent axis.
        figsize: Width and height passed to ``plt.figure``.
        scale: The grid spans ``[-scale, scale]`` on both latent axes.
        digit_size: Side length, in pixels, of one decoded image.

    Raises:
        ValueError: If ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError("n must be at least 1")

    # Evenly spaced latent coordinates. grid_y is reversed so that z[1]
    # increases from the bottom of the plot to the top (imshow draws row 0
    # at the top).
    grid_x = np.linspace(-scale, scale, n)
    grid_y = np.linspace(-scale, scale, n)[::-1]

    # Build every (z[0], z[1]) pair up front in row-major order (row i uses
    # grid_y[i], column j uses grid_x[j]) and decode them with a single
    # predict call rather than n * n separate one-sample calls.
    zx, zy = np.meshgrid(grid_x, grid_y)
    z_grid = np.stack([zx.ravel(), zy.ravel()], axis=1)
    decoded = np.asarray(decoder.predict(z_grid, verbose=0))

    # (n * n, ...) -> (row, col, y, x) -> (row, y, col, x) -> one mosaic in
    # which tile (i, j) occupies rows i*digit_size.. and columns j*digit_size..
    tiles = decoded.reshape(n, n, digit_size, digit_size)
    figure = tiles.transpose(0, 2, 1, 3).reshape(n * digit_size, n * digit_size)

    plt.figure(figsize=(figsize, figsize))
    # Place one tick at the centre of each tile and label it with the latent
    # coordinate that produced the tile.
    start_range = digit_size // 2
    end_range = n * digit_size + start_range
    pixel_range = np.arange(start_range, end_range, digit_size)
    sample_range_x = np.round(grid_x, 1)
    sample_range_y = np.round(grid_y, 1)
    plt.xticks(pixel_range, sample_range_x)
    plt.yticks(pixel_range, sample_range_y)
    plt.xlabel("z[0]")
    plt.ylabel("z[1]")
    plt.imshow(figure, cmap='Greys_r')
    plt.show()


# Run the Model


In [None]:
# Render the latent-space grid using the trained decoder.
plot_latent_space(decoder=decoder)
