# Autoencoders for images

__Objective:__ understand how to encode images in a 2-dimensional latent space via an autoencoder model.

In [None]:
import sys
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns

sys.path.append('../../modules/')

from autoencoders import Encoder, Decoder
from keras_utilities import append_to_full_history, plot_history, get_intermediate_output

sns.set_theme()

%load_ext autoreload
%autoreload 2

## Load and preprocess data

In [None]:
def preprocess_images(img, padding=2):
    """
    Prepare a batch of grayscale images for the autoencoder.

    Pixel values are divided by 255 (mapping the usual [0, 255] range
    onto [0, 1]), every image is zero-padded along its two spatial axes
    and a trailing channels axis of size 1 is appended.

    Parameters
    ----------
    img : array-like of shape (n_images, height, width)
        Batch of grayscale images without an explicit channels axis.
        Pixel values are expected to lie in [0, 255].
    padding : int, optional
        Number of zero-valued pixels added on each side of both spatial
        axes. The default (2) turns 28x28 images into 32x32 ones.

    Returns
    -------
    numpy.ndarray
        Array of dtype float32 and shape
        (n_images, height + 2 * padding, width + 2 * padding, 1).

    Raises
    ------
    ValueError
        If `img` is not a rank-3 array, or if `padding` is negative
        (the latter is raised by `np.pad`).
    """
    img = np.asarray(img)

    # Fail early with a readable message: a common mistake is calling
    # this function twice on the same data, which by then already
    # carries the extra channels axis.
    if img.ndim != 3:
        raise ValueError(
            'Expected an array of shape (n_images, height, width), '
            f'got shape {img.shape}.'
        )

    # Normalize pixel values.
    img = img.astype('float32') / 255.

    # Pad the two spatial axes only, leaving the batch axis untouched.
    img = np.pad(
        img,
        ((0, 0), (padding, padding), (padding, padding)),
        constant_values=0.
    )

    # The images come in grayscale without an explicit
    # channels dimension. Here we add it.
    img = np.expand_dims(img, -1)

    return img

In [None]:
# The labels are not needed to train the autoencoder (its targets are
# the images themselves); y_test is only used further down to color
# the points of the latent-space scatter plot.
fashion_mnist = tf.keras.datasets.fashion_mnist
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

In [None]:
# Preview the first three training images, one per panel.
fig, axs = plt.subplots(nrows=1, ncols=3, figsize=(14, 6))

# Iterating over x_train yields its images in order, so pairing it
# with the axes shows images 0, 1 and 2.
for ax, image in zip(axs, x_train):
    ax.imshow(image, cmap='gray')

    # Hide the grid lines so that they do not overlay the image.
    ax.grid(False)

In [None]:
# Preprocess both splits in the same way. The raw arrays are
# overwritten, so this cell is meant to run once per data load.
x_train, x_test = map(preprocess_images, (x_train, x_test))

In [None]:
# Same preview as before, now on the preprocessed images, to check
# visually the effect of the padding.
fig, axs = plt.subplots(nrows=1, ncols=3, figsize=(14, 6))

for ax, image in zip(axs, x_train):
    ax.imshow(image, cmap='gray')

    # Hide the grid lines so that they do not overlay the image.
    ax.grid(False)

## Autoencoder model

Model definition.

In [None]:
encoder = Encoder()

# Smoke test: encode a small batch of images and display the shape
# of the resulting latent vectors.
encoder(x_train[:10, ...]).shape

In [None]:
# Shape (batch axis excluded) of the last image-like, rank-3 tensor
# produced inside the encoder before its flattening layer. The
# decoder receives it as its reshaping size.
# NOTE(review): the index 3 is assumed to select that layer; confirm
# against the Encoder definition.
pre_flatten_output = get_intermediate_output(x_train[:1, ...], encoder, 3)
image_reshaping_size = tuple(pre_flatten_output.shape[1:])

decoder = Decoder(image_reshaping_size)

# Smoke test: encode and decode a couple of images and display the
# shape of the output.
latent_batch = encoder(x_train[:2, ...])
decoder(latent_batch).shape

In [None]:
# Symbolic input with the shape of a single preprocessed image
# (the batch axis is left out).
autoencoder_input = tf.keras.Input(shape=x_train.shape[1:])

# The autoencoder is the decoder applied to the output of the encoder.
autoencoder_output = decoder(encoder(autoencoder_input))

autoencoder_model = tf.keras.Model(
    inputs=autoencoder_input,
    outputs=autoencoder_output
)

In [None]:
# Smoke test: run a small batch through the full model and display
# the shape of the output.
autoencoder_model(x_train[:10]).shape

Model training.

In [None]:
autoencoder_model.compile(loss='binary_crossentropy', optimizer='adam')

# Container passed to append_to_full_history after each training run.
# It is created here, separately from the training cell, so that
# re-running the latter does not reset it.
full_history = {}

In [None]:
epochs = 5

# The model learns to reproduce its own input: the images are used
# both as features and as targets, for training and for validation.
history = autoencoder_model.fit(
    x_train,
    x_train,
    batch_size=100,
    epochs=epochs,
    shuffle=True,
    validation_data=(x_test, x_test),
)

# Record this run in the history container defined above.
append_to_full_history(history, full_history)

In [None]:
# Plot what has been recorded in full_history so far.
# NOTE(review): presumably the training/validation loss per epoch;
# confirm against plot_history in keras_utilities.
plot_history(full_history)

## Image reconstruction after training

In [None]:
nrows = 2
ncols = 6

# Stack original test images (index 0 along the first axis) and their
# reconstructions (index 1), so that the first axis matches the rows
# of the figure and the second one its columns.
reconstructed_images = tf.stack(
    [
        x_test[:ncols, ...],
        autoencoder_model(x_test[:ncols, ...])
    ],
    axis=0
)

fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(14, 4))

# Top row: originals. Bottom row: reconstructions.
for (i, j), ax in np.ndenumerate(axs):
    ax.imshow(reconstructed_images[i, j, ...], cmap='gray')

    ax.grid(False)

## Exploration of the latent space

In [None]:
n_samples = 5000

# Encode the first n_samples test images into latent vectors.
latent_vectors = encoder(x_test[:n_samples])

fig = plt.figure(figsize=(14, 6))

# One point per image, positioned by its two latent coordinates and
# colored by its label.
scatter_options = {
    'x': latent_vectors[:, 0],
    'y': latent_vectors[:, 1],
    'hue': y_test[:n_samples],
    'palette': sns.color_palette(),
}

sns.scatterplot(**scatter_options)

## Generating new images

**Idea:** we randomly sample points in the latent space and have the decoder produce an image from each of the new latent vectors.

In [None]:
n_samples = 6

# Rectangle in latent space to sample from: one (lower, upper) pair
# of bounds per latent dimension.
bounds = ((-5., 0.), (-5., 10.))

# Random 2-dimensional vectors in the chosen region of latent space:
# each coordinate is drawn uniformly within its own bounds, then the
# coordinates are joined along the last axis.
random_latent_vectors = tf.concat(
    [
        tf.random.uniform(shape=(n_samples, 1), minval=lower, maxval=upper)
        for lower, upper in bounds
    ],
    axis=-1
)

# Turn the randomly-generated latent vectors into images via the
# decoder.
random_images = decoder(random_latent_vectors)


# Show where the random latent vectors fall with respect to the
# latent vectors of existing samples (drawn semi-transparent).
fig = plt.figure(figsize=(14, 6))

palette = sns.color_palette()

sns.scatterplot(
    x=latent_vectors[:, 0],
    y=latent_vectors[:, 1],
    color=palette[0],
    alpha=.3
)

sns.scatterplot(
    x=random_latent_vectors[:, 0],
    y=random_latent_vectors[:, 1],
    color=palette[3],
)


# Show the decoded images, each one titled with the latent vector
# it was generated from.
fig, axs = plt.subplots(ncols=n_samples, figsize=(14, 4))

for i, ax in enumerate(axs):
    ax.imshow(random_images[i, ...], cmap='gray')

    ax.grid(False)

    ax.set_title(f'{random_latent_vectors[i, ...].numpy().round(2)}')

Consider a path (a straight line) in latent space and generate samples along it to see how they change. This should produce some kind of morphing between the images corresponding to the initial and final points, but nothing guarantees that the morphing will be continuous, nor that all the generated images will be realistic!

In [None]:
# Generate path in latent space.
n_points = 20

starting_point = tf.constant([-5., -2.5])
endpoint = tf.constant([-2.5, 2.5])

# Linear interpolation between the two points: the interpolation
# parameter runs from 0 (starting point) to 1 (endpoint) and gets a
# trailing axis so that it broadcasts against the 2-dimensional
# points, giving one row per point along the path.
path = (endpoint - starting_point) * tf.linspace(0., 1., n_points)[..., None] + starting_point

# Generate images corresponding to points along
# the path.
images_along_path = decoder(path)

# Plot generated images.
ncols = 10

# Ceiling division, so that a last, partially-filled row is still
# shown when n_points is not a multiple of ncols.
nrows = -(-n_points // ncols)

# squeeze=False always returns a 2-dimensional array of axes, even
# when there is a single row. The figure height grows with the number
# of rows (it is 4 for the default two rows).
fig, axs = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    figsize=(14, 2 * nrows),
    squeeze=False
)

# axs.flat visits the panels row by row, so the running index is
# also the index of the point along the path.
for index, ax in enumerate(axs.flat):
    if index >= n_points:
        # Unused panel in the last row: hide it altogether.
        ax.axis('off')
        continue

    ax.imshow(
        images_along_path[index, ...],
        cmap='gray'
    )

    ax.grid(False)

    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title(f'{path[index, ...].numpy().round(2)}')