In [13]:
import pathlib
import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
from tensorflow import keras

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import image_classification

# 6. Autoencoders
Let's try to encode and decode the MNIST dataset.

## 6.1 Load and Visualize the Dataset
Let's first load the data and look at some examples of the data.

In [None]:
# Fetch MNIST as a (train, test) pair. The training labels are thrown away
# because the models below are fitted with the images as their own targets.
(x_train, _), (x_test, y_test) = image_classification.load_mnist()

# Report the array layout and how many images each split holds.
print("x_train shape:", x_train.shape)
print(x_train.shape[0], "train samples")
print(x_test.shape[0], "test samples")

# Hand the test images and their labels to the plotting helper.
image_classification.visualize_mnist(images=x_test, labels=y_test)

## 6.2 Building A Neural Network To Predict the Input
Try to build a neural network that takes `x` as an input and outputs `x` itself! We start with an architecture that has a few hidden layers. Can you simplify it?

In [None]:
# A plain fully connected network that is trained to reproduce its own input.
model = keras.Sequential([
    keras.Input(shape=(28, 28, 1)),
    # Unroll each image into a flat vector for the dense layers.
    keras.layers.Flatten(),
    keras.layers.Dense(2000, activation="relu"),
    keras.layers.Dense(1000, activation="relu"),
    # One sigmoid unit per pixel (28 * 28 = 784), so outputs lie between 0 and 1.
    keras.layers.Dense(784, activation="sigmoid"),
    # Fold the vector back into the layout declared by keras.Input.
    keras.layers.Reshape((28, 28, 1)),
])

# NOTE(review): binary cross-entropy on pixels presumes load_mnist() returns
# intensities scaled to [0, 1] -- confirm against the helper.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-3),
    loss="binary_crossentropy",
    metrics=["mse"],
)

# Input and target are the same array: the network learns to copy its input.
model.fit(
    x_train,
    x_train,
    batch_size=32,
    epochs=10,
    validation_data=(x_test, x_test),
)

In [None]:
# Reconstruct the test set with the dense network, then pass indices 0-5 of
# the originals and of the reconstructions to the plotting helper. Both calls
# receive the true test labels.
x_pred_nn = model.predict(x_test)
image_classification.visualize_mnist(images=x_test, labels=y_test, idxs=list(range(6)))
image_classification.visualize_mnist(images=x_pred_nn, labels=y_test, idxs=list(range(6)))

## Q6.2:
1. What is the minimal architecture that performs well?

## 6.3 Autoencoders
Now we create an autoencoder consisting of an **encoder** and a **decoder**. Try different architectures and see what happens!

In [None]:
# Display the test array's shape so it can be compared with the keras.Input
# shape declared for the encoder in the next cell.
x_test.shape

In [None]:
# Encoder: dense layers of shrinking width that squeeze every image down to a
# two-value code.
encoder = keras.Sequential([
    keras.Input(shape=(28, 28)),
    keras.layers.Flatten(),
    keras.layers.Dense(250, activation="relu"),
    keras.layers.Dense(50, activation="relu"),
    # Bottleneck layer; the ReLU keeps both code values >= 0.
    keras.layers.Dense(2, activation="relu"),
])

# Decoder: the encoder's widths in reverse order, expanding a two-value code
# back into a 28 x 28 image.
decoder = keras.Sequential([
    keras.Input(shape=(2,)),
    keras.layers.Dense(50, activation="relu"),
    keras.layers.Dense(250, activation="relu"),
    # One sigmoid unit per pixel (28 * 28 = 784).
    keras.layers.Dense(784, activation="sigmoid"),
    keras.layers.Reshape((28, 28)),
])

# Chaining the two halves gives the autoencoder: image -> code -> image.
autoencoder = keras.Sequential([encoder, decoder])

# binary_crossentropy is listed as a metric too, so it is reported next to
# the loss and the mean squared error.
autoencoder.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["binary_crossentropy", "mse"],
)

# Train with the images as both input and target; the test images serve as
# validation data.
autoencoder.fit(
    x_train,
    x_train,
    batch_size=32,
    epochs=30,
    shuffle=True,
    validation_data=(x_test, x_test),
)

In [None]:
# Score the autoencoder on the test images, which act as input and target.
# evaluate() returns the loss followed by the metrics given to compile().
score = autoencoder.evaluate(x=x_test, y=x_test, verbose=0)
print(f"Test score: {score}")

## 6.4 Visualizing the reconstructed images of the autoencoder.

In [None]:
# Run the test images through encoder and decoder, then pass indices 0-5 of
# the originals and of the reconstructions to the plotting helper. Both calls
# receive the true test labels.
x_pred = autoencoder.predict(x_test)
image_classification.visualize_mnist(images=x_test, labels=y_test, idxs=list(range(6)))
image_classification.visualize_mnist(images=x_pred, labels=y_test, idxs=list(range(6)))

### Q6.4:
1. What are the input and output of the encoder?
2. What are the input and output of the decoder?
3. Explain how images can be compressed with an autoencoder.

## 6.5 Create your own images
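The latent space vectors $z = (z_1, z_2, ..., z_m)$ are the output of the encoder network. Because the encoder above ends in a ReLU layer, each $z_i$ is a non-negative number (it would lie between $-1$ and $1$ only if that layer used a `tanh` activation instead). Try to construct new images in the following way: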
1. Create a vector $z = (z_1, z_2, ..., z_m)$ that matches the output dimension of your encoder.
2. Use the decoder to estimate $x$.

Afterwards, we plot your newly created image.

In [None]:
# Compose your own z here: one row per image, one column per latent dimension.
# Negative entries are outside what a ReLU-terminated encoder can produce, but
# the decoder will still accept them.
z = np.array([[0.4, 12]])

# Fail early with a readable message if z does not match the encoder's output
# width (e.g. after changing the architecture), instead of surfacing a Keras
# shape error from inside predict().
assert z.ndim == 2 and z.shape[1] == encoder.layers[-1].output.shape[-1], (
    f"z has shape {z.shape}; expected a 2-D array with one row per image and "
    "one column per latent dimension of the encoder."
)

# Decode the latent vector(s) and draw the first resulting image.
x = decoder.predict(z)
plt.imshow(x[0], cmap="gray", interpolation="none")
# Ending on plt.show() keeps the AxesImage repr out of the cell output.
plt.show()

## 6.6 Sweeping over the latent space
For two-dimensional latent spaces $z = (z_1, z_2)$, we can visualize the entire space:

In [None]:
# This section visualises two-dimensional latent spaces only, so stop with an
# actionable message if the encoder's last layer has a different width.
assert encoder.layers[-1].output.shape[-1] == 2, (
    "The latent-space sweep needs a 2-dimensional latent space: "
    "give the encoder's last layer exactly 2 units and retrain."
)
image_classification.sweep_embedding_space(encoder, decoder, x_train)