It is highly recommended to use a powerful **GPU**; you can use one for free by uploading this notebook to [Google Colab](https://colab.research.google.com/notebooks/intro.ipynb).
<table align="center">
 <td align="center"><a target="_blank" href="https://colab.research.google.com/github/ezponda/intro_deep_learning/blob/main/class/generative/autoencoder.ipynb">
        <img src="https://i.ibb.co/2P3SLwK/colab.png"  style="padding-bottom:5px;" />Run in Google Colab</a></td>
  <td align="center"><a target="_blank" href="https://github.com/ezponda/intro_deep_learning/blob/main/class/generative/autoencoder.ipynb">
        <img src="https://i.ibb.co/xfJbPmL/github.png"  height="70px" style="padding-bottom:5px;"  />View Source on GitHub</a></td>
</table>

Let's follow the TensorFlow [introduction to autoencoders tutorial](https://www.tensorflow.org/tutorials/generative/autoencoder) and the [VAEs tutorial](https://www.tensorflow.org/tutorials/generative/cvae).

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
import time

from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, losses
from tensorflow.keras.datasets import fashion_mnist
from tensorflow.keras.models import Model

## Load the dataset
To start, you will train the basic autoencoder using the Fashion MNIST dataset. Each image in this dataset is 28x28 pixels.

In [None]:
# Load the Fashion MNIST train/test splits (images plus labels).
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

# Divide by 255 so that pixel values fall in [0, 1] (assuming the usual 0-255 intensity range).
x_train, x_test = x_train / 255.0, x_test / 255.0

for split in (x_train, x_test):
    print (split.shape)

In [None]:
# Preview the first 25 training images on a 5x5 grid, without ticks or grid lines.
grid_side = 5
plt.figure(figsize=(10, 10))
for position, image in enumerate(x_train[:grid_side ** 2], start=1):
    plt.subplot(grid_side, grid_side, position)
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    plt.imshow(image, cmap='gray')
plt.show()

##  Basic Autoencoder

Define an autoencoder with two Dense layers: an `encoder`, which compresses the images into a `latent_dim` dimensional latent vector, and a `decoder`, that reconstructs the original image from the latent space.

To define your model, use the [Keras Model Subclassing API](https://www.tensorflow.org/guide/keras/custom_layers_and_models).


In [None]:
class Autoencoder(tf.keras.Model):
    """Dense autoencoder for 28x28 images built with the Keras subclassing API.

    The encoder flattens its input and projects it to `latent_dim` units (ReLU);
    the decoder projects back to 28*28 units (sigmoid) and reshapes to (28, 28).
    """

    def __init__(self, latent_dim):
        super().__init__()
        self.latent_dim = latent_dim

        # Encoder: image -> flat vector -> latent code.
        flatten = layers.Flatten()
        bottleneck = layers.Dense(self.latent_dim, activation='relu')
        self.encoder = tf.keras.Sequential([flatten, bottleneck])

        # Decoder: latent code -> flat vector -> image.
        expand = layers.Dense(28*28, activation='sigmoid')
        to_image = layers.Reshape((28, 28))
        self.decoder = tf.keras.Sequential([expand, to_image])

    def call(self, x):
        """Return the reconstruction obtained by encoding and then decoding `x`."""
        return self.decoder(self.encoder(x))


latent_dim = 64
autoencoder = Autoencoder(latent_dim)

In [None]:
class Encoder(layers.Layer):
    """Flattens the input and maps it to a `latent_dim`-dimensional code.

    Args:
        latent_dim: Number of units of the ReLU Dense layer producing the code.
        name: Layer name forwarded to `layers.Layer`.
        **kwargs: Extra keyword arguments forwarded to `layers.Layer`.
    """

    def __init__(self, latent_dim=32, name="encoder", **kwargs):
        super(Encoder, self).__init__(name=name, **kwargs)
        self.latent_dim = latent_dim
        # Sub-layers are created once here instead of on every forward pass in `call`.
        self.flatten_layer = layers.Flatten()
        self.dense = layers.Dense(self.latent_dim, activation='relu')

    def call(self, inputs):
        """Return the latent code for `inputs`."""
        x = self.flatten_layer(inputs)
        x = self.dense(x)
        return x


class Decoder(layers.Layer):
    """Converts z, the encoded image vector, back into a 28x28 image.

    Args:
        original_dim: Number of units of the output Dense layer; it must equal
            28 * 28 so that the result can be reshaped to (28, 28).
        name: Layer name forwarded to `layers.Layer`.
        **kwargs: Extra keyword arguments forwarded to `layers.Layer`.
    """

    def __init__(self, original_dim, name="decoder", **kwargs):
        super(Decoder, self).__init__(name=name, **kwargs)
        self.original_dim = original_dim
        # Sigmoid keeps the reconstruction in [0, 1], like the input images and
        # like the other decoders in this notebook (this layer used ReLU before).
        self.dense = layers.Dense(self.original_dim, activation='sigmoid')
        # Created once here instead of on every forward pass in `call`.
        self.to_image = layers.Reshape((28, 28))

    def call(self, inputs):
        """Return the reconstructed images for the latent codes `inputs`."""
        x = self.dense(inputs)
        x = self.to_image(x)
        return x


class AutoEncoder2(tf.keras.Model):
    """End-to-end model that chains an `Encoder` and a `Decoder` for training."""

    def __init__(self, original_dim, latent_dim, name="autoencoder", **kwargs):
        super().__init__(name=name, **kwargs)
        self.latent_dim = latent_dim
        self.original_dim = original_dim
        self.encoder = Encoder(latent_dim=latent_dim)
        self.decoder = Decoder(original_dim=original_dim)

    def call(self, inputs):
        """Encode `inputs` and return the decoded reconstruction."""
        latent = self.encoder(inputs)
        return self.decoder(latent)


autoencoder2 = AutoEncoder2(latent_dim=latent_dim, original_dim=28*28)

In [None]:
def get_autoencoder(latent_dim, input_shape=(28, 28)):
    """Build and compile a dense autoencoder with a single bottleneck layer.

    Args:
        latent_dim: Number of units of the bottleneck Dense layer.
        input_shape: Shape of one input sample, without the batch dimension.

    Returns:
        A `tf.keras.Model` whose output has the same per-sample shape as its
        input, compiled with the Adam optimizer and MSE loss.
    """
    inputs = tf.keras.Input(shape=input_shape, name='input')
    # encoder
    encoded = layers.Flatten()(inputs)
    encoded = layers.Dense(latent_dim, activation='relu')(encoded)
    # decoder
    decoded = layers.Dense(int(np.prod(input_shape)), activation='sigmoid')(encoded)
    # Reshape to the requested input shape; this was hard-coded to (28, 28),
    # which broke the function for any other `input_shape`.
    decoded = layers.Reshape(tuple(input_shape))(decoded)

    # model
    autoencoder = tf.keras.Model(inputs=inputs, outputs=decoded)
    autoencoder.compile(optimizer='adam', loss='mse')
    return autoencoder


autoencoder = get_autoencoder(latent_dim, input_shape=(28, 28))

Train the model using `x_train` as both the input and the target. The `encoder` will learn to compress the dataset from 784 dimensions to the `latent_dim`-dimensional latent space, and the `decoder` will learn to reconstruct the original images.

In [None]:
# The images are both the input and the target; the test split is used for validation.
autoencoder.fit(x_train, x_train,
                epochs=10,
                batch_size=64,
                validation_data=(x_test, x_test))

Compute the `test_mse`

In [None]:
# The model is compiled with loss='mse', so the value reported here is the reconstruction MSE.
test_mse = autoencoder.evaluate(x_test, x_test)
print('MSE Test:', test_mse)

Now that the model is trained, let's test it by encoding and decoding images from the test set.

In [None]:
# Run the whole test set through the model and convert the result to NumPy for plotting.
decoded_imgs = autoencoder(x_test).numpy()

In [None]:
# Top row: the first n test images. Bottom row: their reconstructions.
n = 10
fig, axes = plt.subplots(2, n, figsize=(20, 4))
plt.gray()  # grayscale becomes the default colormap for the images drawn below
panels = (("original", x_test), ("reconstructed", decoded_imgs))
for row, (label, images) in enumerate(panels):
    for col in range(n):
        ax = axes[row, col]
        ax.imshow(images[col])
        ax.set_title(label)
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
plt.show()

Let's try different latent dimensions

In [None]:
# Train one fresh autoencoder per latent size and compare test MSE, wall-clock time
# and the visual quality of the reconstructions.
for latent_dim in [16, 32, 64, 128, 256, 512, 784]:
    start = time.time()
    autoencoder = get_autoencoder(latent_dim, input_shape=(28, 28))
    autoencoder.fit(x_train, x_train, epochs=7, batch_size=64, verbose=0)
    test_mse = autoencoder.evaluate(x_test, x_test, verbose=0)
    elapsed_time = time.time() - start  # covers model construction, training and evaluation
    print('#' * 100)
    print(f'Latent dimension: {latent_dim}, MSE Test: {test_mse}, elapsed time: {elapsed_time}')

    n = 10
    reconstructed = autoencoder(x_test).numpy()
    fig, axes = plt.subplots(2, n, figsize=(20, 4))
    plt.gray()  # grayscale becomes the default colormap for the images drawn below
    panels = (("original", x_test), ("reconstructed", reconstructed))
    for row, (label, images) in enumerate(panels):
        for col in range(n):
            ax = axes[row, col]
            ax.imshow(images[col])
            ax.set_title(label)
            ax.get_xaxis().set_visible(False)
            ax.get_yaxis().set_visible(False)
    plt.show()

We can add more complexity to the model

In [None]:
def get_autoencoder(latent_dim, input_shape=(28, 28)):
    """Build and compile a dense autoencoder with one extra hidden layer per side.

    Layer sizes: flatten -> 2 * latent_dim -> latent_dim -> 2 * latent_dim ->
    prod(input_shape), reshaped back to `input_shape`.

    Args:
        latent_dim: Number of units of the bottleneck Dense layer.
        input_shape: Shape of one input sample, without the batch dimension.

    Returns:
        A `tf.keras.Model` whose output has the same per-sample shape as its
        input, compiled with the Adam optimizer and MSE loss.
    """
    inputs = tf.keras.Input(shape=input_shape, name='input')
    # encoder
    encoded = layers.Flatten()(inputs)
    encoded = layers.Dense(2 * latent_dim, activation='relu')(encoded)
    encoded = layers.Dense(latent_dim, activation='relu')(encoded)
    # decoder
    decoded = layers.Dense(2 * latent_dim, activation='relu')(encoded)
    # The output layer must consume `decoded`; it used to consume `encoded`, which
    # silently dropped the decoder hidden layer from the model.
    decoded = layers.Dense(int(np.prod(input_shape)), activation='sigmoid')(decoded)
    # Reshape to the requested input shape instead of a hard-coded (28, 28).
    decoded = layers.Reshape(tuple(input_shape))(decoded)

    # model
    autoencoder = tf.keras.Model(inputs=inputs, outputs=decoded)
    autoencoder.compile(optimizer='adam', loss='mse')
    return autoencoder

# A single assignment: the earlier `latent_dim = 64` in this cell was overwritten before use.
latent_dim = 128
autoencoder = get_autoencoder(latent_dim, input_shape=(28, 28))

In [None]:
# Same training setup as for the single-layer model: images are both input and target.
autoencoder.fit(x_train, x_train,
                epochs=10,
                batch_size=64,
                validation_data=(x_test, x_test))

In [None]:
# The model is compiled with loss='mse', so the value reported here is the reconstruction MSE.
test_mse = autoencoder.evaluate(x_test, x_test)
print('MSE Test:', test_mse)

In [None]:
# Run the whole test set through the model and convert the result to NumPy for plotting.
decoded_imgs = autoencoder(x_test).numpy()

In [None]:
# Top row: the first n test images. Bottom row: their reconstructions.
n = 10
fig, axes = plt.subplots(2, n, figsize=(20, 4))
plt.gray()  # grayscale becomes the default colormap for the images drawn below
panels = (("original", x_test), ("reconstructed", decoded_imgs))
for row, (label, images) in enumerate(panels):
    for col in range(n):
        ax = axes[row, col]
        ax.imshow(images[col])
        ax.set_title(label)
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
plt.show()

###  Convolutional autoencoder

We will use Conv2D layer for the encoder and [Transposed convolution layer](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Conv2DTranspose) (sometimes called Deconvolution) for the decoder

```python
tf.keras.layers.Conv2DTranspose(
    filters, kernel_size, strides=(1, 1), padding='valid', output_padding=None
)
```

- **Input shape**: (batch_size, rows, cols, channels)

- **Output shape**: (batch_size, new_rows, new_cols, filters)
    - new_rows = ((rows - 1) * strides[0] + kernel_size[0] - 2 * padding[0] + output_padding[0])
    - new_cols = ((cols - 1) * strides[1] + kernel_size[1] - 2 * padding[1] + output_padding[1])

In [None]:
# Shape demo: push a random batch through Conv2D and then through Conv2DTranspose,
# printing the shape after each step for both padding modes.
x = tf.random.normal((4, 28, 28, 1))
print('x shape: ', x.shape)

conv = tf.keras.layers.Conv2D(6, (3,3), padding='same')
x1 = conv(x)
print('x1 shape: ', x1.shape)

deconv_same = tf.keras.layers.Conv2DTranspose(1, (3,3), padding='same')
x2 = deconv_same(x1)
print('x2 shape: ', x2.shape)

deconv_valid = tf.keras.layers.Conv2DTranspose(1, (3,3), padding='valid')
print('x2 shape padding valid: ', deconv_valid(x1).shape)

In [None]:

def get_conv_autoencoder(input_shape=(28, 28, 1)):
    """Build and compile a small convolutional autoencoder.

    The encoder applies two stride-2 convolutions and the decoder applies two
    stride-2 transposed convolutions followed by a stride-1 output layer, all
    with 'same' padding.

    Args:
        input_shape: (rows, cols, channels) of one input sample. NOTE(review):
            rows and cols presumably need to be divisible by 4 for the output
            to match the input size - confirm before using other sizes.

    Returns:
        A `tf.keras.Model` compiled with the Adam optimizer and MSE loss.
    """
    inputs = tf.keras.Input(shape=input_shape, name='input')
    # encoder
    encoded = layers.Conv2D(16, (3,3), strides=(2,2), activation='relu', padding='same')(inputs)
    encoded = layers.Conv2D(16, (3,3), strides=(2,2), activation='relu', padding='same')(encoded)
    # decoder
    decoded = layers.Conv2DTranspose(16, kernel_size=3, strides=(2,2), activation='relu', padding='same')(encoded)
    decoded = layers.Conv2DTranspose(16, kernel_size=3, strides=(2,2), activation='relu', padding='same')(decoded)
    # One output filter per input channel (was hard-coded to 1), so the MSE target
    # matches for multi-channel inputs too; unchanged for the default shape.
    decoded = layers.Conv2DTranspose(input_shape[-1], kernel_size=(3,3), activation='sigmoid', padding='same')(decoded)

    # model
    autoencoder = tf.keras.Model(inputs=inputs, outputs=decoded)
    autoencoder.compile(optimizer='adam', loss='mse')
    return autoencoder

autoencoder = get_conv_autoencoder(input_shape=(28, 28, 1))
autoencoder.summary()

In [None]:
# Reload the images (labels are discarded), scale them as float32 and append a
# trailing channel axis, which the convolutional layers expect.
(x_train, _), (x_test, _) = fashion_mnist.load_data()

x_train = (x_train.astype('float32') / 255.)[..., np.newaxis]
x_test = (x_test.astype('float32') / 255.)[..., np.newaxis]

print(x_train.shape)

In [None]:
# Train the convolutional autoencoder to reproduce its own input.
autoencoder.fit(x_train, x_train,
                epochs=10,
                batch_size=64,
                validation_data=(x_test, x_test))

In [None]:
# The model is compiled with loss='mse', so the value reported here is the reconstruction MSE.
test_mse = autoencoder.evaluate(x_test, x_test)
print('MSE Test:', test_mse)

In [None]:
# Run the whole test set through the model and convert the result to NumPy for plotting.
decoded_imgs = autoencoder(x_test).numpy()

In [None]:
# Top row: the first n test images. Bottom row: their reconstructions.
n = 10
fig, axes = plt.subplots(2, n, figsize=(20, 4))
plt.gray()  # grayscale becomes the default colormap for the images drawn below
panels = (("original", x_test), ("reconstructed", decoded_imgs))
for row, (label, images) in enumerate(panels):
    for col in range(n):
        ax = axes[row, col]
        ax.imshow(images[col])
        ax.set_title(label)
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
plt.show()

## Image denoising



An autoencoder can also be trained to remove noise from images. In the following section, you will create a noisy version of the Fashion MNIST dataset by applying random noise to each image. You will then train an autoencoder using the noisy image as input, and the original image as the target.

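The dataset was re-imported above (as `float32`, with an explicit channel dimension), so `x_train` and `x_test` hold the clean images without the modifications made earlier.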

Adding random noise to the images

In [None]:
# Scale of the additive noise.
sigma = 0.3

# Draw the noise for the train split first and for the test split second.
train_noise = tf.random.normal(shape=x_train.shape)
test_noise = tf.random.normal(shape=x_test.shape)

# Corrupt the images and clip so the noisy pixels stay inside [0, 1].
x_train_noisy = tf.clip_by_value(x_train + sigma * train_noise, clip_value_min=0., clip_value_max=1.)
x_test_noisy = tf.clip_by_value(x_test + sigma * test_noise, clip_value_min=0., clip_value_max=1.)

Plot the noisy images.


In [None]:
# Show the first n noisy test images in a single row.
n = 10
fig, axes = plt.subplots(1, n, figsize=(20, 2))
plt.gray()  # grayscale becomes the default colormap for the images drawn below
for col, ax in enumerate(axes):
    ax.set_title("original + noise")
    ax.imshow(tf.squeeze(x_test_noisy[col]))  # drop the channel axis for display
plt.show()

In [None]:
# Start from a freshly initialised convolutional autoencoder for the denoising task.
autoencoder = get_conv_autoencoder(input_shape=(28, 28, 1))  

In [None]:
# Denoising setup: noisy images are the input, the clean images are the target.
autoencoder.fit(x_train_noisy, x_train,
                epochs=10,
                shuffle=True,
                validation_data=(x_test_noisy, x_test))

In [None]:
# Score the denoiser on the task it was trained for: noisy inputs compared against
# the clean targets (this previously evaluated clean inputs against themselves).
test_mse = autoencoder.evaluate(x_test_noisy, x_test)
print('MSE Test:', test_mse)

In [None]:
# Denoise the *noisy* test images. The clean `x_test` was fed here before, so the
# "reconstructed" row plotted below did not correspond to the noisy row above it.
decoded_imgs = autoencoder(x_test_noisy).numpy()

In [None]:
# Top row: the first n noisy test images. Bottom row: the model outputs in `decoded_imgs`.
n = 10
fig, axes = plt.subplots(2, n, figsize=(20, 4))
plt.gray()  # grayscale becomes the default colormap for the images drawn below
panels = (("original", x_test_noisy), ("reconstructed", decoded_imgs))
for row, (label, images) in enumerate(panels):
    for col in range(n):
        ax = axes[row, col]
        ax.imshow(images[col])
        ax.set_title(label)
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
plt.show()

We can change the noise during training.

In [None]:
# Fresh model for the experiment in which the training noise is re-drawn every epoch.
autoencoder = get_conv_autoencoder(input_shape=(28, 28, 1))    

In [None]:
# Train for 5 epochs, drawing new training noise before each one and plotting the
# denoised test images after each one.
for epoch in range(5):
    print('#'*75)
    print('epoch :', epoch)
    # New noise every epoch, so the model never sees the same corrupted image twice.
    x_train_noisy = x_train + sigma * tf.random.normal(shape=x_train.shape)
    x_train_noisy = tf.clip_by_value(x_train_noisy, clip_value_min=0., clip_value_max=1.)
    autoencoder.fit(x_train_noisy, x_train,
                    epochs=1,
                    shuffle=True,
                    validation_data=(x_test_noisy, x_test))

    # Denoise the *noisy* test images. The clean `x_test` was fed here before, so
    # the bottom row did not correspond to the noisy images shown in the top row.
    decoded_imgs = autoencoder(x_test_noisy).numpy()
    n = 10
    plt.figure(figsize=(20, 4))
    for i in range(n):
        # display the noisy input
        ax = plt.subplot(2, n, i + 1)
        plt.imshow(x_test_noisy[i])
        plt.title("original")
        plt.gray()
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)

        # display the denoised output
        ax = plt.subplot(2, n, i + 1 + n)
        plt.imshow(decoded_imgs[i])
        plt.title("reconstructed")
        plt.gray()
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
    plt.show()


## Practice: Create a denoising autoencoder for the flowers dataset

In [None]:
import pathlib
import tensorflow as tf
# Download and unpack the flowers archive (`untar=True`), then wrap the returned
# path in a `pathlib.Path`.
dataset_url = 'https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz'
data_dir = tf.keras.utils.get_file('flower_photos', origin=dataset_url, untar=True)
data_dir = pathlib.Path(data_dir)

In [None]:
image_size = (128,128)

# Options shared by both subsets; both calls must use the same `validation_split` and `seed`.
shared_options = dict(
    validation_split=0.2,  # 80% train, 20% validation
    seed=1,
    image_size=image_size,  # (img_height, img_width) used for rescaling
    batch_size=64,
)

# `subset` is 'training' or 'validation' and is only valid together with `validation_split`.
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir, subset='training', **shared_options)

val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir, subset='validation', **shared_options)

In [None]:
def prepare_dataset(x_in, sigma=0.2):
    """Turn a batch of raw images into a `(noisy, clean)` pair.

    The input is divided by 255, normally distributed noise scaled by `sigma`
    is added, and the noisy copy is clipped back to [0, 1].

    Args:
        x_in: Image tensor (assumed to hold values in the 0-255 range).
        sigma: Multiplier applied to the standard-normal noise.

    Returns:
        Tuple `(x_noisy, x)` with the corrupted and the rescaled clean images.
    """
    clean = x_in / 255
    noise = tf.random.normal(shape=tf.shape(clean))
    corrupted = tf.clip_by_value(clean + sigma * noise, clip_value_min=0., clip_value_max=1.)
    return (corrupted, clean)


AUTOTUNE = tf.data.AUTOTUNE

# Training: cache the decoded images *before* adding noise. Caching after the random
# `map` (as was done before) freezes one noise sample per image for every epoch and
# stores both the noisy and the clean copy; this order gives fresh noise each epoch.
train_ds = (
    train_ds
    .cache()
    .shuffle(1000)
    .map(lambda x, y: prepare_dataset(x), num_parallel_calls=AUTOTUNE)
    .prefetch(buffer_size=AUTOTUNE)
)

# Validation: keep the cache *after* the noise so that, once the cache is filled,
# every evaluation sees the same noisy images.
val_ds = (
    val_ds
    .map(lambda x, y: prepare_dataset(x))
    .cache()
    .prefetch(buffer_size=AUTOTUNE)
)

In [None]:
# Take one validation batch and show clean images (top row) above their noisy versions (bottom row).
for x_noisy, x in val_ds.take(1):
    n = 5
    fig, axes = plt.subplots(2, n, figsize=(20, 8))
    plt.gray()
    panels = (("original", x), ("noisy", x_noisy))
    for row, (label, images) in enumerate(panels):
        for col in range(n):
            ax = axes[row, col]
            ax.imshow(images[col])
            ax.set_title(label)
            ax.get_xaxis().set_visible(False)
            ax.get_yaxis().set_visible(False)

In [None]:
def get_conv_autoencoder(input_shape=(28, 28, 1)):
    """Exercise template: build and compile a convolutional autoencoder.

    The encoder/decoder body is left as `...` (presumably for the reader to
    complete). It must define `decoded`, the output tensor of the network,
    before the model is created; as written, calling this function raises
    NameError because `decoded` is never assigned.

    Note: this definition replaces the `get_conv_autoencoder` defined earlier
    in the notebook.
    """
    inputs = tf.keras.Input(shape=input_shape, name='input')
    # encoder
    ...
    # model
    autoencoder = tf.keras.Model(inputs=inputs, outputs=decoded)
    autoencoder.compile(optimizer='adam', loss='mse')
    return autoencoder

# 128x128 inputs with 3 channels: the same spatial size as `image_size` used to load the dataset.
autoencoder = get_conv_autoencoder(input_shape=(128,128,3))    


In [None]:
# Train one epoch at a time so the denoising quality can be inspected after every epoch.
for epoch in range(20):
    print('#' * 60)
    print('epoch', epoch)
    autoencoder.fit(train_ds,
                    epochs=1,
                    validation_data=val_ds,
                    validation_steps=10)
    for x_noisy, x in val_ds.take(1):
        decoded_imgs = autoencoder(x_noisy).numpy()
        n = 5
        fig, axes = plt.subplots(2, n, figsize=(22, 10))
        plt.gray()
        # Top row: noisy inputs. Bottom row: the model outputs for those inputs.
        panels = (("original", x_noisy), ("reconstructed", decoded_imgs))
        for row, (label, images) in enumerate(panels):
            for col in range(n):
                ax = axes[row, col]
                ax.imshow(images[col])
                ax.set_title(label)
                ax.get_xaxis().set_visible(False)
                ax.get_yaxis().set_visible(False)
        plt.show()

In [None]:
# Final check: denoise one validation batch and show noisy inputs (top row)
# above the model outputs (bottom row).
for x_noisy, x in val_ds.take(1):
    decoded_imgs = autoencoder(x_noisy).numpy()
    n = 5
    fig, axes = plt.subplots(2, n, figsize=(22, 10))
    plt.gray()
    panels = (("original", x_noisy), ("reconstructed", decoded_imgs))
    for row, (label, images) in enumerate(panels):
        for col in range(n):
            ax = axes[row, col]
            ax.imshow(images[col])
            ax.set_title(label)
            ax.get_xaxis().set_visible(False)
            ax.get_yaxis().set_visible(False)