# Introduction: Autoencoders with Keras tutorial

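In this notebook, we follow along with the [Building Autoencoders in Keras](https://blog.keras.io/building-autoencoders-in-keras.html) tutorial, written by François Chollet, the creator of Keras. We apply the tutorial's simplest autoencoder (a single fully-connected encoding layer and a single decoding layer) to 100 × 100 color images from the Fruits 360 dataset on Kaggle.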

In [0]:
from keras.layers import Input, Dense
from keras.models import Model
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

import numpy as np

In [0]:
# Install the Kaggle command-line client, discarding the installer's output.
# NOTE(review): `&>` is a bash-style redirection and it also hides any
# installation errors — confirm the shell in use and drop it when debugging.
!pip install kaggle &> /dev/null

In [0]:
from google.colab import files

# Open Colab's upload widget. It is used here for the Kaggle API token file
# (kaggle.json), which a later cell moves into the Kaggle config directory.
# upload() returns the uploaded files' contents keyed by file name; binding
# the result to a name keeps that dict (i.e. the token itself) from being
# echoed into the saved cell output.
uploaded = files.upload()

In [0]:
# Recursively delete /content.
# NOTE(review): this is destructive. Later cells create /content/.kaggle and
# move kaggle.json from the working directory; if the uploaded file lives
# under /content it is removed here first — confirm the intended cell order
# and whether this cell is needed at all.
!rm -r /content

In [0]:
!cd ~
!mkdir /content/.kaggle
!mv kaggle.json  /content/.kaggle
!kaggle datasets download -d moltean/fruits

In [0]:
# Extract the downloaded archive(s) into /datasets/, discarding unzip's output.
# The wildcard is quoted so that the shell passes it through unexpanded.
# NOTE(review): the source path assumes the Kaggle client stored the download
# under /content/.kaggle/datasets/moltean/fruits/ — confirm for the client
# version in use.
!unzip /content/.kaggle/datasets/moltean/fruits/'*.zip' -d /datasets/ &> /dev/null

# Building the Autoencoder

In [0]:
# Image geometry: 100 x 100 pixels with 3 color channels. Each image is
# flattened into a single vector, so the network's input and output both have
# img_height * img_width * img_channels values. Naming the size once keeps the
# input and output layers in agreement.
img_height, img_width, img_channels = 100, 100, 3
input_dim = img_height * img_width * img_channels

# Number of dimensions in the encoded (compressed) representation
encoding_dim = 300

# Placeholder for one flattened input image
input_img = Input(shape=(input_dim,))

# Encoder: one dense layer mapping the image to the encoding
encoded = Dense(encoding_dim, activation='relu')(input_img)

# Decoder: one dense layer mapping the encoding back to a flattened image
decoded = Dense(input_dim, activation='sigmoid')(encoded)

# Complete autoencoder: image -> encoding -> reconstruction
autoencoder = Model(input_img, decoded)

# Encoder on its own: image -> encoding
encoder = Model(input_img, encoded)

# Stand-alone decoder: a fresh input for encodings, wired to the autoencoder's
# last layer. The layer object is reused, so the decoder shares the
# autoencoder's decoding layer rather than getting a separate one.
encoded_input = Input(shape=(encoding_dim,))
decoder_layer = autoencoder.layers[-1]
decoder = Model(encoded_input, decoder_layer(encoded_input))

In [0]:
# Configure training: Adam optimizer, binary cross-entropy as the loss
autoencoder.compile(optimizer = 'adam', loss = 'binary_crossentropy')

In [0]:
# Number of training images to collect
n_train_images = 10000

datagen = ImageDataGenerator()

# batch_size = 1: every batch holds exactly one image and one label vector
generator = datagen.flow_from_directory('/datasets/fruits-360/Training/',
                                         target_size = (100, 100), batch_size = 1)

# class_indices maps label -> index; invert it to look labels up by index
index_to_label = {value: key for key, value in generator.class_indices.items()}

images = []
labels = []

# The generator keeps yielding batches, so the loop needs an explicit bound.
# zip() stops once range() is exhausted, which collects exactly
# n_train_images images (the previous counter-and-break collected one extra).
# NOTE(review): with the generator's default settings the images are presumably
# drawn in shuffled order, so this is a sample of the directory — confirm.
for i, batch in zip(range(n_train_images), generator):
  # batch[0] is the array of images and batch[1] the array of label vectors;
  # the trailing [0] picks the single item in this size-1 batch
  images.append(batch[0][0])
  # The position of the first non-zero entry in the label vector is the class index
  labels.append(index_to_label.get(np.where(batch[1][0])[0][0], 'Not found'))

In [0]:
# Number of validation images to collect
n_valid_images = 1000

valid_datagen = ImageDataGenerator()

# batch_size = 1: every batch holds exactly one image and one label vector
valid_generator = valid_datagen.flow_from_directory('/datasets/fruits-360/Validation/',
                                                    target_size = (100, 100), batch_size = 1)

# class_indices maps label -> index; invert it to look labels up by index
valid_index_to_label = {value: key for key, value in valid_generator.class_indices.items()}

valid_images = []
valid_labels = []

# The generator keeps yielding batches, so the loop needs an explicit bound.
# zip() stops once range() is exhausted, which collects exactly
# n_valid_images images (the previous counter-and-break collected one extra).
for i, batch in zip(range(n_valid_images), valid_generator):
  # batch[0] is the array of images and batch[1] the array of label vectors;
  # the trailing [0] picks the single item in this size-1 batch
  valid_images.append(batch[0][0])
  # The position of the first non-zero entry in the label vector is the class index
  valid_labels.append(valid_index_to_label.get(np.where(batch[1][0])[0][0], 'Not found'))

In [0]:
# Sanity check: number of training images collected by the loading loop
len(images)

In [0]:
# Stack the collected images into one float32 array and divide the pixel
# values by 255
x_train = np.array(images).astype('float32') / 255.

# Flatten each image into a single row: (n_images, height * width * channels)
x_train = x_train.reshape(len(x_train), -1)
print('Training Images Shape: ', x_train.shape)

In [0]:
# Stack the collected validation images into one float32 array and divide the
# pixel values by 255
x_valid = np.array(valid_images).astype('float32') / 255.

# Flatten each image into a single row: (n_images, height * width * channels)
x_valid = x_valid.reshape(len(x_valid), -1)
print('Validation Images Shape: ', x_valid.shape)

In [0]:
# Train the autoencoder to reconstruct its own input (x_train is both the
# input and the target); the learned representation has `encoding_dim`
# dimensions
autoencoder.fit(x_train, x_train, epochs = 50, batch_size = 256,
                shuffle = True)

## Reconstruct Images from the Validation Data

In [0]:
# Encode the validation images: one `encoding_dim`-sized vector per image
encoded_images = encoder.predict(x_valid)

# Decode the encodings back into flattened images (the reconstructions)
decoded_images = decoder.predict(encoded_images)

In [0]:
import matplotlib.pyplot as plt

# Number of validation images to show
n = 10

# Two rows of n panels: originals on top, reconstructions underneath
plt.figure(figsize = (20, 4))
for i in range(n):
  # Top row: the original validation image. decoded_images was computed from
  # x_valid, so the originals must come from x_valid as well (the previously
  # referenced x_test is not defined anywhere in this notebook).
  ax = plt.subplot(2, n, i + 1)
  # Reshape the flattened vector back to height x width x color channels
  plt.imshow(x_valid[i].reshape((100, 100, 3)))
  ax.get_xaxis().set_visible(False)
  ax.get_yaxis().set_visible(False)

  # Bottom row: the autoencoder's reconstruction of the same image
  ax = plt.subplot(2, n, i + 1 + n)
  plt.imshow(decoded_images[i].reshape((100, 100, 3)))
  ax.get_xaxis().set_visible(False)
  ax.get_yaxis().set_visible(False)

plt.show()