##### Copyright 2019 The TensorFlow Authors.

In [None]:
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Outlier Detection with Autoencoder

In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

from tensorflow.keras.datasets import mnist

In [None]:
class Encoder(tf.keras.layers.Layer):
    """Encoder half of the autoencoder: two stacked ReLU dense layers.

    Both layers are created with ``intermediate_dim`` units.
    """

    def __init__(self, intermediate_dim):
        super(Encoder, self).__init__()
        relu = tf.nn.relu
        self.hidden_layer = tf.keras.layers.Dense(intermediate_dim, activation=relu)
        self.output_layer = tf.keras.layers.Dense(intermediate_dim, activation=relu)

    def call(self, input_features):
        """Pass `input_features` through the hidden layer, then the output layer."""
        return self.output_layer(self.hidden_layer(input_features))

In [None]:
class Decoder(tf.keras.layers.Layer):
    """Decoder half of the autoencoder: two stacked ReLU dense layers.

    The hidden layer has ``intermediate_dim`` units and the output layer has
    ``original_dim`` units.
    """

    def __init__(self, intermediate_dim, original_dim):
        super(Decoder, self).__init__()
        relu = tf.nn.relu
        self.hidden_layer = tf.keras.layers.Dense(intermediate_dim, activation=relu)
        self.output_layer = tf.keras.layers.Dense(original_dim, activation=relu)

    def call(self, code):
        """Pass `code` through the hidden layer, then the output layer."""
        return self.output_layer(self.hidden_layer(code))

In [None]:
class Autoencoder(tf.keras.Model):
  """Model that chains an `Encoder` and a `Decoder`.

  The `loss` attribute starts as an empty list; `train_loop` appends one
  entry per epoch to it.
  """

  def __init__(self, intermediate_dim, original_dim):
    super(Autoencoder, self).__init__()
    # Per-epoch loss history, filled in from outside the class.
    self.loss = []
    self.encoder = Encoder(intermediate_dim=intermediate_dim)
    self.decoder = Decoder(
        intermediate_dim=intermediate_dim,
        original_dim=original_dim,
    )

  def call(self, input_features):
    """Encode `input_features` and return the decoder's reconstruction."""
    return self.decoder(self.encoder(input_features))

In [None]:
def loss(preds, real):
  """Return the mean of the squared element-wise difference `preds - real`."""
  residual = tf.subtract(preds, real)
  squared_residual = tf.square(residual)
  return tf.reduce_mean(squared_residual)

In [None]:
def train(loss, model, opt, original):
  """Apply one gradient update to `model` and return the loss it was based on.

  Args:
    loss: callable taking `(preds, real)` and returning the value to minimise.
    model: callable exposing `trainable_variables`.
    opt: optimizer exposing `apply_gradients`.
    original: batch used both as the model input and as the target.

  Returns:
    The value of `loss` computed before the update was applied.
  """
  # The forward pass and the loss must run under the tape to be differentiable.
  with tf.GradientTape() as tape:
    reconstruction_error = loss(model(original), original)

  variables = model.trainable_variables
  grads = tape.gradient(reconstruction_error, variables)
  opt.apply_gradients(zip(grads, variables))

  return reconstruction_error

In [None]:
def train_loop(model, opt, loss, dataset, epochs):
  """Train `model` for `epochs` passes over `dataset`, logging each epoch.

  For every epoch the losses returned by `train` for each batch are summed;
  the sum is appended to `model.loss` and printed.

  Args:
    model: model to train; must expose a list-like `loss` attribute.
    opt: optimizer forwarded to `train`.
    loss: loss callable forwarded to `train`.
    dataset: iterable of batches; may be empty.
    epochs: number of passes over `dataset`.
  """
  for epoch in range(epochs):
    epoch_loss = 0
    for batch_features in dataset:
      epoch_loss += train(loss, model, opt, batch_features)
    model.loss.append(epoch_loss)

    # With an empty dataset the accumulator is still the plain int 0, which
    # has no `.numpy()`; only unwrap values that actually provide it.
    if hasattr(epoch_loss, 'numpy'):
      printable_loss = epoch_loss.numpy()
    else:
      printable_loss = epoch_loss
    print('Epoch {}/{}. Loss: {}'.format(epoch + 1, epochs, printable_loss))

## Process the dataset

In [None]:
# Load the MNIST train and test splits as (images, labels) pairs.
(_x_train, _y_train), (_x_test, _y_test) = mnist.load_data()

# Cast the images to float32, divide by 255 and flatten each one to 784 values.
_x_train = np.reshape(
    _x_train.astype(np.float32) / 255.,
    (_x_train.shape[0], 784),
)
_x_test = np.reshape(
    _x_test.astype(np.float32) / 255.,
    (_x_test.shape[0], 784),
)

## Select the samples of a particular class label to train the autoencoder on

In [None]:
# Class label whose samples are kept in both splits.
label = 4

# First-axis positions of the samples carrying that label.
train_indices = np.nonzero(_y_train == label)[0]
test_indices = np.nonzero(_y_test == label)[0]

x_train, y_train = _x_train[train_indices], _y_train[train_indices]
x_test, y_test = _x_test[test_indices], _y_test[test_indices]

## Train the model

In [None]:
# hyperparameters
batch_size = 256  # number of samples per batch of the training dataset
epochs = 20  # number of passes over the training dataset
intermediate_dim = 128  # units of both encoder layers and of the decoder's hidden layer

In [None]:
# Slice the selected training images into batches of `batch_size`.
training_dataset = tf.data.Dataset.from_tensor_slices(x_train)
training_dataset = training_dataset.batch(batch_size)

# 784 is the flattened image size produced by the preprocessing cell.
model = Autoencoder(intermediate_dim=intermediate_dim, original_dim=784)
opt = tf.keras.optimizers.Adam(learning_rate=1e-2)

train_loop(
    model=model,
    opt=opt,
    loss=loss,
    dataset=training_dataset,
    epochs=epochs,
)

## Plot the in-training performance

In [None]:
# Plot one point per recorded epoch loss. The x-axis is derived from the
# recorded history rather than from `epochs`, so x and y always have the same
# length even if `train_loop` has appended to `model.loss` more than once.
plt.plot(range(len(model.loss)), model.loss)
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.show()

## Visualization

In [None]:
def display(indices):
    """Plot test images (top row) above their reconstructions (bottom row).

    Reads the module-level `x_test` and `model`. Each column corresponds to
    one entry of `indices`. The number drawn above a reconstruction is the
    sum of squared per-pixel differences between it and the original,
    rounded to three decimals.

    Args:
        indices: sized iterable of row positions into `x_test`.
    """
    number = len(indices)
    # Run the model over the test set once, not once per displayed image.
    reconstructions = model(x_test).numpy()

    plt.figure(figsize=(20, 4))
    for i, index in enumerate(indices):
        original = x_test[index].reshape(28, 28)
        reconstructed = reconstructions[index].reshape(28, 28)

        # The error is computed on the values as stored in x_test, i.e.
        # before the * 255 that is applied only for display.
        error = np.round(np.square(original - reconstructed).sum(), 3)

        # display original
        ax = plt.subplot(2, number, i + 1)
        plt.imshow(original * 255)
        plt.gray()
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)

        # display reconstruction
        ax = plt.subplot(2, number, i + 1 + number)
        plt.imshow(reconstructed * 255)
        plt.gray()
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        ax.text(0, -1, error, fontdict={'size': 25})
    plt.show()

In [None]:
# Show the first ten rows of x_test next to their reconstructions.
display(range(10))

# Order test images by reconstruction error

In [None]:
originals = x_test
reconstructeds = model(x_test).numpy()

# Per-image sum of squared errors. The values are kept as floats: truncating
# them to int would collapse many distinct errors into ties and leave the
# ranking below largely arbitrary.
errors = np.square(originals - reconstructeds).sum(axis=-1)

# argsort is ascending, so the head holds the lowest errors and the tail the
# highest ones.
sorted_args = np.argsort(errors)
in_indices = sorted_args[:10]
out_indices = sorted_args[-10:]

In [None]:
# Show the ten test images ranked lowest by reconstruction error.
display(in_indices)

In [None]:
# Show the ten test images ranked highest by reconstruction error.
display(out_indices)