<a href="https://colab.research.google.com/github/aneesh-kaitou1412/vae-mnist/blob/master/Convolutional_VAE_MNIST_denoising.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Convolutional VAE for Denoising Images

In [26]:
import tensorflow as tf
from keras import layers
from keras.datasets import mnist
from keras import backend as K
from keras.models import Model
import numpy as np
import matplotlib.pyplot as plt

## Loading MNIST and making a Noisy Dataset

In [27]:
def preprocess(images):
  """Convert raw pixel arrays into float32 model inputs.

  The values are cast to float32 and divided by 255, and a trailing axis of
  length 1 (the channel axis) is appended.

  Args:
    images: array of pixel values, e.g. of shape (num_images, height, width).

  Returns:
    float32 array with one extra trailing axis of length 1.
  """
  scaled = np.asanyarray(images).astype('float32') / 255
  # Indexing with np.newaxis appends the channel axis (same as expand_dims(-1)).
  return scaled[..., np.newaxis]

def generate_noisy(images, 
                   loc=0.5, 
                   scale=0.5, 
                   min_value=0.0, 
                   max_value=1.0,
                   percent_distortion=0.65,
                   rng=None):
  """Return a copy of `images` corrupted with additive Gaussian noise.

  Noise is drawn from a normal distribution with mean `loc` and standard
  deviation `scale`, multiplied by `percent_distortion`, added to the images,
  and the sum is clipped to [min_value, max_value].

  Args:
    images: array (or array-like) of pixel values.
    loc: mean of the Gaussian noise.
    scale: standard deviation of the Gaussian noise.
    min_value: lower clipping bound of the result.
    max_value: upper clipping bound of the result.
    percent_distortion: factor applied to the noise before it is added.
    rng: optional random source exposing `normal(loc, scale, size)`, e.g.
      `np.random.default_rng(seed)`. When None (the default) NumPy's global
      random state is used, exactly as before this parameter existed.

  Returns:
    Array with the same shape as `images`, clipped to [min_value, max_value].
  """
  # Passing a seeded generator makes the noisy dataset reproducible without
  # touching NumPy's global random state.
  draw_normal = np.random.normal if rng is None else rng.normal
  noise = draw_normal(loc=loc, scale=scale, size=np.shape(images))
  return np.clip(images + percent_distortion * noise, 
                 min_value, 
                 max_value)

In [28]:
# Load the MNIST train/test split through Keras.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Clean images: float32, divided by 255, with a trailing channel axis.
x_train, x_test = preprocess(x_train), preprocess(x_test)

# Noisy counterparts using generate_noisy's default settings. The training
# set is corrupted first, then the test set, so the random draws are consumed
# in the same order as before.
x_train_noisy = generate_noisy(x_train)
x_test_noisy = generate_noisy(x_test)

## Build the Variational Autoencoder Model

In [29]:
# Network Parameters
# Per-sample input shape: every axis of x_train after the batch axis.
input_shape = tuple(x_train.shape[1:])
batch_size = 128
latent_dim = 2

# Convolutional stack shared by encoder and decoder, one row per layer:
# (number of filters, kernel size, stride). The decoder walks it in reverse.
conv_layer_specs = [
    (64, 3, 2),
    (32, 3, 2),
    (16, 3, 1),
]
filter_sizes, kernel_sizes, stride_sizes = (
    list(column) for column in zip(*conv_layer_specs)
)

# Activations used by the encoder's and the decoder's convolutional layers.
encoding_activation = 'selu'
decoding_activation = 'relu'

In [30]:
# Build the Encoder Model

class SamplingLayer(layers.Layer):
  """Draws a latent vector from (z_mean, z_log_var) with the reparametrization trick.

  The layer takes the pair [z_mean, z_log_var] and returns
  z_mean + exp(0.5 * z_log_var) * epsilon, where epsilon is standard normal
  noise with one value per (sample, latent dimension).
  """

  def call(self, inputs):
    z_mean, z_log_var = inputs
    mean_shape = tf.shape(z_mean)
    n_samples, n_latent = mean_shape[0], mean_shape[1]
    # The randomness lives only in `noise`, so the output stays a
    # differentiable function of z_mean and z_log_var.
    noise = K.random_normal(shape=(n_samples, n_latent))
    std_dev = tf.exp(0.5 * z_log_var)
    return z_mean + std_dev * noise


# Encoder graph: image -> convolutions -> flatten -> (z_mean, z_log_var, z)
inputs = layers.Input(shape=input_shape, name='encoder_input')

# Stack Convolutional Encoding Layers, one per (filters, kernel, stride) triple
x = inputs
for filters, kernel_size, strides in zip(filter_sizes,
                                         kernel_sizes,
                                         stride_sizes):
  conv_layer = layers.Conv2D(filters=filters,
                             kernel_size=kernel_size,
                             strides=strides,
                             padding='same',
                             activation=encoding_activation)
  x = conv_layer(x)

# Shape of the last feature map without the batch axis; the decoder
# reshapes its dense output back to this shape.
encoder_out_shape = tuple(K.int_shape(x)[1:4])

# Generate Latent Space Parameters
x = layers.Flatten()(x)
z_mean = layers.Dense(units=latent_dim, name='z_mean')(x)
z_log_var = layers.Dense(units=latent_dim, name='z_log_var')(x)
# Sample z from the two parameter heads
z = SamplingLayer()([z_mean, z_log_var])

# Make the Encoder Model: it exposes both parameter heads and the sample
encoder = Model(inputs=inputs,
                outputs=[z_mean, z_log_var, z],
                name='encoder')
encoder.summary()

Model: "encoder"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
encoder_input (InputLayer)      [(None, 28, 28, 1)]  0                                            
__________________________________________________________________________________________________
conv2d_18 (Conv2D)              (None, 14, 14, 64)   640         encoder_input[0][0]              
__________________________________________________________________________________________________
conv2d_19 (Conv2D)              (None, 7, 7, 32)     18464       conv2d_18[0][0]                  
__________________________________________________________________________________________________
conv2d_20 (Conv2D)              (None, 7, 7, 16)     4624        conv2d_19[0][0]                  
____________________________________________________________________________________________

In [31]:
# Build the Decoder Model

# Decoder graph: latent vector -> dense -> feature map -> transposed convs -> image
z = layers.Input(shape=(latent_dim,), name='decoder_input')

# Expand the latent vector to as many units as the encoder's last feature
# map holds, then fold it back into that feature-map shape.
n_feature_units = np.prod(encoder_out_shape)
x = layers.Dense(n_feature_units)(z)
x = layers.Reshape(encoder_out_shape)(x)

# Stack Deconvolutional Decoding Layers: the encoder's layer settings,
# walked from the last layer back to the first.
decoder_layer_specs = reversed(
    list(zip(filter_sizes, kernel_sizes, stride_sizes)))
for filters, kernel_size, strides in decoder_layer_specs:
  deconv_layer = layers.Conv2DTranspose(filters=filters,
                                        kernel_size=kernel_size,
                                        strides=strides,
                                        padding='same',
                                        activation=decoding_activation)
  x = deconv_layer(x)

# Collapse the feature maps to a single output channel, then apply a
# sigmoid activation as the named output layer.
x = layers.Conv2DTranspose(filters=1,
                           kernel_size=3,
                           padding='same')(x)
outputs = layers.Activation('sigmoid', name='decoder_output')(x)

# Make the Decoder Model
decoder = Model(inputs=z, outputs=outputs, name='decoder')
decoder.summary()

Model: "decoder"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
decoder_input (InputLayer)   [(None, 2)]               0         
_________________________________________________________________
dense_5 (Dense)              (None, 784)               2352      
_________________________________________________________________
reshape_5 (Reshape)          (None, 7, 7, 16)          0         
_________________________________________________________________
conv2d_transpose_20 (Conv2DT (None, 7, 7, 16)          2320      
_________________________________________________________________
conv2d_transpose_21 (Conv2DT (None, 14, 14, 32)        4640      
_________________________________________________________________
conv2d_transpose_22 (Conv2DT (None, 28, 28, 64)        18496     
_________________________________________________________________
conv2d_transpose_23 (Conv2DT (None, 28, 28, 1)         577 

In [32]:
# Build the Autoencoder Model by combining Encoder and Decoder Model

# Define the Losses
def autoencoder_loss(x_actual_batch, x_decoded_batch):
  """VAE training loss: reconstruction term plus KL-divergence term.

  Args:
    x_actual_batch: target images the decoder output is compared against.
    x_decoded_batch: decoder output for the same batch.

  Returns:
    Scalar tensor equal to
      mean binary cross-entropy * number of values in `input_shape`
      + batch mean of the per-sample KL divergence, where the per-sample KL is
        -0.5 * sum over latent dims of (1 + z_log_var - z_mean^2 - exp(z_log_var)).

  NOTE(review): `z_mean`, `z_log_var` and `input_shape` are read from the
  notebook's global scope; `z_mean`/`z_log_var` are the encoder's symbolic
  output tensors, not function arguments. Whether that works depends on how
  the model is compiled/trained later in the notebook - confirm, and consider
  attaching the KL term with `add_loss` if it fails under eager execution.
  """
  # Only names *from* keras are imported in the notebook's import cell, so the
  # bare name `keras` is not in scope here; `tf.keras` is.
  per_pixel_bce = tf.keras.losses.binary_crossentropy(x_actual_batch,
                                                      x_decoded_batch)
  # Scale the mean up to a per-image sum so it is comparable with the KL term.
  reconstruction_loss = tf.reduce_mean(per_pixel_bce)
  reconstruction_loss *= float(np.prod(input_shape))

  # KL divergence is a sum over latent dimensions for each sample; only the
  # batch axis is averaged. Averaging over the latent axis as well would
  # shrink this term by a factor of latent_dim relative to the reconstruction.
  kl_per_dim = 1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var)
  kl_per_sample = -0.5 * tf.reduce_sum(kl_per_dim, axis=-1)
  kl_loss = tf.reduce_mean(kl_per_sample)

  total_loss = reconstruction_loss + kl_loss
  return total_loss

# Wire the two sub-models together. The encoder returns
# [z_mean, z_log_var, z]; only the sampled vector z (index 2) feeds the decoder.
x = encoder(inputs)
sampled_z = x[2]
reconstruction = decoder(sampled_z)

autoencoder = Model(inputs=inputs,
                    outputs=reconstruction,
                    name='autoencoder')
autoencoder.summary()

Model: "autoencoder"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
encoder_input (InputLayer)   [(None, 28, 28, 1)]       0         
_________________________________________________________________
encoder (Functional)         [(None, 2), (None, 2), (N 26868     
_________________________________________________________________
decoder (Functional)         (None, 28, 28, 1)         28385     
Total params: 55,253
Trainable params: 55,253
Non-trainable params: 0
_________________________________________________________________
