## DCGAN using MNIST

In [None]:
# Import Dependencies
import numpy as np

# Dataset
from keras.datasets.mnist import load_data

from keras.models import Sequential

# Common Layers
from keras.layers import Dense, Activation, BatchNormalization, Reshape, Flatten 
from keras.optimizers import Adam

# Layers specific to Generator
from keras.layers import Conv2DTranspose

# Layers specific to Discriminator
from keras.layers import Conv2D, LeakyReLU 

# Use this to pass an element-wise TensorFlow/Theano/CNTK function as an activation
import keras.backend as k

# Train Test Split
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
%matplotlib inline

For this code, I'll be starting with the MNIST Dataset and then using the same architecture on other datasets like CIFAR, Faces Dataset etc.

The MNIST dataset will be loaded using the Keras "load_data" function. It returns the data already split into a training set and a test set, each as a tuple, i.e. a tuple of training features and labels and a tuple of test features and labels.

### Load Dataset and Visualize Data

In [None]:
# Load Dataset
# load_data() returns two (features, labels) tuples: the training split first,
# then the test split. Unpack them into four separate arrays.
(X_train, y_train), (X_test, y_test) = load_data()

In [None]:
# Get Data Analysis
# Every print is its own statement: joining two print() calls with a comma
# builds a throwaway (None, None) tuple, and a notebook echoes the last such
# tuple as the cell's output.
print('Training Data: \n')
print('Num. Features: ', len(X_train))
print('Num. Labels: ', len(y_train))
print('Shape of Features: ', X_train.shape)
print('Shape of Labels: ', y_train.shape)
print('\n\n')

print('Test Data: \n')
print('Num. Features: ', len(X_test))
print('Num. Labels: ', len(y_test))
print('Shape of Features: ', X_test.shape)
print('Shape of Labels: ', y_test.shape)

In [None]:
# Shape of One Image
# Draw a scalar index. Indexing with a length-1 index array keeps a leading
# axis of size 1, so the reported shape would be that of a one-image batch
# rather than of a single image.
rand_idx = np.random.randint(0, len(X_train))
print('Shape of one Image: ', X_train[rand_idx].shape)

In [None]:
# Visualize Images
# Show a 3x3 grid of randomly chosen training digits, each titled with its label.
fig, ax = plt.subplots(nrows=3, ncols=3, figsize=(10,5))
for axis in ax.flat:
    # Scalar index, so X_train[idx] is a single 2-D image.
    idx = np.random.randint(0, len(X_train))
    axis.imshow(X_train[idx], cmap='gray')
    axis.set_axis_off()
    axis.set_title('Label: {}'.format(y_train[idx]))
# Lay the figure out once, after all nine panels exist, instead of once per panel.
plt.tight_layout()

## Data Preprocessing

According to the paper, the input images were scaled to the range of the tanh activation function i.e. [-1,1]. Using this ensures that each input parameter i.e. the pixels in the case of images have a similar data distribution. This helps as it speeds up the convergence while training the model. So, next, we'll write a function that does just that.

In [None]:
# Taking a random image and looking at its pixel values
# Use a scalar index so that X_train[idx] is one image; a length-1 index array
# would select a batch of one and add a leading axis to the printed values/shape.
idx = np.random.randint(0, len(X_train))
print('Image Index No.: ', idx)
print('\nImage Pixel Values [Before Normalization]: \n\n',X_train[idx])
print('\n\n Shape of Image: ',X_train[idx].shape)

In [None]:
# Function to apply Normalization similar to tanh activation function range i.e. [-1,1]
def normalize_images(img):
    """Scale raw pixel values from [0, 255] to the tanh range [-1, 1].

    Args:
        img: NumPy array holding one or more 28x28 images; any shape that
            can be reshaped to (-1, 28, 28, 1) is accepted.

    Returns:
        A float32 array of shape (N, 28, 28, 1) with values in [-1, 1].
    """
    # Add the batch and channel axes, then switch to float for the arithmetic.
    batch = img.reshape(-1, 28, 28, 1).astype(np.float32)
    # [0, 255] -> [0, 1] -> [-0.5, 0.5] -> [-1, 1]
    scaled = (batch / 255 - 0.5) * 2
    # Clamp in case the input contained values outside [0, 255].
    return np.clip(scaled, -1, 1)

In [None]:
# Test the Function
# Normalize one raw training image (fixed index 3337) and inspect the result.
# normalize_images reshapes its input to (-1, 28, 28, 1), so the printed shape
# carries a leading batch axis and a trailing channel axis.
img = normalize_images(X_train[3337])
print('Normalized Pixel Values: \n\n', img)
print('\n\n Shape of Normalized Image: ', img.shape)

In [None]:
# Normalize the Training and Test Features
# Both splits go through the same scaling; the raw arrays are replaced in place
# by name, so this cell must be run only once per load of the data.
X_train, X_test = (normalize_images(split) for split in (X_train, X_test))

## DCGAN Generator Architecture

The paper describes the DCGAN Architecture as shown in the following image:

In [None]:
# Display Generator Architecture
# Render the diagram from a path relative to the notebook's working directory;
# the width argument is passed through to IPython's Image.
from IPython.display import Image
Image(filename='./Images/generator.png', width=900) 

The Generator for DCGAN has the following components:

**1. Input Layer [Dense]:**

This layer is where we provide the noise input, which the Generator learns, over the course of training, to convert into an image at the output.

This layer is a Fully Connected (Dense) layer that takes the noise vector and, in the current case, outputs as many values as there are pixels in the image. Since we know that an MNIST image has a shape of [28,28,1], the total number of values is 28*28 = 784.

**2. Reshape:**

Before giving the data into the Transposed Convolution function, we need to resize the input data so that we can apply convolution operation on it.

Input Shape: **[1,784]**

After Reshaping, Input to Transpose Convolution Function: **[4,4,1024]** (the generator code below instead reshapes the 784 values to **[7,7,16]** for MNIST)

**3. 2-D Transposed Convolution [Conv2DTranspose]:**

As per the architecture of DCGAN mentioned in the paper, the Generator performs a series of Transposed Convolutions after getting the data from the dense layer and at the final layer we get a 64x64 image from these high level representations.

**4. Activation Functions [ReLU, Tanh]:**

As per the paper, the Transposed Convolution layers use the ReLU activation function whereas we use a tanh activation function for the final layer. Using the bounded activation function allows the model to learn more quickly to saturate and cover the color space of the training distribution.

So, let's define the Generator Function.

In [None]:
# Generator
def generator(inputSize, leakSlope):
    """Build the DCGAN generator that turns a noise vector into an image.

    Args:
        inputSize: Length of the noise vector fed to the first Dense layer.
        leakSlope: Kept for signature compatibility; it is not used, because
            the hidden activations in this network are plain ReLU.

    Returns:
        The uncompiled Keras ``Sequential`` generator. Its summary is printed
        as a side effect.
    """
    # The deeper 256- and 128-filter transposed-convolution stages that used
    # to be sketched here are left out of this MNIST-sized network.
    layer_stack = [
        # Project the noise to 784 values and fold them into a 7x7x16 map.
        Dense(784, input_shape=(inputSize,)),
        Reshape(target_shape=(7,7,16)),
        BatchNormalization(),
        Activation('relu'),

        # First stride-2 transposed convolution.
        Conv2DTranspose(filters=32, kernel_size=5, strides=2, padding='same'),
        BatchNormalization(),
        Activation('relu'),

        # Output stage: MNIST has a single channel, so one filter; tanh keeps
        # the generated pixels in [-1, 1].
        Conv2DTranspose(filters=1, kernel_size=5, strides=2, padding='same'),
        Activation('tanh'),
    ]

    generator_model = Sequential()
    for layer in layer_stack:
        generator_model.add(layer)

    # Print Model Summary
    generator_model.summary()

    return generator_model

## DCGAN Discriminator Architecture

The paper describes the DCGAN Architecture as shown in the following image:

In [None]:
# Display Discriminator Architecture
# Render the diagram from a path relative to the notebook's working directory;
# the width argument is passed through to IPython's Image.
from IPython.display import Image
Image(filename='./Images/discriminator.png', width=900) 

The discriminator for DCGAN has the following components:

**1. 2-D Convolution [Conv2D]:**

Since the aim of the discriminator is to classify images as real or fake, it takes in the complete image generated by the generator and tries to tell whether it is a real or a fake image. Hence, CNNs come into play, as they are the state-of-the-art networks for image classification. So, we use Convolution filters for the first 3 layers as opposed to Transpose Convolution in the Generator.


**2. Activation Functions [LeakyReLU]:**

As per the paper, the Convolution layers use the LeakyReLU activation function throughout the discriminator layers. Unlike tanh, LeakyReLU is not a bounded activation; the paper reports that the leaky rectified activation works well in the discriminator, especially for higher resolution modeling.

In [None]:
# Discriminator
def discriminator(leakSlope):
    """Build the DCGAN discriminator that scores 28x28x1 images as real or fake.

    Args:
        leakSlope: Negative-side slope passed as ``alpha`` to every LeakyReLU.

    Returns:
        The uncompiled Keras ``Sequential`` discriminator ending in a single
        sigmoid unit. Its summary is printed as a side effect.
    """
    # A third, 256-filter convolution stage that used to be sketched here is
    # left out of this MNIST-sized network.
    layer_stack = [
        # Input and first stride-2 convolution (no batch norm on this stage).
        Conv2D(filters=32, kernel_size=5, strides=2, padding='same', input_shape=(28,28,1)),
        LeakyReLU(alpha=leakSlope),

        # Second stride-2 convolution.
        Conv2D(filters=16, kernel_size=5, strides=2, padding='same'),
        BatchNormalization(),
        LeakyReLU(alpha=leakSlope),

        # Fully connected stage on the flattened feature map.
        Flatten(),
        Dense(784),
        BatchNormalization(),
        LeakyReLU(alpha=leakSlope),

        # Single sigmoid output used with binary cross-entropy.
        Dense(1),
        Activation('sigmoid'),
    ]

    discriminator_model = Sequential()
    for layer in layer_stack:
        discriminator_model.add(layer)

    # Model Summary
    discriminator_model.summary()

    return discriminator_model

Now that we have defined the Generator and the Discriminator for the DCGAN Architecture, let's bring them together and complete the DCGAN Architecture.

The complete DCGAN Architecture looks like the image below:

In [None]:
# Complete DCGAN Architecture
# Render the diagram from a path relative to the notebook's working directory;
# the width argument is passed through to IPython's Image.
from IPython.display import Image
Image(filename='./Images/complete_dcgan.png', width=900) 

In [None]:
# DCGAN Architecture
def DCGAN(sample_size, generator_lr, generator_momentum_beta, discriminator_lr, discriminator_momentum_beta, leakyAlpha):
    """Assemble and compile the generator, the discriminator and the stacked GAN.

    Args:
        sample_size: Length of the latent noise vector fed to the generator.
        generator_lr: Adam learning rate used when compiling the stacked model.
        generator_momentum_beta: Adam ``beta_1`` for the stacked model.
        discriminator_lr: Adam learning rate for the stand-alone discriminator.
        discriminator_momentum_beta: Adam ``beta_1`` for the discriminator.
        leakyAlpha: Leak slope forwarded to both network builders.

    Returns:
        A ``(dcgan, gen, disc)`` tuple: the stacked generator->discriminator
        model, the generator, and the discriminator.
    """
    gen = generator(inputSize=sample_size, leakSlope=leakyAlpha)
    disc = discriminator(leakSlope=leakyAlpha)

    # Compile the discriminator on its own while it is still trainable.
    disc_optimizer = Adam(lr=discriminator_lr, beta_1=discriminator_momentum_beta)
    disc.compile(optimizer=disc_optimizer, loss='binary_crossentropy')

    # Mark the discriminator as non-trainable before stacking and compiling the
    # combined model. NOTE(review): presumably this is so that training the
    # stacked model only updates the generator - confirm for the Keras version in use.
    disc.trainable = False
    dcgan = Sequential()
    for sub_model in (gen, disc):
        dcgan.add(sub_model)
    gan_optimizer = Adam(lr=generator_lr, beta_1=generator_momentum_beta)
    dcgan.compile(optimizer=gan_optimizer, loss='binary_crossentropy')

    return dcgan, gen, disc

In [None]:
# Latent noise sampled from a normal distribution with Zero Mean and Unit Variance
def latent_samples(num_samples, sample_size):
    """Draw a (num_samples, sample_size) array of standard-normal noise.

    Args:
        num_samples: Number of latent vectors (rows) to draw.
        sample_size: Length of each latent vector (columns).

    Returns:
        A float array of shape (num_samples, sample_size) sampled from a
        normal distribution with mean 0 and standard deviation 1.
    """
    noise_shape = (num_samples, sample_size)
    noise = np.random.normal(0, 1, noise_shape)
    return noise

In [None]:
# ------------------------------- TEST --------------------------------
def show_results(losses):
    """Plot the recorded discriminator and generator losses on one figure.

    Args:
        losses: Sequence of ``(discriminator_loss, generator_loss)`` pairs,
            one pair per recorded step. An empty sequence plots nothing.
    """
    losses = np.array(losses)
    if losses.size == 0:
        # Nothing was recorded (e.g. zero epochs); selecting the two loss
        # series below would raise an IndexError.
        return

    # Transposing puts each loss series in its own row.
    discriminator_curve, generator_curve = losses.T[0], losses.T[1]

    _, ax = plt.subplots()
    ax.plot(discriminator_curve, label='Discriminator')
    ax.plot(generator_curve, label='Generator')
    ax.set_title("Training Losses")
    ax.legend()
    plt.show()

In [None]:
# ------------------------------------- TEST --------------------------------------------
def _to_display_image(image):
    """Map one image from the [-1, 1] range back to uint8 pixels in [0, 255]."""
    # Inverse of the (x / 255 - 0.5) * 2 scaling applied to the training data.
    pixels = (np.asarray(image, dtype=np.float32) / 2 + 0.5) * 255
    pixels = np.clip(pixels, 0, 255).astype(np.uint8)
    # Grayscale imshow wants a 2-D array, so drop a trailing single channel.
    if pixels.ndim == 3 and pixels.shape[-1] == 1:
        pixels = pixels[..., 0]
    return pixels


def show_images(generated_images):
    """Display generated images in a grid of up to four rows.

    Args:
        generated_images: Sequence/array of images with values in [-1, 1],
            each either 2-D or 3-D with a trailing channel axis of size 1.
            An empty input displays nothing.
    """
    n_images = len(generated_images)
    if n_images == 0:
        return

    # Never ask for more rows than images, and round the column count up so
    # every image gets a cell even when the count is not a multiple of rows.
    rows = min(4, n_images)
    cols = -(-n_images // rows)

    plt.figure(figsize=(cols, rows))
    for position, image in enumerate(generated_images, start=1):
        plt.subplot(rows, cols, position)
        plt.imshow(_to_display_image(image), cmap='gray')
        plt.xticks([])
        plt.yticks([])
    plt.tight_layout()
    plt.show()

In [None]:
# ------------------------------------ TEST --------------------------------------------
def make_labels(size):
    """Return target columns for ``size`` samples: all ones, then all zeros.

    Args:
        size: Number of rows in each label array.

    Returns:
        A ``(ones, zeros)`` tuple of float arrays, each of shape (size, 1).
    """
    column_shape = (size, 1)
    ones_column = np.ones(column_shape)
    zeros_column = np.zeros(column_shape)
    return ones_column, zeros_column

In [None]:
# ----------------------------------------------- TEST ------------------------------------------
def train(
    g_learning_rate,   # learning rate for the generator
    g_beta_1,          # the exponential decay rate for the 1st moment estimates in Adam optimizer
    d_learning_rate,   # learning rate for the discriminator
    d_beta_1,          # the exponential decay rate for the 1st moment estimates in Adam optimizer
    leaky_alpha,
    smooth=0.1,        # label smoothing
    sample_size=100,   # latent sample size (i.e. 100 random numbers)
    epochs=100,
    batch_size=128,    # train batch size
    eval_size=16,      # evaluate size
    show_details=True):
    """Train a DCGAN on the module-level ``X_train`` and evaluate on ``X_test``.

    Each epoch walks over ``X_train`` in consecutive full batches (a trailing
    partial batch is skipped). Per batch, the discriminator is trained on real
    images with smoothed "real" targets and on generated images with "fake"
    targets, then the stacked model is trained to make the discriminator label
    generated images as real. After each epoch, losses are measured on
    ``eval_size`` random test images plus freshly generated images and printed.

    Args:
        g_learning_rate: Adam learning rate for the stacked (generator) model.
        g_beta_1: Adam ``beta_1`` for the stacked model.
        d_learning_rate: Adam learning rate for the discriminator.
        d_beta_1: Adam ``beta_1`` for the discriminator.
        leaky_alpha: Leak slope forwarded to the network builders.
        smooth: Label smoothing; real targets become ``1 - smooth`` when
            training the discriminator.
        sample_size: Length of each latent noise vector.
        epochs: Number of passes over the training data.
        batch_size: Number of images per training batch.
        eval_size: Number of real and fake images used for per-epoch evaluation.
        show_details: When true, show generated images every 10th epoch and
            plot the loss curves at the end.

    Returns:
        The trained generator model.
    """
    # Fixed target columns: ones mark "real", zeros mark "fake".
    real_targets, fake_targets = make_labels(batch_size)
    eval_real_targets, eval_fake_targets = make_labels(eval_size)
    smoothed_real_targets = real_targets * (1 - smooth)

    # create a GAN, a generator and a discriminator
    gan, gen_model, disc_model = DCGAN(
        sample_size,
        g_learning_rate,
        g_beta_1,
        d_learning_rate,
        d_beta_1,
        leaky_alpha)

    full_batches = len(X_train) // batch_size
    history = []
    for epoch in range(1, epochs + 1):
        for start in range(0, full_batches * batch_size, batch_size):
            # real MNIST digit images
            real_images = X_train[start:start + batch_size]

            # latent samples and the generated digit images
            noise = latent_samples(batch_size, sample_size)
            fake_images = gen_model.predict_on_batch(noise)

            # train the discriminator to detect real and fake images
            # NOTE(review): the trainable flag is flipped after both models
            # were compiled; whether that has any effect depends on the Keras
            # version - confirm.
            disc_model.trainable = True
            disc_model.train_on_batch(real_images, smoothed_real_targets)
            disc_model.train_on_batch(fake_images, fake_targets)

            # train the generator via GAN
            disc_model.trainable = False
            gan.train_on_batch(noise, real_targets)

        # evaluate on a random, non-repeating sample of test images
        eval_indices = np.random.choice(len(X_test), eval_size, replace=False)
        eval_real_images = X_test[eval_indices]

        eval_noise = latent_samples(eval_size, sample_size)
        eval_fake_images = gen_model.predict_on_batch(eval_noise)

        # discriminator loss is the sum of its losses on real and fake images
        d_loss = (disc_model.test_on_batch(eval_real_images, eval_real_targets)
                  + disc_model.test_on_batch(eval_fake_images, eval_fake_targets))
        # we want the fake to be realistic!
        g_loss = gan.test_on_batch(eval_noise, eval_real_targets)

        history.append((d_loss, g_loss))

        print("Epoch:{:>3}/{} Discriminator Loss:{:>7.4f} Generator Loss:{:>7.4f}".format(epoch, epochs, d_loss, g_loss))

        if show_details and epoch % 10 == 0:
            show_images(eval_fake_images)
    if show_details:
        show_results(history)
    return gen_model

In [None]:
# Run training with explicit optimizer settings for both networks; every other
# train() argument (label smoothing, latent size, epochs, batch size, eval size,
# show_details) keeps its default value.
train(g_learning_rate=0.0001, g_beta_1=0.9, d_learning_rate=0.001, d_beta_1=0.9, leaky_alpha=0.01)