### Deep Convolutional GANs (DCGANs) for MNIST digits

#### Summary of how DCGANs build on top of basic GANs

1. Rather than fully-connected or pooling layers, we use strided convolutions (for the discriminator) and transposed convolutions (for the generator).
2. We use batch-normalization layers in both the generator and the discriminator for faster and more stable training.
3. We use LeakyReLU as an alternative to ReLU to avoid ReLU's zero-gradient ("dying ReLU") problem, where units that only receive negative inputs stop learning.

#### Git repo

In [None]:
# Fetch the project repository and switch into its DCGANs directory; later
# cells use relative paths such as ./images and ./saved.
!git clone https://github.com/Daisuke0713/ANN-final.git
%cd ./ANN-final/DCGANs

#### Libraries

In [1]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import tensorflow_probability as tfp
from tensorflow import keras, random, nn
from tensorflow import data
from keras import Model, Sequential
from keras.losses import BinaryCrossentropy
from keras.metrics import BinaryAccuracy
from keras.optimizers import Adam
from keras import initializers 
from keras.layers import Dense, Conv2D, Conv2DTranspose, Reshape, Flatten, BatchNormalization, ReLU, LeakyReLU, Dropout, InputLayer

2023-04-19 22:45:44.844403: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


#### GPU/CPU Settings

In [2]:
# Device name reported by TensorFlow for the GPU (checked in the next cell).
gpu_device = tf.test.gpu_device_name()
cpu_device = '/cpu:0'
# Run on the CPU for now; `device` is the value the later tf.device(...) blocks read.
device = cpu_device

2023-04-19 22:45:51.537370: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [None]:
# Report the GPU when TensorFlow found the expected first-GPU device name;
# otherwise abort, since this cell is only meant to pass on a GPU machine.
if gpu_device == '/device:GPU:0':
    print('Found GPU at: {}'.format(gpu_device))
else:
    raise SystemError('GPU device not found')

#### Config Hyper-parameters

In [3]:
# Number of full passes over the training set.
epochs = 50
# Length of the generator's noise vector. The latent-space plot at the end of
# the notebook feeds 2-element vectors to the generator, so it relies on this being 2.
latent_dim = 2

# Number of images per training step.
batch_size = 32
# Learning rate used for both Adam optimizers (discriminator and stacked GAN).
learning_rate = 1e-4

#### Data Pre-processing

In [4]:
# Load the MNIST digit images; the labels are discarded (`_`) because the GAN
# below is trained on images only.
(x_train_digits, _), (x_test_digits, _) = keras.datasets.mnist.load_data()

# process data
def preprocess_digits_image_data(data):
    """Add a channel axis and rescale 8-bit pixel values to the range [-1, 1].

    Args:
        data: 3-D array of images indexed as (image, row, column) with pixel
            values on the 0..255 scale.

    Returns:
        A float32 array of shape (images, rows, columns, 1). No thresholding
        is applied; values are only shifted and scaled.
    """
    count, height, width = data.shape[0], data.shape[1], data.shape[2]
    with_channel = data.reshape((count, height, width, 1))
    # [0, 255] -> [0, 1] -> [-0.5, 0.5] -> [-1, 1], the same range as the
    # generator's tanh output.
    unit_range = with_channel / 255.
    centered = unit_range - 0.5
    return (centered * 2.0).astype('float32')

# Replace the raw image arrays with their float32, channel-last, [-1, 1]-scaled versions.
x_train_digits = preprocess_digits_image_data(x_train_digits)
x_test_digits = preprocess_digits_image_data(x_test_digits)

#### Split into batches

In [5]:
def split_batch(image_data, batch_size):
    """Wrap an array of images in a shuffled, batched tf.data dataset.

    Args:
        image_data: sized collection of images; sliced along its first axis.
        batch_size: number of images per emitted batch.

    Returns:
        A dataset that shuffles with a buffer as large as the whole input and
        then groups the elements into batches of `batch_size`.
    """
    buffer_size = len(image_data)
    dataset = data.Dataset.from_tensor_slices(image_data)
    dataset = dataset.shuffle(buffer_size)
    return dataset.batch(batch_size)

# Split data into batches. This rebinds both names from NumPy arrays to
# batched datasets, which is what the training loop iterates over.
x_train_digits = split_batch(x_train_digits, batch_size)
x_test_digits  = split_batch(x_test_digits, batch_size)

#### Define GANs

Generator

In [6]:
# Generator: maps a latent vector of length `latent_dim` to a 28x28x1 image
# whose values lie in [-1, 1] (tanh output).
generator = Sequential([
    # input: project the latent vector and reshape it into a 7x7 map with 128 channels
    Dense(units=7*7*128, input_shape=(latent_dim,)),
    Reshape((7,7,128)),
    # conv 1: stride-2 transposed convolution, upsamples 7x7 -> 14x14
    Conv2DTranspose(filters=64, kernel_size=3, strides=2, padding='same'),
    BatchNormalization(),
    # LeakyReLU with slope 0.2, matching the discriminator. The earlier
    # ReLU(max_value=0.2) put a ceiling of 0.2 on the activations instead of
    # giving negative inputs a 0.2 slope.
    LeakyReLU(0.2),
    # conv 2: stride-1 transposed convolution, keeps the 14x14 resolution
    Conv2DTranspose(filters=64, kernel_size=3, strides=1, padding='same'),
    BatchNormalization(),
    LeakyReLU(0.2),
    # final tanh: stride-2 transposed convolution, upsamples 14x14 -> 28x28 with one channel
    Conv2DTranspose(filters=1, kernel_size=3, strides=2, padding='same', activation='tanh')
])

Discriminator

In [7]:
# Discriminator: scores a 28x28x1 image with a single sigmoid output. The
# training loop uses target 1 for real images and 0 for generated ones.
discriminator = Sequential([
    # conv 1: stride-2 convolution (used instead of pooling), 28x28 -> 14x14
    Conv2D(filters=64, kernel_size=3, strides=2, padding='same', input_shape=(28,28,1)),
    LeakyReLU(0.2),
    # conv 2: second stride-2 convolution, 14x14 -> 7x7
    Conv2D(filters=64, kernel_size=3, strides=2, padding='same'),
    # NOTE(review): batch normalization is disabled here even though the
    # notebook summary says it is used in the discriminator too — confirm intent.
    # BatchNormalization(),
    LeakyReLU(0.2),
    # output: flatten the feature maps and emit one sigmoid score per image
    Flatten(),
    Dense(1, activation='sigmoid')
])

Merge G and D

In [8]:
# Compile the discriminator on its own, while it is still trainable; the
# training loop calls discriminator.train_on_batch directly.
discriminator.compile(optimizer=Adam(learning_rate=learning_rate), loss=BinaryCrossentropy(), metrics=[BinaryAccuracy()])
# Freeze the discriminator before building and compiling the stacked model, so
# that gan.train_on_batch is meant to update only the generator.
# NOTE(review): the statement order matters — this relies on Keras applying the
# trainable flag as it was at each compile() call; confirm on the installed version.
discriminator.trainable = False
# Stacked model: latent vector -> generator -> discriminator score.
gan = Sequential([
    generator, 
    discriminator
])
gan.compile(optimizer=Adam(learning_rate=learning_rate), loss=BinaryCrossentropy(), metrics=[BinaryAccuracy()])

#### Train GANs

In [9]:
def generate():
    """Return the generator's output for 16 latent vectors drawn from N(0, 1).

    The noise has shape (16, latent_dim); the generator is called on the
    device selected by the module-level `device` setting.
    """
    noise = np.random.normal(loc=0, scale=1, size=(16, latent_dim))
    with tf.device(device):
        return generator(noise)

def save_img(imgs, filename):
    """Plot the first 16 images in a 4x4 grayscale grid and save it as a PNG.

    Args:
        imgs: batch of at least 16 images, indexable as imgs[i, :, :, 0].
        filename: output name without extension; the figure is written to
            ./images/<filename>.png.
    """
    import os

    out_path = f'./images/{filename}.png'
    # savefig does not create missing directories, so make sure the target exists.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)

    plt.figure(figsize=(4,4))
    for i in range(16):
        plt.subplot(4, 4, i + 1)
        plt.imshow(imgs[i, :, :, 0], cmap='gray')
        plt.axis('off')
    plt.savefig(out_path)

In [None]:
# Target labels for a full batch (1 = real, 0 = generated). They are sliced
# per step so that a final batch smaller than batch_size still trains.
real = np.ones(shape=(batch_size, 1))
fake = np.zeros(shape=(batch_size, 1))

with tf.device(device_name=device):
    for epoch in range(epochs):
        for real_x in x_train_digits:
            # The last batch of an epoch may hold fewer than batch_size images.
            n = real_x.shape[0]
            real_y, fake_y = real[:n], fake[:n]

            # --- discriminator ---
            # train on real data
            d_loss_real = discriminator.train_on_batch(x=real_x, y=real_y)

            # train on fake data
            z = np.random.normal(loc=0, scale=1, size=(n, latent_dim))
            fake_x = generator.predict_on_batch(x=z)
            d_loss_fake = discriminator.train_on_batch(x=fake_x, y=fake_y)
            # train_on_batch returns [loss, accuracy] because the model is
            # compiled with a metric. Average only the two loss entries;
            # adding the lists would concatenate them and mix in the accuracies.
            d_loss = 0.5 * (d_loss_real[0] + d_loss_fake[0])

            # --- generator ---
            # Train the stacked model to make the discriminator answer "real"
            # for the same noise batch.
            g_loss = gan.train_on_batch(x=z, y=real_y)

        # Index 0 is the loss; the last entry is the accuracy metric.
        print(f'Epoch: {epoch}, Loss G: {g_loss[0]:.3f}, Loss D: {d_loss:.3f}')

        # sample and save images every 10 epochs and after the final epoch
        if epoch % 10 == 0 or epoch == epochs - 1:
            save_img(generate(), f'samples_latent{latent_dim}_epoch{epoch}')

gan.save('./saved')
# Note 1 min per epoch

In [None]:
# Bundle the sample images and the saved model into zip archives.
!zip -r ./images.zip images
# NOTE(review): the training cell saves the model to './saved', but this line
# archives a directory named 'saved_models' — confirm which name is intended.
!zip -r ./saved_models.zip saved_models

In [11]:
# visualize latent dim
def show_latent_image(n):
    """Plot an n-by-n grid of generated digits spanning the 2-D latent space.

    Latent coordinates are the standard-normal quantiles of n evenly spaced
    probabilities between 0.05 and 0.95. Every (first, second) pair is passed
    to the generator and the resulting 28x28 image is placed at the matching
    grid cell. The figure is saved to ./latent.png and then shown.

    Args:
        n: number of grid points along each latent axis.
    """
    tile = 28
    # Both latent axes use the same quantile grid.
    quantiles = tfp.distributions.Normal(0,1).quantile(np.linspace(0.05, 0.95, n))
    canvas = np.zeros(shape=(tile * n, tile * n))

    for row, second in enumerate(quantiles):
        top = row * tile
        for col, first in enumerate(quantiles):
            left = col * tile
            z_point = np.array([[first, second]])
            digit = tf.reshape(generator(z_point)[0], (28,28)).numpy()
            canvas[top:top + tile, left:left + tile] = digit

    plt.figure(figsize=(10,10))
    plt.imshow(canvas, cmap='gray')
    plt.axis('Off')
    plt.savefig('./latent.png')
    plt.show()

In [None]:
# Show the learned latent space as a 30x30 grid of generated digits
# (the function also writes the figure to ./latent.png).
show_latent_image(n=30)