In [None]:
from IPython.display import clear_output
from random import shuffle
import time

In [None]:
import tensorflow as tf
tf.keras.backend.set_floatx('float32')

In [None]:
# Show which TensorFlow version this notebook is running against.
print(tf.__version__)

In [None]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from scipy.stats import norm
from tqdm import tqdm_notebook

from tensorflow.keras.layers import Input, Dense, Lambda, Flatten, Reshape, \
  Conv2D, Conv2DTranspose, UpSampling2D, BatchNormalization, \
  LeakyReLU, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras import metrics
from tensorflow.keras.datasets import fashion_mnist

We are going to use FashionMNIST for this session. The objective will be to build a generative adversarial network for this dataset.

In [None]:
# Load the Fashion-MNIST train/test images and labels through the Keras dataset helper.
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

In [None]:
# Preview the first 18 training images on a 3 x 6 grid.
fig, axes = plt.subplots(3, 6, figsize=(16, 8))
for ax, image in zip(axes.flat, x_train[:18]):
    ax.imshow(image, cmap="gray")
    ax.axis("off")
plt.show()

In [None]:
# Rescale the uint8 pixels from [0, 255] to [-1, 1] so that real images live in
# the same range as the generators' tanh output; with a [0, 1] scaling the
# discriminator could separate real from fake on pixel range alone.
# The dtype guard keeps the cell idempotent: re-running it does not rescale twice.
if x_train.dtype == np.uint8:
    x_train = (x_train.astype('float32') - 127.5) / 127.5
if x_test.dtype == np.uint8:
    x_test = (x_test.astype('float32') - 127.5) / 127.5

In [None]:
# Shared hyper-parameters for the cells below.
original_dim = 28 * 28   # number of pixels in one flattened 28x28 image
seed_dim = 32            # size of the noise vector fed to the generators
intermediate_dim = 256   # not referenced by the other cells visible in this notebook
batch_size = 8           # number of images generated by the test/plot helpers

In [None]:
def adam_optimizer(lr=0.0001):
    """Build the Adam optimizer used when compiling the models.

    Args:
        lr: learning rate.

    Returns:
        A ``tf.keras.optimizers.Adam`` instance with ``beta_1=0.5``.
    """
    # `learning_rate` is the supported keyword; the `lr` alias is deprecated and
    # rejected by recent Keras releases.
    return tf.keras.optimizers.Adam(learning_rate=lr, beta_1=0.5)

## A) Generator

In [None]:
# Exercise:
# * create a simple fully-connected generator that takes noise as input and generate an image
# * plot the output of the generator for some input sample
# * Hint: we need pixels between -1 and 1. What is the correct activation for the last layer ?

def create_generator():
    """Build and compile the fully-connected generator.

    The network maps a ``(seed_dim,)`` noise vector through two hidden dense
    layers (256 and 512 units, LeakyReLU with slope 0.2) to ``original_dim``
    tanh-activated values, reshaped to a ``(28, 28, 1)`` image.

    Returns:
        The compiled Keras model, named "generator".
    """
    noise_in = Input(shape=(seed_dim,))

    hidden = Dense(units=256, input_dim=seed_dim)(noise_in)
    hidden = LeakyReLU(0.2)(hidden)
    hidden = Dense(units=512)(hidden)
    hidden = LeakyReLU(0.2)(hidden)

    # tanh keeps every generated pixel inside [-1, 1].
    flat_image = Dense(units=original_dim, activation='tanh')(hidden)
    image = Reshape((28, 28, 1))(flat_image)

    model = Model(inputs=noise_in, outputs=image, name="generator")
    model.compile(loss='binary_crossentropy', optimizer=adam_optimizer())
    return model

In [None]:
generator = create_generator()
# summary() prints the table itself and returns None, so it must not be wrapped in print().
generator.summary()

In [None]:
def test_generator(generator):
    """Check that the generator maps a noise batch to ``(batch_size, 28, 28, 1)``.

    Raises:
        RuntimeError: if the generated batch has any other shape.
    """
    expected_shape = (batch_size, 28, 28, 1)
    seed = tf.random.normal(shape=(batch_size, seed_dim), mean=0., stddev=1.)
    actual_shape = generator(seed).numpy().shape

    if actual_shape == expected_shape:
        return
    raise RuntimeError(f"Shape is {actual_shape} (expected {expected_shape})")


test_generator(generator)

In [None]:
####
def generate_and_plot(generator, inputs=None):
    """Plot a row of generated images above a row of real images.

    Args:
        generator: Keras model mapping a ``(n, seed_dim)`` noise batch to
            images that can be reshaped to 28x28.
        inputs: optional array of real images (anything reshapeable to
            ``(-1, 28, 28, 1)``). Defaults to the first ``batch_size``
            training images. At most ``batch_size`` of them are shown.
    """
    n_cols = batch_size

    real_source = x_train if inputs is None else inputs
    # Slicing first means a short batch simply leaves trailing cells empty
    # instead of raising an IndexError while plotting.
    real_images = np.asarray(real_source[:n_cols]).reshape((-1, 28, 28, 1))

    seed = tf.random.normal(shape=(n_cols, seed_dim), mean=0., stddev=1.)
    # Inference mode: plotting must not update BatchNorm statistics or apply dropout.
    fake_images = generator(seed, training=False).numpy()

    fig, axes = plt.subplots(2, n_cols, figsize=(20, 5), squeeze=False)
    for ax in axes.flat:
        ax.axis("off")
    for ax, image in zip(axes[0], fake_images):   # top row: generated
        ax.imshow(image.reshape(28, 28), cmap="gray")
    for ax, image in zip(axes[1], real_images):   # bottom row: real
        ax.imshow(image.reshape(28, 28), cmap="gray")
    plt.show()
    # This helper is called repeatedly from the training loops; release the figure.
    plt.close(fig)

generate_and_plot(generator)
####

## B) Discriminator

In [None]:
# Exercise:
# * Create a simple fully-connected discriminator model
#   using a funnel 512 -> 256 -> 128
# * Train it to check that it can easily make the difference between 
#   your random generator and the real images

def create_discriminator() -> Model:
    """Build and compile the fully-connected discriminator.

    A ``(28, 28, 1)`` image is flattened and passed through a
    512 -> 256 -> 128 dense funnel (LeakyReLU 0.2, dropout 0.3 after the first
    two layers) down to a single unit.

    Returns:
        The compiled Keras model, named "discriminator". Its output is a raw
        logit (no sigmoid); callers apply the sigmoid themselves.
    """
    inp = Input(shape=(28, 28, 1))
    x = Flatten()(inp)
    # The input shape is already fixed by `Input`, so no `input_dim` is needed here.
    x = Dense(units=512)(x)
    x = LeakyReLU(0.2)(x)
    x = Dropout(0.3)(x)
    x = Dense(units=256)(x)
    x = LeakyReLU(0.2)(x)
    x = Dropout(0.3)(x)
    x = Dense(units=128)(x)
    x = LeakyReLU(0.2)(x)
    out = Dense(units=1)(x)

    model = Model(inputs=inp, outputs=out, name="discriminator")

    # The last layer has no activation, so the loss must treat the output as
    # logits; plain 'binary_crossentropy' would interpret them as probabilities.
    model.compile(
        loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
        optimizer=adam_optimizer(),
    )
    return model

In [None]:
discriminator = create_discriminator()
# summary() prints the table itself and returns None, so it must not be wrapped in print().
discriminator.summary()

In [None]:
# Build a small real-vs-fake dataset to sanity-check the discriminator.
# Label convention: 0 = real image, 1 = image produced by the (untrained) generator.
n_train = 1000
# Derived from the data so the label vector always matches the number of test images.
n_test = len(x_test)

seed = tf.random.normal(shape=(n_train, seed_dim), mean=0., stddev=1.)
fake_data = generator(seed).numpy()
discr_train = tf.concat([x_train[:n_train].reshape(-1, 28, 28, 1), fake_data], axis=0)
# NOTE: from here on y_train / y_test hold real-vs-fake labels and no longer the
# Fashion-MNIST class labels loaded earlier.
y_train = np.concatenate([np.zeros(n_train), np.ones(n_train)])

seed = tf.random.normal(shape=(n_test, seed_dim), mean=0., stddev=1.)
fake_data = generator(seed).numpy()
discr_test = tf.concat([x_test.reshape(-1, 28, 28, 1), fake_data], axis=0)
y_test = np.concatenate([np.zeros(n_test), np.ones(n_test)])
print(discr_test.shape)

In [None]:
# Train the discriminator on the real-vs-fake set with Keras' default fit() settings.
discriminator.fit(x=discr_train, y=y_train)

In [None]:
from sklearn.metrics import roc_auc_score

# Score the held-out real-vs-fake set and report the ROC AUC of the discriminator.
test_scores = discriminator.predict(discr_test)
auc = roc_auc_score(y_true=y_test, y_score=test_scores)
print(f"AUC is {auc}")

## C) GAN model training

In [None]:
# Exercise 4:
# * Implement the training of the GAN. What do you observe ?

def training(
    iter=2,
    batch_size=128,
    discr_func=create_discriminator,
    gen_func=create_generator,
    seed_dim=seed_dim,
    soft_labels=True,
    non_saturated=True
):
    """Train a GAN on ``x_train`` with alternating discriminator/generator steps.

    Args:
        iter: number of training iterations (one discriminator step and one
            generator step each).
        batch_size: number of real images (and of noise vectors) per step.
        discr_func: zero-argument factory returning a discriminator that
            outputs logits for ``(n, 28, 28, 1)`` images.
        gen_func: zero-argument factory returning a generator. It is called
            without arguments, so ``seed_dim`` must match its input size.
        seed_dim: dimension of the noise vectors.
        soft_labels: if True, use one-sided label smoothing (real target 0.9
            instead of 1.0) in the discriminator loss.
        non_saturated: if True, the generator minimises ``-log D(G(z))``;
            otherwise it minimises the original ``log(1 - D(G(z)))``.

    Returns:
        ``(losses_generator, losses_discriminator)``: two lists of Python
        floats, one entry per iteration.
    """
    generator = gen_func()
    discriminator = discr_func()

    generator_opt = tf.keras.optimizers.Adam(1e-4)
    discriminator_opt = tf.keras.optimizers.Adam(1e-4)

    # Cross-entropy computed directly on logits: numerically stable, unlike
    # log(sigmoid(x)) / log(1 - sigmoid(x)) which produce -inf once D saturates.
    bce_logits = tf.nn.sigmoid_cross_entropy_with_logits
    real_target = 0.9 if soft_labels else 1.0

    report_every = 100
    n_batches = len(x_train) // batch_size
    losses_generator = list()
    losses_discriminator = list()
    last_report_time = time.time()
    last_report_iter = 0

    for my_iter in range(iter):
        # Loading the batch (cycles through x_train in order, full batches only)
        real_iter = my_iter % n_batches
        real_data = x_train[real_iter*batch_size:(real_iter+1)*batch_size]
        real_data = real_data.reshape(-1, 28, 28, 1)

        # ---- Train discriminator ----
        noise = np.random.normal(0, 1, [batch_size, seed_dim]).astype('float32')
        # training=True activates Dropout and makes BatchNorm use batch statistics;
        # a bare model call runs in inference mode.
        generated_images = generator(noise, training=True)
        with tf.GradientTape() as d:
            real_logits = discriminator(real_data, training=True)
            fake_logits = discriminator(generated_images, training=True)
            loss_real = tf.reduce_mean(bce_logits(
                labels=real_target * tf.ones_like(real_logits), logits=real_logits))
            loss_fake = tf.reduce_mean(bce_logits(
                labels=tf.zeros_like(fake_logits), logits=fake_logits))
            loss_discriminator = loss_real + loss_fake
        # Backward pass (outside the tape so the update itself is not recorded)
        grads = d.gradient(loss_discriminator, discriminator.trainable_variables)
        discriminator_opt.apply_gradients(
            zip(grads, discriminator.trainable_variables)
        )
        losses_discriminator.append(float(loss_discriminator))

        # ---- Train generator ----
        noise = np.random.normal(0, 1, [batch_size, seed_dim]).astype('float32')
        with tf.GradientTape() as g:
            fake_logits = discriminator(generator(noise, training=True), training=True)
            if non_saturated:
                # -log D(G(z)): keeps useful gradients when D rejects the fakes.
                loss_generator = tf.reduce_mean(bce_logits(
                    labels=tf.ones_like(fake_logits), logits=fake_logits))
            else:
                # Vanilla minimax generator loss: log(1 - D(G(z))).
                loss_generator = -tf.reduce_mean(bce_logits(
                    labels=tf.zeros_like(fake_logits), logits=fake_logits))
        grads = g.gradient(loss_generator, generator.trainable_variables)
        generator_opt.apply_gradients(
            zip(grads, generator.trainable_variables)
        )
        losses_generator.append(float(loss_generator))

        if my_iter % report_every == 0:
            clear_output()
            print(f"Iter {my_iter} ({real_iter})")
            iters_done = my_iter - last_report_iter
            if iters_done > 0:
                # Average over the iterations actually run since the last report.
                time_spent = time.time() - last_report_time
                print(f"(Avg {time_spent/iters_done} seconds per iteration)")
            last_report_time = time.time()
            last_report_iter = my_iter
            d_real = tf.math.sigmoid(real_logits)
            d_fake = tf.math.sigmoid(fake_logits)
            print(my_iter, float(loss_discriminator),
                  float(tf.reduce_mean(d_real)), float(tf.reduce_mean(d_fake)))
            generate_and_plot(generator, real_data)

    return losses_generator, losses_discriminator

In [None]:
#EXERCISES
#1) Write code for GANs
#2) Add soft labels 
#3) Write non-saturated GANs

In [None]:
# Baseline run: 10000 iterations with batch size 64, both loss-variant flags switched off.
losses_generator, losses_discriminator = training(10000, 64, soft_labels=False, non_saturated=False)

In [None]:
#losses_generator, losses_discriminator = training(5000, 64, soft_labels=True, non_saturated=False)

In [None]:
#losses_generator, losses_discriminator = training(10000, 64, soft_labels=False, non_saturated=True)

In [None]:
# Smoothed loss curves of the last `training` run.
fig, ax = plt.subplots(figsize=(20, 10))
# float() makes the series numeric even when the loss lists hold TF scalar tensors.
generator_curve = pd.Series([float(v) for v in losses_generator]).rolling(50).mean()
discriminator_curve = pd.Series([float(v) for v in losses_discriminator]).rolling(50).mean()
ax.plot(generator_curve, label="Generator")
ax.plot(discriminator_curve, label="Discriminator")
ax.set_xlabel("Iteration")
ax.set_ylabel("Loss (rolling mean, window 50)")
ax.legend()
# plt.show() renders with the notebook's inline backend; fig.show() needs a GUI backend.
plt.show()

## D) Switching to CNNs

In [None]:
def create_generator_cnn(seed_dim=seed_dim):
    """Build and compile the convolutional generator.

    A ``(seed_dim,)`` noise vector is projected to a 7x7x256 feature map and
    upsampled by three transposed convolutions (strides 1, 2, 2) into a
    tanh-activated single-channel image.

    Args:
        seed_dim: dimension of the input noise vector.

    Returns:
        The compiled Keras model, named "generator".
    """
    latent = Input(shape=(seed_dim,))

    # Dense projection, then reshape into a small spatial feature map.
    feat = Dense(7*7*256, use_bias=False)(latent)
    feat = BatchNormalization()(feat)
    feat = LeakyReLU()(feat)
    feat = Reshape((7, 7, 256))(feat)

    # Stride 1 keeps the 7x7 resolution.
    feat = Conv2DTranspose(128, (5,5), strides=(1,1), padding='same', use_bias=False)(feat)
    feat = BatchNormalization()(feat)
    feat = LeakyReLU()(feat)

    # Stride 2 doubles the resolution.
    feat = Conv2DTranspose(64, (5,5), strides=(2,2), padding='same', use_bias=False)(feat)
    feat = BatchNormalization()(feat)
    feat = LeakyReLU()(feat)

    # Final stride-2 layer produces the one-channel output image.
    image = Conv2DTranspose(1, (5,5), strides=(2,2), padding='same', use_bias=False, activation='tanh')(feat)

    model = Model(inputs=latent, outputs=image, name="generator")
    model.compile(loss='binary_crossentropy', optimizer=adam_optimizer())
    return model

create_generator_cnn().summary()

In [None]:
def create_discriminator_cnn():
    """Build and compile the convolutional discriminator.

    Two stride-2 5x5 convolutions (64 then 128 filters, LeakyReLU, dropout 0.3)
    followed by a flatten and a single dense unit.

    Returns:
        The compiled Keras model, named "discriminator". Its output is a raw
        logit (no sigmoid).
    """
    inp = Input(shape=(28, 28, 1))
    # The input shape is already fixed by `Input`, so no `input_shape` is needed here.
    x = Conv2D(64, (5,5), strides=(2,2), padding='same')(inp)
    x = LeakyReLU()(x)
    x = Dropout(0.3)(x)
    x = Conv2D(128,(5,5), strides=(2,2), padding='same')(x)
    x = LeakyReLU()(x)
    x = Dropout(0.3)(x)
    x = Flatten()(x)
    x = Dense(1)(x)

    model = Model(
        inputs=inp,
        outputs=x,
        name="discriminator"
    )

    # The last layer has no activation, so the loss must treat the output as
    # logits; plain 'binary_crossentropy' would interpret them as probabilities.
    model.compile(
        loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
        optimizer=adam_optimizer(),
    )
    return model

create_discriminator_cnn().summary()

In [None]:
# Same training loop with the convolutional models: 1000 iterations, batch size 64.
# Overwrites losses_generator / losses_discriminator from the earlier run.
losses_generator, losses_discriminator = training(1000, 64, create_discriminator_cnn, create_generator_cnn, soft_labels=False, non_saturated=False)

In [None]:
# Smoothed loss curves of the last `training` run.
fig, ax = plt.subplots(figsize=(20, 10))
# float() makes the series numeric even when the loss lists hold TF scalar tensors.
generator_curve = pd.Series([float(v) for v in losses_generator]).rolling(30).mean()
discriminator_curve = pd.Series([float(v) for v in losses_discriminator]).rolling(30).mean()
ax.plot(generator_curve, label="Generator")
ax.plot(discriminator_curve, label="Discriminator")
ax.set_xlabel("Iteration")
ax.set_ylabel("Loss (rolling mean, window 30)")
ax.legend()
# plt.show() renders with the notebook's inline backend; fig.show() needs a GUI backend.
plt.show()

## E) Optional: Wasserstein GAN

Exercise: Implement a Wasserstein GAN.


In [None]:
# Exercise 4:
# * Implement the training of the GAN. What do you observe ?
def gradient_penalty(discriminator, x, x_gen):
    """Gradient penalty on random interpolates between real and generated images.

    Args:
        discriminator: callable mapping an image batch to one score per image.
        x: batch of real images, 4-D with the batch on axis 0.
        x_gen: batch of generated images with the same shape as ``x``.

    Returns:
        Scalar tensor: mean over the batch of ``(||grad D(x_hat)|| - 1) ** 2``.
    """
    # One mixing coefficient per sample, broadcast over the three image axes.
    epsilon = tf.random.uniform([x.shape[0], 1, 1, 1], 0.0, 1.0)
    x_hat = epsilon * x + (1 - epsilon) * x_gen
    with tf.GradientTape() as t:
        t.watch(x_hat)
        d_hat = discriminator(x_hat)
    gradients = t.gradient(d_hat, x_hat)
    # The per-sample norm spans every non-batch axis (channels included), and the
    # small constant keeps the gradient of sqrt finite when the norm is zero.
    non_batch_axes = list(range(1, len(gradients.shape)))
    grad_norm = tf.sqrt(tf.reduce_sum(tf.square(gradients), axis=non_batch_axes) + 1e-12)
    d_regularizer = tf.reduce_mean(tf.square(grad_norm - 1.0))
    return d_regularizer

def training_wasserstein(
    iter=2,
    batch_size=128,
    discr_func=create_discriminator,
    gen_func=create_generator,
    seed_dim=seed_dim,
    cnn = True
):
    """Train a Wasserstein GAN with gradient penalty on ``x_train``.

    Args:
        iter: number of training iterations (one critic step and one generator
            step each).
        batch_size: number of real images (and of noise vectors) per step.
        discr_func: zero-argument factory returning the critic (raw scores).
        gen_func: zero-argument factory returning the generator. It is called
            without arguments, so ``seed_dim`` must match its input size.
        seed_dim: dimension of the noise vectors.
        cnn: unused; kept so existing calls keep working.

    Returns:
        ``(losses_generator, losses_discriminator)``: per-iteration values of
        the generator objective (mean critic score on fakes) and the critic
        objective (score gap minus weighted gradient penalty). Both are
        maximised by their respective networks.
    """
    generator = gen_func()
    discriminator = discr_func()
    gradient_penalty_weight = 10
    generator_opt = tf.keras.optimizers.Adam(learning_rate=1e-4)
    discriminator_opt = tf.keras.optimizers.Adam(learning_rate=1e-3)

    report_every = 100
    n_batches = len(x_train) // batch_size
    losses_generator = list()
    losses_discriminator = list()
    last_report_time = time.time()
    last_report_iter = 0

    for my_iter in range(iter):

        # Loading the batch (cycles through x_train in order, full batches only)
        real_iter = my_iter % n_batches
        real_data = x_train[real_iter*batch_size:(real_iter+1)*batch_size]
        real_data = real_data.reshape(-1, 28, 28, 1)

        # ---- Train discriminator (critic) ----
        noise = np.random.normal(0, 1, [batch_size, seed_dim]).astype('float32')
        # training=True activates Dropout and makes BatchNorm use batch statistics;
        # a bare model call runs in inference mode.
        generated_images = generator(noise, training=True)
        with tf.GradientTape() as g:
            pred_real = discriminator(real_data, training=True)
            pred_fake = discriminator(generated_images, training=True)

            loss_discriminator = (tf.reduce_mean(pred_real) - tf.reduce_mean(pred_fake))
            loss_discriminator -= gradient_penalty_weight*gradient_penalty(discriminator, real_data, generated_images)
            # The optimizer minimises, so negate the objective inside the tape.
            critic_loss = -loss_discriminator
        grads = g.gradient(critic_loss, discriminator.trainable_variables)
        discriminator_opt.apply_gradients(
            zip(grads, discriminator.trainable_variables)
        )
        losses_discriminator.append(loss_discriminator.numpy())

        # ---- Train generator ----
        noise = np.random.normal(0, 1, [batch_size, seed_dim]).astype('float32')
        with tf.GradientTape() as g:
            pred = discriminator(generator(noise, training=True), training=True)
            loss_generator = tf.reduce_mean(pred)
            generator_loss = -loss_generator
        grads = g.gradient(generator_loss, generator.trainable_variables)
        generator_opt.apply_gradients(
            zip(grads, generator.trainable_variables)
        )
        losses_generator.append(loss_generator.numpy())

        if my_iter % report_every == 0:
            clear_output()
            print(f"Iter {my_iter} ({real_iter})")
            iters_done = my_iter - last_report_iter
            if iters_done > 0:
                # Average over the iterations actually run since the last report.
                time_spent = time.time() - last_report_time
                print(f"(Avg {time_spent/iters_done} seconds per iteration)")
            print(tf.reduce_mean(pred_real), tf.reduce_mean(pred_fake))
            last_report_time = time.time()
            last_report_iter = my_iter
            generate_and_plot(generator, real_data)

    return losses_generator, losses_discriminator

In [None]:
# Wasserstein training with the default (fully-connected) models: 1000 iterations, batch size 64.
losses_generator_w, losses_discriminator_w = training_wasserstein(1000, 64)

In [None]:
# Wasserstein training with the convolutional models: 3000 iterations, batch size 64.
losses_generator_cnn, losses_discriminator_cnn = training_wasserstein(3000, 64, discr_func=create_discriminator_cnn, gen_func=create_generator_cnn)

In [None]:
# Smoothed objective curves of the fully-connected Wasserstein run.
fig, ax = plt.subplots(figsize=(20, 10))
generator_curve = pd.Series([float(v) for v in losses_generator_w]).rolling(30).mean()
discriminator_curve = pd.Series([float(v) for v in losses_discriminator_w]).rolling(30).mean()
ax.plot(generator_curve, label="Generator")
ax.plot(discriminator_curve, label="Discriminator")
ax.set_xlabel("Iteration")
ax.set_ylabel("Objective (rolling mean, window 30)")
ax.legend()
# plt.show() renders with the notebook's inline backend; fig.show() needs a GUI backend.
plt.show()

In [None]:
# Exercise 4:
# * Implement the training of the GAN. What do you observe ?
# NOTE(review): this re-defines `gradient_penalty`, which is already defined in the
# Wasserstein cell earlier in the notebook; the later definition shadows the
# earlier one. Consider keeping a single definition.
def gradient_penalty(discriminator, x, x_gen):
    """Gradient penalty on random interpolates between real and generated images.

    Args:
        discriminator: callable mapping an image batch to one score per image.
        x: batch of real images, 4-D with the batch on axis 0.
        x_gen: batch of generated images with the same shape as ``x``.

    Returns:
        Scalar tensor: mean over the batch of ``(||grad D(x_hat)|| - 1) ** 2``.
    """
    # One mixing coefficient per sample, broadcast over the three image axes.
    epsilon = tf.random.uniform([x.shape[0], 1, 1, 1], 0.0, 1.0)
    x_hat = epsilon * x + (1 - epsilon) * x_gen
    with tf.GradientTape() as t:
        t.watch(x_hat)
        d_hat = discriminator(x_hat)
    gradients = t.gradient(d_hat, x_hat)
    # The per-sample norm spans every non-batch axis (channels included), and the
    # small constant keeps the gradient of sqrt finite when the norm is zero.
    non_batch_axes = list(range(1, len(gradients.shape)))
    grad_norm = tf.sqrt(tf.reduce_sum(tf.square(gradients), axis=non_batch_axes) + 1e-12)
    d_regularizer = tf.reduce_mean(tf.square(grad_norm - 1.0))
    return d_regularizer

def training_vanilla_GANs_regularized(
    iter=2,
    batch_size=128,
    discr_func=create_discriminator,
    gen_func=create_generator,
    seed_dim=seed_dim,
    cnn = True
):
    """Train a log-loss GAN whose discriminator is regularised by a gradient penalty.

    Args:
        iter: number of training iterations (one discriminator step and one
            generator step each).
        batch_size: number of real images (and of noise vectors) per step.
        discr_func: zero-argument factory returning a discriminator that
            outputs logits.
        gen_func: zero-argument factory returning the generator. It is called
            without arguments, so ``seed_dim`` must match its input size.
        seed_dim: dimension of the noise vectors.
        cnn: unused; kept so existing calls keep working.

    Returns:
        ``(losses_generator, losses_discriminator)``: per-iteration values of
        the generator objective ``mean(log D(G(z)))`` and the discriminator
        objective ``mean(log D(x)) + mean(log(1 - D(G(z))))`` minus the
        weighted gradient penalty. Both are maximised by their networks.
    """
    generator = gen_func()
    discriminator = discr_func()
    gradient_penalty_weight = 10
    generator_opt = tf.keras.optimizers.Adam(learning_rate=1e-4)
    discriminator_opt = tf.keras.optimizers.Adam(learning_rate=1e-3)

    report_every = 100
    n_batches = len(x_train) // batch_size
    losses_generator = list()
    losses_discriminator = list()
    last_report_time = time.time()
    last_report_iter = 0

    for my_iter in range(iter):

        # Loading the batch (cycles through x_train in order, full batches only)
        real_iter = my_iter % n_batches
        real_data = x_train[real_iter*batch_size:(real_iter+1)*batch_size]
        real_data = real_data.reshape(-1, 28, 28, 1)

        # ---- Train discriminator ----
        noise = np.random.normal(0, 1, [batch_size, seed_dim]).astype('float32')
        # training=True activates Dropout and makes BatchNorm use batch statistics;
        # a bare model call runs in inference mode.
        generated_images = generator(noise, training=True)
        with tf.GradientTape() as g:
            real_logits = discriminator(real_data, training=True)
            fake_logits = discriminator(generated_images, training=True)

            # log_sigmoid(x) = log D and log_sigmoid(-x) = log(1 - D), computed
            # stably instead of taking the log of a saturated sigmoid.
            loss_discriminator = (tf.reduce_mean(tf.math.log_sigmoid(real_logits))
                                  + tf.reduce_mean(tf.math.log_sigmoid(-fake_logits)))
            loss_discriminator -= gradient_penalty_weight*gradient_penalty(discriminator, real_data, generated_images)
            # The optimizer minimises, so negate the objective inside the tape.
            discriminator_loss = -loss_discriminator
        grads = g.gradient(discriminator_loss, discriminator.trainable_variables)
        discriminator_opt.apply_gradients(
            zip(grads, discriminator.trainable_variables)
        )
        losses_discriminator.append(loss_discriminator.numpy())

        # ---- Train generator (non-saturating objective) ----
        noise = np.random.normal(0, 1, [batch_size, seed_dim]).astype('float32')
        with tf.GradientTape() as g:
            gen_logits = discriminator(generator(noise, training=True), training=True)
            loss_generator = tf.reduce_mean(tf.math.log_sigmoid(gen_logits))
            generator_loss = -loss_generator
        grads = g.gradient(generator_loss, generator.trainable_variables)
        generator_opt.apply_gradients(
            zip(grads, generator.trainable_variables)
        )
        losses_generator.append(loss_generator.numpy())

        if my_iter % report_every == 0:
            clear_output()
            print(f"Iter {my_iter} ({real_iter})")
            iters_done = my_iter - last_report_iter
            if iters_done > 0:
                # Average over the iterations actually run since the last report.
                time_spent = time.time() - last_report_time
                print(f"(Avg {time_spent/iters_done} seconds per iteration)")
            pred_real = tf.math.sigmoid(real_logits)
            pred_fake = tf.math.sigmoid(fake_logits)
            print(tf.reduce_mean(pred_real), tf.reduce_mean(pred_fake))
            last_report_time = time.time()
            last_report_iter = my_iter
            generate_and_plot(generator, real_data)

    return losses_generator, losses_discriminator

In [None]:
# Gradient-penalty-regularised log-loss GAN with the convolutional models: 3500 iterations, batch size 64.
# Overwrites losses_generator_cnn / losses_discriminator_cnn from the Wasserstein CNN run.
losses_generator_cnn, losses_discriminator_cnn = training_vanilla_GANs_regularized(3500, 64, discr_func=create_discriminator_cnn, gen_func=create_generator_cnn)

In [None]:
# Plain `training` loop with the convolutional models: 1500 iterations, batch size 64.
# soft_labels / non_saturated are left at the defaults of training().
# Overwrites losses_generator_cnn / losses_discriminator_cnn once more.
losses_generator_cnn, losses_discriminator_cnn = training(1500, 64, discr_func=create_discriminator_cnn, gen_func=create_generator_cnn)

In [None]:
# Smoothed loss curves of the most recent CNN run. The original cell re-plotted
# the fully-connected Wasserstein series (`*_w`) instead of the `*_cnn` results
# computed just above.
fig, ax = plt.subplots(figsize=(20, 10))
# float() makes the series numeric even when the loss lists hold TF scalar tensors.
generator_curve = pd.Series([float(v) for v in losses_generator_cnn]).rolling(30).mean()
discriminator_curve = pd.Series([float(v) for v in losses_discriminator_cnn]).rolling(30).mean()
ax.plot(generator_curve, label="Generator")
ax.plot(discriminator_curve, label="Discriminator")
ax.set_xlabel("Iteration")
ax.set_ylabel("Loss (rolling mean, window 30)")
ax.legend()
# plt.show() renders with the notebook's inline backend; fig.show() needs a GUI backend.
plt.show()