# Conditional GAN

Although GANs can generate new, plausible random examples for a given dataset, there is no way to control the types of images that are generated other than trying to figure out the complex relationship between the latent space input to the generator and the generated images. A conditional GAN (cGAN) addresses this by letting the generator model produce images conditioned on additional information, such as a class label.


1. The limitations of generating random samples with a GAN that can be overcome with a conditional generative adversarial network.
2. How to develop and evaluate an unconditional generative adversarial network for generating fashion MNIST images.
3. How to develop and evaluate a conditional generative adversarial network for generating fashion MNIST images.

The main motivations for using class label information in a GAN model, which are central to the design of cGANs, are listed below.
1. Improve the GAN.
2. Improve targeted Image Generation.
3. Faster training of GAN.

## Part 1 - Defining the Conditional GAN

In [1]:
### Import necessary packages ###
import numpy as np
from numpy.random import randn
from numpy.random import randint
from keras.datasets.fashion_mnist import load_data
from keras.optimizers import Adam
from keras.models import Model
from keras.layers import Input
from keras.layers import Dense
from keras.layers import Reshape
from keras.layers import Flatten
from keras.layers import Conv2D
from keras.layers import Conv2DTranspose
from keras.layers import LeakyReLU
from keras.layers import Dropout
from keras.layers import Embedding
from keras.layers import Concatenate


For conditional GANs, the discriminator is fed both an image and its label, so the Keras Sequential API is not suitable in this case. Instead, we use the Keras functional API, which makes it easy to feed multiple inputs to the network.

In [2]:
# define the standalone discriminator model
def define_discriminator(in_shape=(28,28,1), n_classes=10):
  """Build and compile the conditional discriminator.

  The class label is embedded, projected to one value per pixel and
  stacked onto the image as an extra channel. Two strided convolutions
  then reduce the merged tensor before a single sigmoid unit scores it.

  Args:
    in_shape: shape of the image input; in_shape[0] and in_shape[1]
      also size the label channel.
    n_classes: number of distinct labels handled by the embedding.

  Returns:
    A Keras Model taking [image, label], compiled with binary
    cross-entropy, Adam(learning_rate=0.0002, beta_1=0.5) and an
    accuracy metric.
  """
  rows, cols = in_shape[0], in_shape[1]

  # label branch: id -> 50-d embedding -> rows*cols values -> one extra channel
  label_in = Input(shape=(1,))
  label_plane = Embedding(n_classes, 50)(label_in)
  label_plane = Dense(rows * cols)(label_plane)
  label_plane = Reshape((rows, cols, 1))(label_plane)

  # image branch, with the label plane appended as an additional channel
  image_in = Input(shape=in_shape)
  x = Concatenate()([image_in, label_plane])

  # two identical stride-2 downsampling stages
  for _ in range(2):
    x = Conv2D(128, (3,3), strides=(2,2), padding="same")(x)
    x = LeakyReLU(alpha=0.2)(x)

  # classifier head: flatten, dropout, single sigmoid unit
  x = Flatten()(x)
  x = Dropout(0.4)(x)
  verdict = Dense(1, activation="sigmoid")(x)

  discriminator = Model([image_in, label_in], verdict)
  discriminator.compile(
    loss="binary_crossentropy",
    optimizer=Adam(learning_rate=0.0002, beta_1=0.5),
    metrics=["accuracy"],
  )
  return discriminator

# build a discriminator with the default arguments (28x28x1 images, 10 classes)
# and print its layer-by-layer summary
disc_model = define_discriminator()
disc_model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 1)]          0           []                               
                                                                                                  
 embedding (Embedding)          (None, 1, 50)        500         ['input_1[0][0]']                
                                                                                                  
 dense (Dense)                  (None, 1, 784)       39984       ['embedding[0][0]']              
                                                                                                  
 input_2 (InputLayer)           [(None, 28, 28, 1)]  0           []                               
                                                                                              

In [3]:
################ Define generator #####################

# define the standalone generator model
def define_generator(latent_dim, n_classes=10):
  """Build the conditional generator (returned uncompiled).

  The latent vector is projected to a 7x7x128 tensor and the class label
  to a 7x7x1 plane; the two are concatenated and passed through two
  stride-2 transposed convolutions, then a tanh convolution with a
  single output channel.

  Args:
    latent_dim: length of the latent noise vector.
    n_classes: number of distinct labels handled by the embedding.

  Returns:
    A Keras Model taking [latent_vector, label] and returning the
    tanh-activated image tensor.
  """
  # label branch: id -> 50-d embedding -> 49 values -> 7x7 single-channel plane
  label_in = Input(shape=(1,))
  label_plane = Embedding(n_classes, 50)(label_in)
  label_plane = Dense(7 * 7)(label_plane)
  label_plane = Reshape((7, 7, 1))(label_plane)

  # noise branch: latent vector -> 7x7 foundation with 128 feature maps
  noise_in = Input(shape=(latent_dim,))
  base = Dense(128 * 7 * 7)(noise_in)
  base = LeakyReLU(alpha=0.2)(base)
  base = Reshape((7, 7, 128))(base)

  # append the label plane as an extra feature map
  x = Concatenate()([base, label_plane])

  # two stride-2 upsampling stages (7x7 -> 14x14 -> 28x28)
  for _ in range(2):
    x = Conv2DTranspose(128, (4,4), strides=(2,2), padding="same")(x)
    x = LeakyReLU(alpha=0.2)(x)

  # single-channel output squashed by tanh
  image_out = Conv2D(1, (7,7), activation="tanh", padding="same")(x)
  return Model([noise_in, label_in], image_out)

# build a generator for a 100-dimensional latent space and 10 classes,
# then print its layer-by-layer summary
gen_model = define_generator(latent_dim=100, n_classes=10)
gen_model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_4 (InputLayer)           [(None, 100)]        0           []                               
                                                                                                  
 input_3 (InputLayer)           [(None, 1)]          0           []                               
                                                                                                  
 dense_3 (Dense)                (None, 6272)         633472      ['input_4[0][0]']                
                                                                                                  
 embedding_1 (Embedding)        (None, 1, 50)        500         ['input_3[0][0]']                
                                                                                            

In [4]:
# define the combined generator and discriminator model, for updating the generator
def define_gan(g_model, d_model):
  """Chain generator and discriminator into the model used to update the generator.

  Note that this sets d_model.trainable = False on the object passed in.

  Args:
    g_model: generator whose inputs are [noise, label].
    d_model: discriminator called on [image, label].

  Returns:
    A Keras Model mapping [noise, label] to the discriminator's output,
    compiled with binary cross-entropy and
    Adam(learning_rate=0.0002, beta_1=0.5).
  """
  # freeze the discriminator inside the combined model
  d_model.trainable = False

  # reuse the generator's own input tensors as the combined model's inputs
  noise_in, label_in = g_model.input

  # the discriminator scores the generated image together with the same label
  verdict = d_model([g_model.output, label_in])

  combined = Model([noise_in, label_in], verdict)
  combined.compile(
    loss="binary_crossentropy",
    optimizer=Adam(learning_rate=0.0002, beta_1=0.5),
  )
  return combined

# combine the generator and discriminator built above and print the summary
gan_model = define_gan(gen_model, disc_model)
gan_model.summary()

Model: "model_2"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_4 (InputLayer)           [(None, 100)]        0           []                               
                                                                                                  
 input_3 (InputLayer)           [(None, 1)]          0           []                               
                                                                                                  
 dense_3 (Dense)                (None, 6272)         633472      ['input_4[0][0]']                
                                                                                                  
 embedding_1 (Embedding)        (None, 1, 50)        500         ['input_3[0][0]']                
                                                                                            

### 1.1 - Data Preparation

In [5]:
# load fashion mnist images
def load_real_samples():
  """Load the Fashion-MNIST training split and scale it for the GAN.

  Returns:
    [images, labels]: images are float32 with a trailing channel axis,
    mapped from [0, 255] to [-1, 1]; labels are returned as loaded.
  """
  # only the training split is used; the test split is discarded
  (train_images, train_labels), _ = load_data()
  # add the channel axis and switch to float in one step
  pixels = np.expand_dims(train_images, axis=-1).astype("float32")
  # centre on 127.5 so that 0 -> -1 and 255 -> 1
  pixels = (pixels - 127.5) / 127.5
  return [pixels, train_labels]

In [6]:
# # select real samples
def generate_real_samples(dataset, n_samples):
  """Draw a random batch of real images with their class labels.

  Args:
    dataset: [images, labels] pair indexed along the first axis.
    n_samples: number of samples to draw (with replacement).

  Returns:
    ([images, labels], y) where y is an (n_samples, 1) array of ones
    marking every sample as real.
  """
  all_images, all_labels = dataset
  # random row indices in [0, number of images)
  picks = randint(0, all_images.shape[0], n_samples)
  # target value 1 for each real sample
  real_targets = np.ones((n_samples, 1))
  return [all_images[picks], all_labels[picks]], real_targets

In [22]:
# generate points in latent space as input for the generator
def generate_latent_points(latent_dim, n_samples, n_classes=10):
  """Sample generator inputs: Gaussian noise plus random class labels.

  Args:
    latent_dim: length of each latent vector.
    n_samples: number of latent vectors / labels to draw.
    n_classes: labels are drawn from the integers 0 .. n_classes - 1.

  Returns:
    [z, labels] with z of shape (n_samples, latent_dim) and labels of
    shape (n_samples,).
  """
  # draw all values as one flat vector, then fold into one row per sample
  noise = randn(latent_dim * n_samples).reshape(n_samples, latent_dim)
  class_ids = randint(0, n_classes, n_samples)
  return [noise, class_ids]

# use the generator to generate n fake examples, with class labels
def generate_fake_samples(generator, latent_dim, n_samples, n_classes=10):
  """Generate a batch of fake images with the generator.

  Args:
    generator: model whose predict() accepts [latent_points, labels].
    latent_dim: length of each latent vector.
    n_samples: number of fake samples to produce.
    n_classes: number of classes to draw the random labels from
      (forwarded to generate_latent_points).

  Returns:
    ([images, labels], y) where y is an (n_samples, 1) array of zeros
    marking every sample as fake.
  """
  # generate points in latent space together with random class labels
  z_input, labels_input = generate_latent_points(latent_dim, n_samples, n_classes)
  # verbose must be the integer 0 to silence predict(); the string '0' is
  # not one of the documented values ("auto", 0, 1, 2)
  images = generator.predict([z_input, labels_input], verbose=0)
  # target value 0 for each fake sample
  y = np.zeros((n_samples, 1))
  return [images, labels_input], y

### 1.2 - Defining the Training Loop

In [30]:
from timeit import default_timer

# train the generator and discriminator
def train(g_model, d_model, gan_model, dataset, latent_dim, n_epochs=100, n_batch=128, print_freq=64, save_path="cgan_generator.h5"):
  """Alternately train the discriminator and the generator, then save the generator.

  Each batch step updates the discriminator on half a batch of real
  samples and half a batch of generated samples, then updates the
  generator through gan_model on a full batch of latent points.

  Args:
    g_model: generator, used to produce fake samples and saved at the end.
    d_model: discriminator; its train_on_batch result is unpacked as a
      (loss, metric) pair.
    gan_model: combined model used to update the generator.
    dataset: [images, labels] pair; dataset[0].shape[0] is the sample count.
    latent_dim: length of the latent vectors fed to the generator.
    n_epochs: number of passes over the dataset.
    n_batch: batch size for the generator update; the discriminator sees
      n_batch // 2 real and n_batch // 2 fake samples per step.
    print_freq: print the losses every print_freq batches.
    save_path: file the generator is saved to after training.

  Raises:
    ValueError: if training is requested but the dataset holds fewer
      samples than n_batch, so no batch could be formed.
  """
  n_available = dataset[0].shape[0]
  bat_per_epo = n_available // n_batch
  half_batch = n_batch // 2
  # without at least one batch per epoch nothing would be trained
  if n_epochs > 0 and bat_per_epo == 0:
    raise ValueError(
      f"dataset has {n_available} samples, fewer than n_batch={n_batch}; no batches to train on"
    )
  train_start_t = default_timer()
  # manually enumerate epochs
  for i in range(n_epochs):
    epoch_start_t = default_timer()
    # enumerate batches over the training set
    for j in range(bat_per_epo):
      # update the discriminator on randomly selected real samples
      [X_real, labels_real], y_real = generate_real_samples(dataset, half_batch)
      d_loss1, _ = d_model.train_on_batch([X_real, labels_real], y_real)
      # update the discriminator on freshly generated fake samples
      [X_fake, labels], y_fake = generate_fake_samples(g_model, latent_dim, half_batch)
      d_loss2, _ = d_model.train_on_batch([X_fake, labels], y_fake)
      # prepare points in latent space as input for the generator
      [z_input, labels_input] = generate_latent_points(latent_dim, n_batch)
      # inverted targets: the generator is updated as if its fakes were real
      y_gan = np.ones((n_batch, 1))
      # update the generator via the discriminator's error
      g_loss = gan_model.train_on_batch([z_input, labels_input], y_gan)
      # summarize loss on this batch
      if (j + 1) % print_freq == 0:
        curr_t = default_timer()
        print("Epoch >%d, Batch %d/%d, d1=%.3f, d2=%.3f g=%.3f, time=%.2f s" %
              (i+1, j+1, bat_per_epo, d_loss1, d_loss2, g_loss, curr_t - train_start_t))
    # read the clock after the loop so the value is always defined
    epoch_end_t = default_timer()
    print(f"Epoch time: {round(epoch_end_t - epoch_start_t, 2)} s; Total time: {round(epoch_end_t - train_start_t, 2)} s")
  # save the generator model
  g_model.save(save_path)

### 1.3 - Training the GAN

Estimated runtime: n_epochs * (1 minute / epoch)

We will demonstrate n_epochs = 10 in class. 

This gives us some results but they're not amazing. 

You are invited to experiment with larger n_epochs at home

In [24]:
# size of the latent space
latent_dim = 100
# number of training epochs used for the in-class demonstration
n_epochs = 10
# create a fresh discriminator (separate from disc_model built earlier)
d_model = define_discriminator()
# create a fresh generator (separate from gen_model built earlier)
g_model = define_generator(latent_dim)
# combine them into the conditional GAN; this rebinds gan_model
gan_model = define_gan(g_model, d_model)
# load image data as an [images, labels] pair
dataset = load_real_samples()

In [31]:
%%time
# train for n_epochs epochs; train() saves the generator when it finishes
train(g_model, d_model, gan_model, dataset, latent_dim, n_epochs=n_epochs)

Epoch >1, Batch 64/468, d1=0.648, d2=0.614 g=0.834, time=7.02 s
Epoch >1, Batch 128/468, d1=0.602, d2=0.638 g=0.789, time=13.79 s
Epoch >1, Batch 192/468, d1=0.683, d2=0.690 g=0.746, time=20.53 s
Epoch >1, Batch 256/468, d1=0.660, d2=0.595 g=0.870, time=27.51 s
Epoch >1, Batch 320/468, d1=0.695, d2=0.610 g=0.839, time=34.41 s
Epoch >1, Batch 384/468, d1=0.639, d2=0.724 g=0.710, time=41.46 s
Epoch >1, Batch 448/468, d1=0.659, d2=0.643 g=0.813, time=50.90 s
Epoch time: 53.61 s; Total time: 53.61 s
Epoch >2, Batch 64/468, d1=0.626, d2=0.721 g=0.720, time=61.77 s
Epoch >2, Batch 128/468, d1=0.571, d2=0.652 g=0.814, time=69.33 s
Epoch >2, Batch 192/468, d1=0.559, d2=0.617 g=0.898, time=76.18 s
Epoch >2, Batch 256/468, d1=0.723, d2=0.744 g=0.754, time=84.67 s
Epoch >2, Batch 320/468, d1=0.718, d2=0.749 g=0.781, time=92.57 s
Epoch >2, Batch 384/468, d1=0.641, d2=0.621 g=0.845, time=101.10 s
Epoch >2, Batch 448/468, d1=0.703, d2=0.617 g=0.820, time=110.13 s
Epoch time: 59.29 s; Total time: 112



Epoch time: 48.54 s; Total time: 508.54 s
CPU times: user 6min, sys: 21 s, total: 6min 21s
Wall time: 8min 28s


### 1.4 - Evaluating our GAN

In [None]:
import numpy as np
from numpy.random import randn
from numpy.random import randint
from keras.models import load_model
import matplotlib.pyplot as plt

# generate points in latent space as input for the generator
def generate_latent_points(latent_dim, n_samples, n_classes=10):
  """Return random generator inputs: latent vectors and class labels.

  Args:
    latent_dim: length of each latent vector.
    n_samples: how many vectors and labels to draw.
    n_classes: labels are integers in [0, n_classes).

  Returns:
    [z, labels]; z has shape (n_samples, latent_dim), labels has shape
    (n_samples,).
  """
  # one flat draw of standard-normal values, folded into a batch
  flat_noise = randn(latent_dim * n_samples)
  batch = flat_noise.reshape(n_samples, latent_dim)
  # one random class id per sample
  class_ids = randint(0, n_classes, n_samples)
  return [batch, class_ids]

# create and save a plot of generated images
def save_plot(examples, n):
  """Draw the first n*n images in an n-by-n grid and save it as a PNG.

  Args:
    examples: array indexed as examples[i, :, :, 0] for image i.
    n: number of rows and of columns in the grid.
  """
  n_cells = n * n
  for cell in range(n_cells):
    # subplot positions are 1-based
    plt.subplot(n, n, cell + 1)
    plt.axis("off")
    # show channel 0 with the reversed gray colormap
    picture = examples[cell, :, :, 0]
    plt.imshow(picture, cmap="gray_r")
  # write the figure to disk instead of showing it
  plt.savefig("cgan_generator_100.png", dpi=300)

# load the generator saved at the end of training
model = load_model("cgan_generator.h5")
# sample 100 latent vectors of length 100; the random labels returned
# alongside them are not needed because explicit labels are set below
latent_points = generate_latent_points(100, 100)[0]
# labels 0..9 repeated 10 times, so each column of the 10x10 grid shows one class
labels = np.tile(np.arange(10), 10)
# generate images
X = model.predict([latent_points, labels])
# scale from [-1,1] to [0,1]
X = (X + 1) / 2.0
# plot the result
save_plot(X, 10)