## Acknowledgements

* Used [Monet CycleGAN Tutorial](https://www.kaggle.com/amyjang/monet-cyclegan-tutorial) kernel by [Amy Jang](https://www.kaggle.com/amyjang) to build the data pipeline. 
* The [official TensorFlow Cycle GAN tutorial](https://www.tensorflow.org/tutorials/generative/cyclegan) is used to build the baseline model based on Pix2Pix model.
* Used [CycleGAN to generate Monet-style images](https://www.kaggle.com/swepat/cyclegan-to-generate-monet-style-images) kernel by [Swetha](https://www.kaggle.com/swepat) to build the submission pipeline.


## Updates

* `v1`: Using generator and discriminator used in Pix2Pix model as the baseline. Using Random Jitter as the augmentation policy while training. 

# 🧰 Setups and Imports

* We will be using `tensorflow_examples` package to import generator and discriminator used in Pix2Pix. 
* [Weights and Biases](https://wandb.ai/site) is used for experiment tracking. 

In [None]:
!pip install -q git+https://github.com/tensorflow/examples.git
!pip install wandb -q

In [None]:
import tensorflow as tf

from tensorflow.keras.layers import * 
from tensorflow.keras.models import *
from tensorflow.keras.initializers import *
from tensorflow.keras.optimizers import *
from tensorflow_addons.layers import *

import tensorflow_datasets as tfds
from tensorflow_examples.models.pix2pix import pix2pix

import os
import random
import time
import PIL
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import clear_output

from kaggle_datasets import KaggleDatasets

In [None]:
import wandb
from kaggle_secrets import UserSecretsClient

# Read the W&B API key from the Kaggle secret named "wandb_api" so the key
# is not hard-coded in the notebook.
user_secrets = UserSecretsClient()
wandb_api = user_secrets.get_secret("wandb_api")

# Authenticate this session with Weights & Biases using that key.
wandb.login(key=wandb_api)

In [None]:
# Set the random seeds.
# NOTE: the built-in hash() of a str is salted per interpreter process
# (see PYTHONHASHSEED), so seeds derived from hash("...") change on every
# run and do not make anything reproducible. A fixed integer seed is used
# instead so that repeated runs start from the same random state.
os.environ['TF_CUDNN_DETERMINISTIC'] = '1'
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# 📀 Load Data and Prepare

We will have two separate `tf.data` datasets:
* One for Monet which will be our style images. 
* Other for the content images. 

The resulting `monet_ds` and `photo_ds` data loaders will be used to train the CycleGAN. We will train with some augmentation policy so that the model doesn't overfit on the training data since the same data is used for generating styled images. 

`photo_ds_eval` will be used to generate the styled images. Note that only pixel normalization is applied in the evaluation dataset.

In [None]:
# Look up the GCS path of the attached Kaggle dataset and list the TFRecord
# files for each image domain.
GCS_PATH = KaggleDatasets().get_gcs_path()
print(GCS_PATH)

MONET_FILENAMES = tf.io.gfile.glob(f'{GCS_PATH}/monet_tfrec/*.tfrec')
PHOTO_FILENAMES = tf.io.gfile.glob(f'{GCS_PATH}/photo_tfrec/*.tfrec')

print('Monet TFRecord Files:', len(MONET_FILENAMES))
print('Photo TFRecord Files:', len(PHOTO_FILENAMES))

In [None]:
IMG_HEIGHT = 256  # height of the random crop and of the model input
IMG_WIDTH = 256  # width of the random crop and of the model input
BUFFER_SIZE = 1024  # shuffle buffer size used by the tf.data pipelines
AUTOTUNE = tf.data.experimental.AUTOTUNE  # passed as num_parallel_calls to Dataset.map

# Function to read the TFRecord files.
def read_tfrecord(example):
    """Parse one serialized TFRecord example and return its decoded image.

    The example is parsed with three scalar string features
    (``image_name``, ``image`` and ``target``); only ``image`` is used,
    decoded as a 3-channel JPEG.
    """
    feature_spec = {
        key: tf.io.FixedLenFeature([], tf.string)
        for key in ("image_name", "image", "target")
    }
    parsed = tf.io.parse_single_example(example, feature_spec)
    return tf.image.decode_jpeg(parsed['image'], channels=3)

# Crop the image randomly
def random_crop(image):
    """Return a random IMG_HEIGHT x IMG_WIDTH x 3 crop of ``image``."""
    crop_size = [IMG_HEIGHT, IMG_WIDTH, 3]
    return tf.image.random_crop(image, size=crop_size)

# Bring image pixels in the range of [-1, 1]
def normalize(image):
    """Cast ``image`` to float32 and rescale it with ``x / 127.5 - 1``."""
    as_float = tf.cast(image, tf.float32)
    return (as_float / 127.5) - 1

# Apply random jitter augmentation
def random_jitter(image):
    """Upscale, randomly crop and randomly mirror ``image``.

    The image is resized to 286 x 286 with nearest-neighbour interpolation,
    cropped back to the training size by ``random_crop`` and then randomly
    flipped left/right.
    """
    enlarged = tf.image.resize(
        image,
        [286, 286],
        method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
    )
    cropped = random_crop(enlarged)
    return tf.image.random_flip_left_right(cropped)

# Preprocessing pipeline for training
def preprocess_image_train(image):
    """Apply random jitter to ``image`` and then normalize it."""
    return normalize(random_jitter(image))

# Preprocessing pipeline for evaluation/submission
def preprocess_image_eval(image):
    """Normalize ``image`` without applying any augmentation."""
    normalized = normalize(image)
    return normalized

In [None]:
def load_dataset(filenames, labeled=True, ordered=False):
    """Build a dataset of decoded images from the given TFRecord files.

    Each record is decoded with ``read_tfrecord``. The ``labeled`` and
    ``ordered`` parameters are accepted but not used by this function.
    """
    records = tf.data.TFRecordDataset(filenames)
    return records.map(read_tfrecord, num_parallel_calls=AUTOTUNE)

In [None]:
# Training pipelines. The decoded images are cached *before* the random
# jitter is mapped, so every pass over the dataset draws fresh crops and
# flips. Caching after the augmentation would store the first pass's
# augmented images and replay them unchanged in every later epoch.
monet_ds = (
    load_dataset(MONET_FILENAMES, labeled=True)
    .cache()
    .map(preprocess_image_train, num_parallel_calls=AUTOTUNE)
    .shuffle(BUFFER_SIZE)
    .batch(1)
)

photo_ds = (
    load_dataset(PHOTO_FILENAMES, labeled=True)
    .cache()
    .map(preprocess_image_train, num_parallel_calls=AUTOTUNE)
    .shuffle(BUFFER_SIZE)
    .batch(1)
)

# Evaluation pipeline: only pixel normalization, no augmentation.
photo_ds_eval = (
    load_dataset(PHOTO_FILENAMES, labeled=True)
    .map(preprocess_image_eval, num_parallel_calls=AUTOTUNE)
    .shuffle(BUFFER_SIZE)
    .batch(1)
)

### Visualize a pair of style and content image. 

**Note**: There is no one-to-one mapping between the style and the content images. 

* First column - Style(Monet) Images
* Second column - Content(Photo) Images

In [None]:
def show_image_pairs(monet_images, photo_images):
    """Plot Monet/photo image pairs side by side, one pair per row.

    Args:
        monet_images: iterable of image batches whose leading axis has
            size 1; shown in the first column.
        photo_images: iterable of image batches whose leading axis has
            size 1; shown in the second column.

    Each image is rescaled with ``x * 0.5 + 0.5`` before display. The number
    of rows follows the number of pairs; nothing is drawn for empty input.
    """
    pairs = list(zip(monet_images, photo_images))
    if not pairs:
        return

    # squeeze=False keeps `axs` two-dimensional even when there is one row.
    fig, axs = plt.subplots(nrows=len(pairs), ncols=2, figsize=(8, 12),
                            squeeze=False)
    for row_axes, (monet_image, photo_image) in zip(axs, pairs):
        row_axes[0].imshow(np.squeeze(monet_image, 0) * 0.5 + 0.5)
        row_axes[1].imshow(np.squeeze(photo_image, 0) * 0.5 + 0.5)
        
# Take five batches from a single pass over each dataset. Calling
# next(iter(ds)) inside a loop would create a new iterator, and refill the
# shuffle buffer, for every single image.
monet_images = list(monet_ds.take(5))
photo_images = list(photo_ds.take(5))

show_image_pairs(monet_images, photo_images)

# 💎 CycleGAN Model

The model architecture is similar to that of Pix2Pix. Some of the differences are:

* Instance normalization is used instead of batch normalization.
* UNET based generator is used.

There are 2 generators (G and F) and 2 discriminators (X and Y) being trained here.

* Generator G learns to transform image X to image Y. 
* Generator F learns to transform image Y to image X.
* Discriminator D_X learns to differentiate between image X and generated image X (F(Y)).
* Discriminator D_Y learns to differentiate between image Y and generated image Y (G(X)).

In [None]:
# Number of channels produced by the pix2pix U-Net generators.
OUTPUT_CHANNELS = 3

tf.keras.backend.clear_session()

# Baseline generators from the pix2pix example package, using instance
# normalization.
# NOTE: generator_g / generator_f / discriminator_x / discriminator_y are
# assigned again further down from define_generator() and
# define_discriminator(); when the notebook is run top to bottom, those
# later models are the ones that get trained.
generator_g = pix2pix.unet_generator(OUTPUT_CHANNELS, norm_type='instancenorm')
generator_f = pix2pix.unet_generator(OUTPUT_CHANNELS, norm_type='instancenorm')

# Baseline discriminators; target=False means they take a single image input.
discriminator_x = pix2pix.discriminator(norm_type='instancenorm', target=False)
discriminator_y = pix2pix.discriminator(norm_type='instancenorm', target=False)

In [None]:
def define_discriminator(image_shape=(IMG_HEIGHT, IMG_WIDTH, 3)):
    """Build the convolutional patch discriminator.

    Args:
        image_shape: (height, width, channels) of the input image.

    Returns:
        An uncompiled Keras ``Model`` mapping an image to a single-channel
        map of patch scores. The last layer has no activation, so the
        outputs are raw scores.
    """
    # weight initialization
    init = RandomNormal(stddev=0.02)
    # source image input
    in_image = Input(shape=image_shape)

    # C64: the first block has no normalization layer
    d = Conv2D(64, (4, 4), strides=(2, 2), padding='same',
               kernel_initializer=init)(in_image)
    d = LeakyReLU(alpha=0.2)(d)

    # C128, C256, C512 with stride 2, then the second-to-last 512-filter
    # layer with stride 1; each is followed by instance norm + LeakyReLU.
    for n_filters, stride in ((128, 2), (256, 2), (512, 2), (512, 1)):
        d = Conv2D(n_filters, (4, 4), strides=(stride, stride),
                   padding='same', kernel_initializer=init)(d)
        d = InstanceNormalization(axis=-1)(d)
        d = LeakyReLU(alpha=0.2)(d)

    # patch output
    patch_out = Conv2D(1, (4, 4), padding='same', kernel_initializer=init)(d)
    return Model(in_image, patch_out)

In [None]:
# Building block used repeatedly inside the generator.
def resnet_block(n_filters, input_layer):
    """Apply two 3x3 conv + instance-norm stages and merge with the input.

    A ReLU follows the first stage only. The result is concatenated with
    ``input_layer`` (not added), so the output has more channels than the
    input.
    """
    kernel_init = RandomNormal(stddev=0.02)

    # first stage: conv -> instance norm -> ReLU
    x = Conv2D(n_filters, (3, 3), padding='same',
               kernel_initializer=kernel_init)(input_layer)
    x = InstanceNormalization(axis=-1)(x)
    x = Activation('relu')(x)

    # second stage: conv -> instance norm (no activation)
    x = Conv2D(n_filters, (3, 3), padding='same',
               kernel_initializer=kernel_init)(x)
    x = InstanceNormalization(axis=-1)(x)

    # merge with the block input
    return Concatenate()([x, input_layer])

In [None]:
# define the standalone generator model
def define_generator(image_shape=(IMG_HEIGHT,IMG_WIDTH,3), n_resnet=9):
    """Build the encoder / resnet-blocks / decoder generator.

    Args:
        image_shape: (height, width, channels) of the input image.
        n_resnet: number of ``resnet_block`` stages between the
            downsampling and upsampling parts.

    Returns:
        An uncompiled Keras ``Model`` whose output passes through ``tanh``.
    """
    kernel_init = RandomNormal(stddev=0.02)
    in_image = Input(shape=image_shape)

    def norm_relu(tensor):
        # instance normalization followed by ReLU
        tensor = InstanceNormalization(axis=-1)(tensor)
        return Activation('relu')(tensor)

    # c7s1-64
    x = Conv2D(64, (7, 7), padding='same',
               kernel_initializer=kernel_init)(in_image)
    x = norm_relu(x)

    # d128, d256: stride-2 downsampling convolutions
    for n_filters in (128, 256):
        x = Conv2D(n_filters, (3, 3), strides=(2, 2), padding='same',
                   kernel_initializer=kernel_init)(x)
        x = norm_relu(x)

    # R256
    for _ in range(n_resnet):
        x = resnet_block(256, x)

    # u128, u64: stride-2 transposed convolutions
    for n_filters in (128, 64):
        x = Conv2DTranspose(n_filters, (3, 3), strides=(2, 2), padding='same',
                            kernel_initializer=kernel_init)(x)
        x = norm_relu(x)

    # c7s1-3
    x = Conv2D(3, (7, 7), padding='same', kernel_initializer=kernel_init)(x)
    x = InstanceNormalization(axis=-1)(x)
    out_image = Activation('tanh')(x)

    return Model(in_image, out_image)

In [None]:
# Reset Keras' global state, then build the two generators and the two
# discriminators from the functions defined above.
tf.keras.backend.clear_session()

generator_g, generator_f = define_generator(), define_generator()
discriminator_x, discriminator_y = define_discriminator(), define_discriminator()

In [None]:
# Print the layer-by-layer summary of generator G.
generator_g.summary()

In [None]:
# Print the layer-by-layer summary of discriminator X.
discriminator_x.summary()

# 🌸 Loss

In CycleGAN, there is no paired data to train on, hence there is no guarantee that the input x and the target y pair are meaningful during training. Thus in order to enforce that the network learns the correct mapping, the authors propose the cycle consistency loss.

In [None]:
# Weight of the cycle-consistency loss; the identity loss uses half of it.
LAMBDA = 10

# from_logits=True: the loss applies the sigmoid itself, so the discriminator
# outputs are passed in as raw (un-activated) scores.
loss_obj = tf.keras.losses.BinaryCrossentropy(from_logits=True)

In [None]:
def discriminator_loss(real, generated):
    """Return half the sum of the real and generated discriminator losses.

    ``real`` is scored against a target of ones and ``generated`` against a
    target of zeros, both with ``loss_obj``.
    """
    loss_on_real = loss_obj(tf.ones_like(real), real)
    loss_on_generated = loss_obj(tf.zeros_like(generated), generated)
    return (loss_on_real + loss_on_generated) * 0.5

In [None]:
def generator_loss(generated):
    """Score the discriminator output for generated images against ones."""
    target = tf.ones_like(generated)
    return loss_obj(target, generated)

Cycle consistency loss is used. It ensures that the styled image/result is close to the original input. 

This is an important objective function since there isn't any one-to-one mapping between the style and the content images.



In [None]:
def calc_cycle_loss(real_image, cycled_image):
    """Return LAMBDA times the mean absolute difference of the two images."""
    mean_abs_diff = tf.reduce_mean(tf.abs(real_image - cycled_image))
    return LAMBDA * mean_abs_diff

Identity loss ensures that if you feed image Y to generator G, it should yield the real image Y or something similar. For example, if we run the photo(X)-to-monet(Y) model on a monet(Y) then the generated image should be similar to Y.

In [None]:
def identity_loss(real_image, same_image):
    """Return LAMBDA * 0.5 times the mean absolute difference of the images."""
    mean_abs_diff = tf.reduce_mean(tf.abs(real_image - same_image))
    return LAMBDA * 0.5 * mean_abs_diff

# 🍄 Optimizer

Initialize the optimizers for all the generators and the discriminators.

In [None]:
# One independent Adam optimizer per network, all with the same settings.
generator_g_optimizer = tf.keras.optimizers.Adam(learning_rate=2e-4, beta_1=0.5)
generator_f_optimizer = tf.keras.optimizers.Adam(learning_rate=2e-4, beta_1=0.5)

discriminator_x_optimizer = tf.keras.optimizers.Adam(learning_rate=2e-4, beta_1=0.5)
discriminator_y_optimizer = tf.keras.optimizers.Adam(learning_rate=2e-4, beta_1=0.5)

# 🌀 Train Loop

The training step consists of four basic steps:

* Get the predictions.
* Calculate the loss.
* Calculate the gradients using backpropagation.
* Apply the gradients to the optimizer.


In [None]:
@tf.function
def train_step(real_x, real_y):
    """Run one optimization step for both generators and both discriminators.

    Args:
        real_x: batch passed to generator G (X -> Y) and discriminator X.
        real_y: batch passed to generator F (Y -> X) and discriminator Y.

    Returns:
        Tuple ``(total_gen_g_loss, total_gen_f_loss, disc_x_loss,
        disc_y_loss)`` computed for this batch.
    """
    # The tape is persistent because four separate gradients are taken
    # from it below.
    with tf.GradientTape(persistent=True) as tape:
        # Forward translations and their cycles:
        # G maps X -> Y, F maps Y -> X.
        fake_y = generator_g(real_x, training=True)
        cycled_x = generator_f(fake_y, training=True)

        fake_x = generator_f(real_y, training=True)
        cycled_y = generator_g(fake_x, training=True)

        # Each generator applied to an image of its own target domain
        # (inputs to the identity loss).
        same_x = generator_f(real_x, training=True)
        same_y = generator_g(real_y, training=True)

        # Discriminator scores on real and generated images.
        disc_real_x = discriminator_x(real_x, training=True)
        disc_real_y = discriminator_y(real_y, training=True)

        disc_fake_x = discriminator_x(fake_x, training=True)
        disc_fake_y = discriminator_y(fake_y, training=True)

        # Adversarial terms for the generators.
        gen_g_loss = generator_loss(disc_fake_y)
        gen_f_loss = generator_loss(disc_fake_x)

        # The cycle loss is shared by both generators.
        total_cycle_loss = (calc_cycle_loss(real_x, cycled_x)
                            + calc_cycle_loss(real_y, cycled_y))

        # Total generator loss = adversarial + cycle + identity loss.
        total_gen_g_loss = (gen_g_loss + total_cycle_loss
                            + identity_loss(real_y, same_y))
        total_gen_f_loss = (gen_f_loss + total_cycle_loss
                            + identity_loss(real_x, same_x))

        disc_x_loss = discriminator_loss(disc_real_x, disc_fake_x)
        disc_y_loss = discriminator_loss(disc_real_y, disc_fake_y)

    # (loss, network, optimizer) triples, in the order they are processed.
    updates = (
        (total_gen_g_loss, generator_g, generator_g_optimizer),
        (total_gen_f_loss, generator_f, generator_f_optimizer),
        (disc_x_loss, discriminator_x, discriminator_x_optimizer),
        (disc_y_loss, discriminator_y, discriminator_y_optimizer),
    )

    # Compute every gradient first, before any variable is updated.
    all_gradients = [
        tape.gradient(loss, network.trainable_variables)
        for loss, network, _ in updates
    ]

    # Then apply each set of gradients with the matching optimizer.
    for gradients, (_, network, optimizer) in zip(all_gradients, updates):
        optimizer.apply_gradients(zip(gradients, network.trainable_variables))

    return total_gen_g_loss, total_gen_f_loss, disc_x_loss, disc_y_loss

# 📲 Callbacks

We have two callbacks:

* `log_generated_images` will log the styled images to W&B dashboard. You can click on the link generated below while training and visualize how the model learns to create better styled images.
* `save_model_weights` will save the two trained generators at the given epoch. The saved models will later be logged as W&B artifacts to be used in another kernel to generate submissions.

In [None]:
# Samples of content images: five batches taken from a single pass over the
# evaluation dataset. Calling next(iter(ds)) inside a loop would create a
# new iterator, and refill the shuffle buffer, for every single image.
photo_images = list(photo_ds_eval.take(5))

    
# Log the styled images to W&B dashboard.
def log_generated_images(model, test_inputs, epoch):
    """Run ``model`` on each test input and log the results to W&B.

    The first image of every prediction is logged under 'Styled Images'.
    When ``epoch`` is 0, the first image of every input is also logged once
    under 'Test Content Images'. Images are rescaled with ``x * 0.5 + 0.5``
    before logging.
    """
    predictions = [model(test_input) for test_input in test_inputs]

    if epoch == 0:
        content_images = [
            wandb.Image(test_input[0]*0.5+0.5) for test_input in test_inputs
        ]
        wandb.log({'Test Content Images': content_images})

    styled_images = [wandb.Image(pred[0]*0.5+0.5) for pred in predictions]
    wandb.log({'Styled Images': styled_images})
    
    
# Save both generators at the given epoch.
def save_model_weights(gen_g, gen_f, epoch):
    """Save ``gen_g`` and ``gen_f`` under SAVE_PATH, tagged with ``epoch``.

    Each model is written with its ``save`` method to
    ``SAVE_PATH + 'generator_<g|f>_checkpoint_<epoch>'``.
    """
    g_target = SAVE_PATH + f'generator_g_checkpoint_{epoch}'
    f_target = SAVE_PATH + f'generator_f_checkpoint_{epoch}'
    gen_g.save(g_target)
    gen_f.save(f_target)

# 🚅 Train with W&B

In [None]:
# Number of epochs to train
EPOCHS = 30

# Directory the generator checkpoints are written to. exist_ok=True lets
# this cell be re-run without failing when the directory already exists.
SAVE_PATH = '../models/'
os.makedirs(SAVE_PATH, exist_ok=True)

In [None]:
run = wandb.init(project='Kaggle CycleGAN')

# Pair each photo batch with a Monet batch. Built once, outside the epoch
# loop; iterating it again starts a new pass over both datasets.
train_ds = tf.data.Dataset.zip((photo_ds, monet_ds))

for epoch in range(EPOCHS):
    # Per-step losses collected over the epoch.
    total_gen_g_loss = []
    total_gen_f_loss = []
    total_disc_x_loss = []
    total_disc_y_loss = []

    start = time.time()

    for n, (image_x, image_y) in enumerate(train_ds):
        gen_g_loss, gen_f_loss, disc_x_loss, disc_y_loss = train_step(image_x, image_y)
        total_gen_g_loss.append(gen_g_loss)
        total_gen_f_loss.append(gen_f_loss)
        total_disc_x_loss.append(disc_x_loss)
        total_disc_y_loss.append(disc_y_loss)

        # Progress indicator: one dot every 10 steps.
        if n % 10 == 0:
            print('.', end='')

    # Log the styled images to W&B
    log_generated_images(generator_g, photo_images, epoch)

    # Save the trained generators after every 10 epochs.
    if (epoch+1) % 10 == 0:
        save_model_weights(generator_g,
                           generator_f,
                           epoch+1)

    # Epoch averages, computed once and reused for printing and logging.
    mean_gen_g_loss = np.mean(total_gen_g_loss)
    mean_gen_f_loss = np.mean(total_gen_f_loss)
    mean_disc_x_loss = np.mean(total_disc_x_loss)
    mean_disc_y_loss = np.mean(total_disc_y_loss)

    print(f'\nTime taken for epoch {epoch + 1} is {time.time()-start} sec\n total gen g loss: {mean_gen_g_loss},\
    total gen f loss: {mean_gen_f_loss}, total disc x loss: {mean_disc_x_loss},\
    total disc y loss: {mean_disc_y_loss}')

    # The discriminator-X key previously contained a stray space
    # ('total_disc_x _loss'), which made it inconsistent with the others.
    wandb.log({'total_gen_g_loss': mean_gen_g_loss,
               'total_gen_f_loss': mean_gen_f_loss,
               'total_disc_x_loss': mean_disc_x_loss,
               'total_disc_y_loss': mean_disc_y_loss})

run.join()

# 💾 Save the Weights as W&B Artifacts

In [None]:
# Start a separate W&B run, tagged with job_type='producer', for the upload.
run = wandb.init(project='Kaggle CycleGAN', job_type='producer')

# Create an artifact of type 'model' named 'cyclegan_models'.
artifact = wandb.Artifact('cyclegan_models', type='model')

# Add everything under SAVE_PATH to the artifact.
artifact.add_dir(SAVE_PATH)

# Log the artifact to the run, then wait for the run to finish.
run.log_artifact(artifact)
run.join()

# 🎈 Generate Submission

In [None]:
def predict_and_save(input_ds, generator_model, output_path):
    """Generate an image for every batch in ``input_ds`` and save it as JPEG.

    Files are written as ``<output_path><i>.jpg`` with ``i`` counting from 1.
    Only the first image of each prediction is saved; it is rescaled with
    ``x * 127.5 + 127.5`` and cast to uint8 before saving.
    """
    for index, batch in enumerate(tqdm(input_ds), start=1):
        # make prediction (inference mode) and keep the first image
        styled = generator_model(batch, training=False)[0].numpy()
        # re-scale to 8-bit pixel values
        pixels = (styled * 127.5 + 127.5).astype(np.uint8)
        PIL.Image.fromarray(pixels).save(f'{output_path}{index}.jpg')

In [None]:
# Create folder to save generated images. exist_ok=True lets this cell be
# re-run without raising FileExistsError.
os.makedirs('../images/', exist_ok=True)

predict_and_save(photo_ds_eval, generator_g, '../images/')

In [None]:
import shutil

# make_archive appends '.zip' to base_name, so base_name must not end with a
# slash. With the trailing slash, newer Python versions write a file named
# '.zip' inside an 'images' directory instead of creating 'images.zip'.
shutil.make_archive('/kaggle/working/images', 'zip', '../images')

generated_files = [
    name for name in os.listdir('../images/')
    if os.path.isfile(os.path.join('../images/', name))
]
print(f"Number of generated samples: {len(generated_files)}")