# Image-to-Image Translation with CycleGAN
Author: Jin Yeom (jinyeom@utexas.edu)

In [6]:
import os
import numpy as np
import tensorflow as tf

from tensorflow.initializers import truncated_normal
from tensorflow.layers import conv2d
from tensorflow.layers import conv2d_transpose
from tensorflow.nn import leaky_relu
from tensorflow.nn import moments
from tensorflow.nn import relu
from tensorflow.nn import tanh

import wget
import zipfile

ModuleNotFoundError: No module named 'tensorflow.initializers'

## Datasets

In [4]:
def download_dataset(name: str):
    """Download a CycleGAN dataset archive and unpack it under ``./datasets``.

    The archive ``<name>.zip`` is fetched from the Berkeley CycleGAN dataset
    page, extracted into ``datasets/`` and then deleted. When
    ``./datasets/<name>/`` already exists and is non-empty the call returns
    immediately, so re-running the notebook does not download again or crash.

    Args:
        name: Dataset identifier; must be one of the names listed in
            ``known_datasets`` below.

    Raises:
        ValueError: If ``name`` is not a known dataset name.
    """
    known_datasets = (
        "ae_photos", "apple2orange", "summer2winter_yosemite", "horse2zebra",
        "monet2photo", "cezanne2photo", "ukiyoe2photo", "vangogh2photo", "maps",
        "cityscapes", "facades", "iphone2dslr_flower",
    )
    if name not in known_datasets:
        raise ValueError("invalid argument dataset name")

    if not os.path.exists("./datasets"):
        print("Datasets directory not found, creating a new directory 'datasets'...")
        os.makedirs("./datasets", exist_ok=True)
    zip_path = "./datasets/{}.zip".format(name)
    target_dir = "./datasets/{}/".format(name)

    # Already extracted by a previous run: nothing to do.
    if os.path.isdir(target_dir) and os.listdir(target_dir):
        return

    url = "https://people.eecs.berkeley.edu/~taesung_park/CycleGAN/datasets/{}.zip".format(name)
    wget.download(url, out=zip_path)

    try:
        # Presumably the archive holds a top-level "<name>/" folder, which is
        # why it is extracted into "datasets/" rather than into target_dir.
        os.makedirs(target_dir, exist_ok=True)
        with zipfile.ZipFile(zip_path, "r") as zip_ref:
            zip_ref.extractall("datasets/")
    finally:
        # Remove the archive even when extraction fails so a retry starts clean.
        os.remove(zip_path)

In [3]:
# Fetch the two datasets used in this notebook, in order.
for dataset_name in ("apple2orange", "horse2zebra"):
    download_dataset(dataset_name)

Datasets directory not found, creating a new directory 'datasets'...


In [5]:
!ls ./datasets

[1m[36mapple2orange[m[m [1m[36mhorse2zebra[m[m


## Building Blocks

Here, we define some building blocks used for CycleGAN.

In [13]:
def tf_instance_norm(input_, epsilon=1e-5, name="instance_norm"):
    """Instance normalization with a learned per-channel scale and shift.

    Mean and variance are computed over axes 1 and 2 of ``input_`` and the
    size of axis 3 is used as the number of scale/shift parameters, i.e. the
    input is treated as a rank-4 (batch, height, width, channels) tensor.

    Args:
        input_: Tensor to normalize.
        epsilon: Small positive constant added to the variance for numerical
            stability.
        name: Variable scope under which ``scale`` and ``shift`` are created.

    Returns:
        ``scale * normalized + shift``, with the same shape as ``input_``.
    """
    with tf.variable_scope(name):
        mean, variance = moments(input_, axes=(1, 2), keep_dims=True)
        # epsilon must be ADDED: subtracting it makes rsqrt see a negative
        # value (-> NaN) whenever a feature map is constant (variance == 0).
        normalized = (input_ - mean) * tf.rsqrt(variance + epsilon)

        depth = input_.get_shape()[3]
        # Start as a plain normalization (scale = 1, shift = 0) rather than
        # relying on get_variable's default initializer.
        scale = tf.get_variable("scale", [depth], dtype=tf.float32,
                                initializer=tf.ones_initializer())
        shift = tf.get_variable("shift", [depth], dtype=tf.float32,
                                initializer=tf.zeros_initializer())

        return scale * normalized + shift

In [14]:
def tf_conv_block(input_,
                  filters,
                  kernel_size,
                  strides,
                  activation=None,
                  normalize=True,
                  name="conv_block"):
    """Convolve ``input_`` and optionally normalize and activate the result.

    A "valid"-padded 2D convolution (kernel weights drawn from a truncated
    normal with stddev 0.02) is applied inside variable scope ``name``. When
    ``normalize`` is truthy the output goes through ``tf_instance_norm``; when
    ``activation`` is not None it is then called on the result.
    """
    with tf.variable_scope(name):
        features = conv2d(
            inputs=input_,
            filters=filters,
            kernel_size=kernel_size,
            strides=strides,
            padding="valid",
            kernel_initializer=truncated_normal(stddev=0.02),
        )
        features = tf_instance_norm(features) if normalize else features
        return features if activation is None else activation(features)

In [15]:
def tf_deconv_block(input_,
                    filters,
                    kernel_size,
                    strides,
                    activation=None,
                    normalize=True,
                    name="deconv_block"):
    """Transpose-convolve ``input_`` and optionally normalize and activate it.

    A "valid"-padded 2D transpose convolution (kernel weights drawn from a
    truncated normal with stddev 0.02) is applied inside variable scope
    ``name``. When ``normalize`` is truthy the output goes through
    ``tf_instance_norm``; when ``activation`` is not None it is then called on
    the result.
    """
    with tf.variable_scope(name):
        upsampled = conv2d_transpose(
            inputs=input_,
            filters=filters,
            kernel_size=kernel_size,
            strides=strides,
            padding="valid",
            kernel_initializer=truncated_normal(stddev=0.02),
        )
        if normalize:
            upsampled = tf_instance_norm(upsampled)
        if activation is None:
            return upsampled
        return activation(upsampled)

In [16]:
def tf_residual_block(input_, filters, kernel_size, strides, name="residual_block"):
    """Residual block: two reflect-padded conv blocks plus a skip connection.

    Each conv block is preceded by reflection padding of ``(kernel_size - 1) // 2``
    on both spatial axes, which compensates for the "valid" padding used by
    ``tf_conv_block``. The first conv block uses ReLU; the second has no
    activation. The block returns ``input_ + conv_output``.

    Args:
        input_: Input tensor; axes 1 and 2 are the ones that get padded.
        filters: Number of output filters of both conv blocks. The skip
            addition presumably requires this to equal the input depth.
        kernel_size: Integer kernel size of both convolutions.
        strides: Stride of both convolutions. NOTE(review): the skip addition
            only lines up spatially for stride 1 with an odd kernel size.
        name: Variable scope for the block.

    Returns:
        The sum of ``input_`` and the output of the second conv block.
    """
    with tf.variable_scope(name):
        p = (kernel_size - 1) // 2
        paddings = [[0, 0], [p, p], [p, p], [0, 0]]

        # Feed the PADDED tensor to the convolution; convolving the raw input
        # shrinks the feature map and breaks the skip addition below.
        r_0 = tf.pad(input_, paddings, "reflect")
        # Distinct scope names: re-entering the same variable scope would make
        # the second instance norm try to re-create already existing variables.
        r_0 = tf_conv_block(r_0, filters, kernel_size, strides, activation=relu, name="conv_0")

        r_1 = tf.pad(r_0, paddings, "reflect")
        r_1 = tf_conv_block(r_1, filters, kernel_size, strides, name="conv_1")
        return input_ + r_1

## Generator network

In this section, we define the architecture of the generator network. Note that the generator network's architecture in CycleGAN is somewhat different from what we're familiar with. Rather than decoding random Gaussian noise into an image, this generator adopts the mechanism of an image transformation network, which is often used in applications like style transfer and super-resolution. In the original paper, 6 residual blocks were used for 128 x 128 images, and 9 for 256 x 256 images. We're going to assume the latter for now, but this can change in the future, based on the desired resolution of the images.

In [17]:
def generator(input_, name="generator", num_residual_blocks=9):
    """Build the CycleGAN generator (encoder -> residual transformer -> decoder).

    The encoder applies a reflect-padded 7x7 conv block followed by two
    stride-2 3x3 conv blocks (32, 64, 128 filters). The transformer chains
    ``num_residual_blocks`` residual blocks with 128 filters. The decoder
    applies two stride-2 deconv blocks (64, 32 filters) and a reflect-padded
    7x7 conv block with tanh and no normalization that outputs 3 channels.

    Args:
        input_: Input image tensor; axes 1 and 2 are reflect-padded, so it is
            treated as (batch, height, width, channels).
        name: Variable scope for all generator variables.
        num_residual_blocks: Number of residual blocks in the transformer
            (9 by default, matching the original hard-coded architecture).

    Returns:
        The tanh-activated output tensor of the final conv block.
    """
    # NOTE(review): every conv/deconv block uses "valid" padding, so the
    # stride-2 layers may not map the input back to exactly its original
    # spatial size; verify the output shape before adding a cycle loss.
    with tf.variable_scope(name):
        with tf.name_scope("encoder"):
            e_0 = tf.pad(input_, [[0, 0], [3, 3], [3, 3], [0, 0]], "reflect")
            e_0 = tf_conv_block(e_0, 32, 7, 1, activation=relu, name="e_0")
            e_1 = tf_conv_block(e_0, 64, 3, 2, activation=relu, name="e_1")
            e_2 = tf_conv_block(e_1, 128, 3, 2, activation=relu, name="e_2")
        with tf.name_scope("transformer"):
            r = e_2
            for i in range(num_residual_blocks):
                # Scope names r_0 .. r_{n-1}, same as the unrolled version.
                r = tf_residual_block(r, 128, 3, 1, name="r_{}".format(i))
        with tf.name_scope("decoder"):
            d_0 = tf_deconv_block(r, 64, 3, 2, activation=relu, name="d_0")
            d_1 = tf_deconv_block(d_0, 32, 3, 2, activation=relu, name="d_1")
            d_1 = tf.pad(d_1, [[0, 0], [3, 3], [3, 3], [0, 0]], "reflect")
            pred = tf_conv_block(d_1, 3, 7, 1, activation=tanh, normalize=False, name="pred")
        return pred

## Discriminator network

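This section describes the discriminator network. It stacks four stride-2 convolution blocks with leaky ReLU activations, followed by a single-channel stride-1 convolution that produces the prediction map.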

In [18]:
def descriminator(input_, name):
    """Build the discriminator: four stride-2 conv blocks and a 1-channel head.

    Blocks ``h_0`` .. ``h_3`` use 7x7 kernels, stride 2, leaky ReLU and
    64/128/256/512 filters (``h_0`` without instance normalization). The final
    ``pred`` block is a 7x7, stride-1, single-filter convolution with neither
    normalization nor activation.

    Args:
        input_: Input image tensor.
        name: Variable scope for all discriminator variables.

    Returns:
        The raw output tensor of the ``pred`` conv block.
    """
    with tf.variable_scope(name):
        h_0 = tf_conv_block(input_, 64, 7, 2, activation=leaky_relu, normalize=False, name="h_0")
        h_1 = tf_conv_block(h_0, 128, 7, 2, activation=leaky_relu, name="h_1")
        h_2 = tf_conv_block(h_1, 256, 7, 2, activation=leaky_relu, name="h_2")
        h_3 = tf_conv_block(h_2, 512, 7, 2, activation=leaky_relu, name="h_3")
        # Exactly four positional args (input, filters, kernel_size, strides):
        # a fifth positional value would bind to `activation` and be called.
        pred = tf_conv_block(h_3, 1, 7, 1, normalize=False, name="pred")
        return pred

## Reference
1. https://arxiv.org/pdf/1703.10593.pdf (Original CycleGAN paper)
2. https://arxiv.org/pdf/1603.08155v1.pdf (Perceptual losses and image transformation network)
3. https://arxiv.org/pdf/1607.08022.pdf (Instance normalization)
4. https://hardikbansal.github.io/CycleGANBlog/ (TensorFlow tutorial for CycleGAN)