# GAN implementation with TensorFlow

In [1]:
import tensorflow as tf
import os
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt

from model import *
from utility import *

# Run identifier: appended to the "model" and "metrics" directory names used
# for checkpoints and loss plots, and passed to save_imgs for sample images.
model_name = "DTD5"

# Model

In [2]:
class Generator:
    """Residual generator that maps latent codes to 128x128x3 images.

    The graph built by ``__call__`` is: a dense layer reshaped to 16x16x64,
    ``num_res_blocks`` residual blocks, one extra convolution with a long skip
    connection from the dense output, ``num_pixel_CNN_blocks`` pixel-shuffle
    upsampling stages (each doubles height and width) and a final 9x9
    convolution followed by ``tanh``.
    """

    def __init__(self, batch_size):
        """Store hyper-parameters; no TensorFlow ops are created here.

        Args:
            batch_size: kept on the instance as ``self.batch_size``.
        """
        # False until the first __call__; afterwards variables are reused.
        self.reuse = False
        # Width of the latent vector expected by __call__.
        self.z_dim = 57
        # s_size and depths are stored but not read by __call__.
        self.s_size = 8
        self.depths = [1024, 512, 256, 128, 3]

        self.num_res_blocks = 16
        self.num_pixel_CNN_blocks = 3

        self.batch_size = batch_size

    def __call__(self, z, training=False):
        """Build (or re-build with shared weights) the generator graph.

        Args:
            z: latent tensor; reshaped to ``[-1, self.z_dim]``.
            training: forwarded to every ``tf.layers.batch_normalization``.

        Returns:
            Tensor of shape ``[batch, 128, 128, 3]`` produced by ``tanh``.

        Side effects:
            Sets ``self.reuse = True`` and refreshes ``self.variables`` with
            the trainable variables collected under scope ``'g'``.
        """
        def weight_init():
            return tf.truncated_normal_initializer(0.0, 0.02)

        def conv(x, filters, kernel):
            # Stride-1 SAME convolution: spatial size is preserved.
            return tf.layers.conv2d(x, filters, [kernel, kernel], [1, 1], padding="SAME",
                                    kernel_initializer=weight_init())

        def batch_norm(x):
            return tf.layers.batch_normalization(x, training=training)

        def pixel_shuffle_layer(x, r, n_split):
            """Rearrange channels into an r-times larger spatial grid.

            ``x`` is split into ``n_split`` channel groups; each group must
            hold ``r*r`` channels and becomes one output channel.
            """
            def PS(x, r):
                _, a, b, _ = x.get_shape().as_list()
                bs = tf.shape(x)[0]  # batch size is only known at run time
                x = tf.reshape(x, (bs, a, b, r, r))
                x = tf.transpose(x, (0, 1, 2, 4, 3))
                # Squeeze ONLY the axis produced by the split.  A bare
                # tf.squeeze would also drop the batch axis when the batch
                # size is 1 and break the concat/split that follow.
                x = tf.split(x, a, 1)
                x = tf.concat([tf.squeeze(x_, axis=1) for x_ in x], 2)
                x = tf.split(x, b, 1)
                x = tf.concat([tf.squeeze(x_, axis=1) for x_ in x], 2)
                return tf.reshape(x, (bs, a*r, b*r, 1))

            xc = tf.split(x, n_split, 3)
            return tf.concat([PS(x_, r) for x_ in xc], 3)

        with tf.variable_scope('g', reuse=self.reuse):
            # Project the latent code and reshape it into a 16x16x64 map.
            with tf.variable_scope('fc0'):
                z0 = tf.reshape(z, [-1, self.z_dim])

                fc0 = tf.layers.dense(z0, 64*16*16, kernel_initializer=weight_init())
                fc0 = batch_norm(fc0)
                fc0 = tf.nn.relu(fc0)
                fc0 = tf.reshape(fc0, [-1, 16, 16, 64])

            assert fc0.get_shape().as_list()[1:] == [16, 16, 64]

            layers = [fc0]

            # Residual blocks: conv-BN-ReLU-conv-BN plus identity skip (16x16).
            for i in range(int(self.num_res_blocks)):
                with tf.variable_scope('res_%d' % (i+1)):
                    res = conv(layers[-1], 64, 3)
                    res = batch_norm(res)
                    res = tf.nn.relu(res)

                    res = conv(res, 64, 3)
                    res = batch_norm(res)
                    res = layers[-1] + res
                    layers.append(res)

            assert layers[-1].get_shape().as_list()[1:] == [16, 16, 64]

            # Long skip connection back to the dense projection (16x16).
            with tf.variable_scope('conv17'):
                conv17 = conv(layers[-1], 64, 3)
                conv17 = batch_norm(conv17)
                conv17 = tf.nn.relu(conv17)
                conv17 = layers[0] + conv17
                layers.append(conv17)

            assert layers[-1].get_shape().as_list()[1:] == [16, 16, 64]

            # Upsampling: 256 channels -> 64 groups of 2x2 -> 64 channels at 2x size.
            for i in range(int(self.num_pixel_CNN_blocks)):
                with tf.variable_scope('pixel_CNN_%d' % (i+1)):
                    ps = conv(layers[-1], 256, 3)
                    ps = pixel_shuffle_layer(ps, 2, 64)
                    ps = batch_norm(ps)
                    ps = tf.nn.relu(ps)
                    layers.append(ps)

            assert layers[-1].get_shape().as_list()[1:] == [128, 128, 64]

            with tf.variable_scope('output'):
                output = conv(layers[-1], 3, 9)  # 128x128x3
                output = tf.nn.tanh(output)

            assert output.get_shape().as_list()[1:] == [128, 128, 3]

        self.reuse = True
        self.variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='g')
        return output

In [3]:
class Discriminator:
    """Residual convolutional critic with an auxiliary classification head.

    ``__call__`` returns two logit tensors computed from the same flattened
    feature map: a single real/fake logit and a 47-way auxiliary output.
    """

    def __init__(self):
        """Store configuration; no TensorFlow ops are created here."""
        # False until the first __call__; afterwards variables are reused.
        self.reuse = False
        # X_dim, s_size and depths are stored but not read by __call__.
        self.X_dim = 128*128*3
        self.s_size = 8
        self.depths = [3, 32, 128, 256, 512]

    def __call__(self, x, training=False, name=''):
        """Build (or re-build with shared weights) the discriminator graph.

        Args:
            x: input tensor; reshaped to ``[-1, 128, 128, 3]``.
            training: accepted for interface symmetry; not used in the body.
            name: suffix appended to the ``'d'`` name scope of the created ops.

        Returns:
            ``(disc, aux)``: logits of shape ``[batch, 1]`` and ``[batch, 47]``.

        Side effects:
            Sets ``self.reuse = True`` and refreshes ``self.variables`` with
            the trainable variables collected under scope ``'d'``.
        """
        def lrelu(t, slope=0.2, op_name='outputs'):
            return tf.maximum(t, t * slope, name=op_name)

        def conv(t, filters, kernel, stride):
            return tf.layers.conv2d(
                t, filters, [kernel, kernel], [stride, stride], padding="SAME",
                kernel_initializer=tf.truncated_normal_initializer(0.0, 0.02))

        def conv3x3(t, filters):
            return conv(t, filters, 3, 1)

        def downsample(t, filters):
            return conv(t, filters, 4, 2)

        # (strided-conv scope, width, residual scopes, activate branch before add)
        # The first stage adds the skip to the raw second convolution, the
        # second stage applies leaky ReLU to the branch first.
        stem = (
            ('conv1', 32, ('res1', 'res2'), False),
            ('conv2', 64, ('res3', 'res4'), True),
        )

        with tf.name_scope('d' + name), tf.variable_scope('d', reuse=self.reuse):
            h = tf.reshape(x, [-1, 128, 128, 3])

            for down_scope, width, res_scopes, act_before_add in stem:
                with tf.variable_scope(down_scope):
                    h = lrelu(downsample(h, width))

                for res_scope in res_scopes:
                    with tf.variable_scope(res_scope):
                        branch = lrelu(conv3x3(h, width))
                        branch = conv3x3(branch, width)
                        if act_before_add:
                            branch = lrelu(branch)
                        h = lrelu(branch + h)

            with tf.variable_scope('conv3'):
                h = lrelu(downsample(h, 128))

            # Three deeper stages: two residual blocks, then a strided
            # convolution that moves to the next width.
            widths = [128, 256, 512, 1024]
            for idx in range(3):
                for half in (1, 2):
                    with tf.variable_scope('res_%d_%d' % (idx + 5, half)):
                        branch = lrelu(conv3x3(h, widths[idx]))
                        branch = lrelu(conv3x3(branch, widths[idx]))
                        h = lrelu(h + branch)

                # Deliberately outside any named sub-scope, as in the
                # stage layout above, so the layer keeps its auto-generated name.
                h = lrelu(downsample(h, widths[idx + 1]))

            flat = tf.contrib.layers.flatten(h)
            disc = tf.layers.dense(flat, 1, kernel_initializer=tf.truncated_normal_initializer(0.0, 0.02))
            aux = tf.layers.dense(flat, 47, kernel_initializer=tf.truncated_normal_initializer(0.0, 0.02))

        self.reuse = True
        self.variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='d')
        return disc, aux

In [None]:
class DRAGAN:
    """GAN trainer with a gradient penalty and an auxiliary class loss.

    The constructor builds the whole TensorFlow graph (placeholders, generator
    and discriminator outputs, losses); ``train`` creates the optimizers and
    runs the training loop, periodically saving sample images, loss metrics
    and checkpoints under directories derived from the global ``model_name``.
    """

    def __init__(self):
        """Create sub-networks, placeholders and loss tensors."""
        self.batch_size = 64
        self.g = Generator(self.batch_size)
        self.d = Discriminator()

        self.img_size = 128
        self.rand_size = 10   # width of the random part of the latent code
        self.cat_size = 47    # width of the category part of the latent code
        self.z_size = self.rand_size + self.cat_size

        self.epochs = 500000
        self.epoch_saveMetrics = 100
        self.epoch_saveSampleImg = 100
        self.epoch_saveParameter = 10000
        self.losses = {"d_loss":[], "g_loss":[]}

        self.Z = tf.placeholder(tf.float32, [None, self.z_size])
        self.X_tr = tf.placeholder(tf.float32, [None, self.img_size*self.img_size*3])
        self.X_p = tf.placeholder(tf.float32, [None, self.img_size*self.img_size*3])
        self.Y_tr = tf.placeholder(tf.float32, shape=[None, self.cat_size])

        # Remember which batch-norm update ops belong to the training copy of
        # the generator so train() can run them together with the generator
        # step.  Without that dependency the moving mean/variance are never
        # updated and stay at their initial values in every checkpoint.
        n_update_ops_before = len(tf.get_collection(tf.GraphKeys.UPDATE_OPS))
        self.X_g = self.g(self.Z, training=True)
        self.g_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)[n_update_ops_before:]

        # Preview copy of the generator; it also normalises with batch
        # statistics (training=True), exactly like the training copy.
        self.X_g_test = self.g(self.Z, training=True)

        self.disc_tr, self.aux_tr = self.d(self.X_tr, training=True)
        self.disc_gen, self.aux_gen = self.d(self.X_g, training=True)

        # Stored on the instance but not used by the methods of this class.
        self.noise = np.random.uniform(-1, 1, size=[self.batch_size, self.z_size]).astype(np.float32)
        self.L_g, self.L_d = self.loss()
        self.learning_rate = tf.placeholder("float", [])

        self.dtd = DTD()
        # Number of discriminator updates per generator update.
        self.steps = 10

    def loss(self):
        """Build the generator and discriminator loss tensors.

        Generator: sigmoid cross-entropy of the fake logits against ones,
        plus the weighted classification loss.  Discriminator: cross-entropy
        of real logits against ones and fake logits against zeros, plus a
        gradient penalty evaluated between ``X_tr`` and ``X_p``, plus the same
        weighted classification loss.

        Returns:
            ``(loss_g, loss_d)`` scalar tensors.
        """
        lambda_adv = 1
        lambda_gp = 1
        lambda_c = 50

        loss_g = lambda_adv*tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=self.disc_gen, labels=tf.ones_like(self.disc_gen)))

        # Gradient penalty on random points between each real sample and its
        # perturbed copy; pushes the gradient norm of the critic towards 1.
        diff = self.X_p - self.X_tr
        alpha = tf.random_uniform(shape=[self.batch_size,1], minval=0., maxval=1.)
        interpolates = self.X_tr + (alpha*diff)
        disc_interplates, _ = self.d(interpolates)
        gradients = tf.gradients(disc_interplates, [interpolates])[0]
        slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), reduction_indices=[1]))
        gradient_penalty = tf.reduce_mean((slopes-1.)**2)

        loss_d_tr = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=self.disc_tr, labels=tf.ones_like(self.disc_tr)))
        loss_d_gen = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=self.disc_gen, labels=tf.zeros_like(self.disc_gen)))
        loss_d = lambda_adv*(loss_d_tr + loss_d_gen)
        loss_d += lambda_gp*gradient_penalty

        # Auxiliary classification: real and generated images are both
        # scored against the labels fed through Y_tr.
        loss_c_tr = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=self.aux_tr, labels=self.Y_tr))
        loss_c_gen = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=self.aux_gen, labels=self.Y_tr))
        loss_c = (loss_c_tr + loss_c_gen)

        loss_g += loss_c*lambda_c
        loss_d += loss_c*lambda_c
        return loss_g, loss_d

    def _build_test_latents(self, num_random=100, num_cats=4, num_steps=10):
        """Return the fixed latent batches used for preview images.

        Returns:
            ``(test_z, test_cat)``.  ``test_z`` is ``num_random`` fully random
            latents.  ``test_cat`` stacks ``num_cats`` sweeps of ``num_steps``
            rows; all sweeps share one random part, and sweep ``k`` ramps
            category entry ``k`` linearly from 0 to 1 with the others at 0.
        """
        test_z = np.random.uniform(0, 1, size=[num_random, self.z_size])

        shared_rand = np.random.uniform(0, 1, size=[num_steps, self.rand_size])
        sweeps = []
        for cat in range(num_cats):
            codes = np.zeros([num_steps, self.cat_size])
            codes[:, cat] = np.linspace(0, 1, num=num_steps)
            sweeps.append(np.concatenate((shared_rand, codes), axis=1))
        test_cat = np.concatenate(sweeps, axis=0)
        return test_z, test_cat

    def _generate_preview(self, sess, latents):
        """Run the preview generator and map its output into [0, 1]."""
        img = sess.run(self.X_g_test, feed_dict={self.Z: latents})
        # NOTE(review): the generator ends in tanh, so outputs span [-1, 1];
        # a +0.5 shift presumably matches data scaled to [-0.5, 0.5] by
        # DTD.extract -- confirm against the data loader.
        img = (img+0.5)
        return np.clip(img, 0.0, 1.0)

    def train(self):
        """Create optimizers and run the training loop.

        Restores the latest checkpoint from ``./model<model_name>/`` when one
        exists, otherwise initialises all variables.  Each epoch performs
        ``self.steps`` discriminator updates followed by one generator update
        on the last mini-batch, and periodically writes sample images, loss
        metrics and checkpoints.
        """
        beta1 = 0.5
        beta2 = 0.9

        d_opt = tf.train.AdamOptimizer(learning_rate=self.learning_rate, beta1=beta1, beta2=beta2)
        d_train_op = d_opt.minimize(self.L_d, var_list=self.d.variables)
        g_opt = tf.train.AdamOptimizer(learning_rate=self.learning_rate, beta1=beta1, beta2=beta2)
        # Run the generator's batch-norm moving-average updates with every
        # generator step (see tf.layers.batch_normalization documentation).
        with tf.control_dependencies(self.g_update_ops):
            g_train_op = g_opt.minimize(self.L_g, var_list=self.g.variables)

        saver = tf.train.Saver()

        # Only the GPU with index 1 is made visible to the session.
        config = tf.ConfigProto(
            gpu_options=tf.GPUOptions(
                visible_device_list="1"
            )
        )

        with tf.Session(config=config) as sess:
            ckpt = tf.train.get_checkpoint_state("./model" + model_name + "/")
            if ckpt:
                last_model = ckpt.model_checkpoint_path
                print("load"+last_model)
                saver.restore(sess, last_model)
            else:
                sess.run(tf.global_variables_initializer())

            # Fixed latents so previews are comparable across epochs.
            test_z, test_cat = self._build_test_latents()

            lr = 1e-4
            for epoch in range(self.epochs):
                # Step-wise learning-rate decay.
                if epoch > 2000:
                    lr = 5e-5
                if epoch > 4000:
                    lr = 1e-5

                # Visualize generated images during training.
                if epoch % self.epoch_saveSampleImg == 0:
                    img = self._generate_preview(sess, test_z)
                    save_imgs(model_name, img, name=str(epoch)+"_rand")

                    img = self._generate_preview(sess, test_cat)
                    save_imgs(model_name, img, plot_dim=(4,10), size=(20, 8), name=str(epoch)+"_cat")

                for step in range(self.steps):
                    # Draw a mini-batch and a noisy copy of it for the
                    # gradient penalty.
                    X_mb, Y_mb = self.dtd.extract(self.batch_size, self.img_size)
                    X_mb = np.reshape(X_mb, [self.batch_size, -1])
                    X_mb_per = X_mb + 0.5*np.std(X_mb)*np.random.random(X_mb.shape)

                    rand = np.random.uniform(0, 1, size=[self.batch_size, self.rand_size])
                    z = np.hstack((rand, Y_mb))

                    feed = {
                        self.X_tr: X_mb,
                        self.Z: z,
                        self.Y_tr: Y_mb,
                        self.X_p: X_mb_per,
                        self.learning_rate: lr,
                    }

                    # Train the discriminator.
                    _, d_loss_value = sess.run([d_train_op, self.L_d], feed_dict=feed)

                # Train the generator on the last mini-batch of this epoch.
                _, g_loss_value = sess.run([g_train_op, self.L_g], feed_dict=feed)

                # Record the losses of this epoch.
                self.losses["d_loss"].append(d_loss_value)
                self.losses["g_loss"].append(g_loss_value)

                # Log and plot the loss curves.
                if epoch % self.epoch_saveMetrics == 0:
                    print("epoch:" + str(epoch) + ", d_loss:" + str(d_loss_value) + ", g_loss:" + str(g_loss_value))
                    path = "metrics" + model_name
                    save_metrics(path, self.losses, epoch)

                if epoch % self.epoch_saveParameter == 0:
                    path = "model" + model_name
                    if not os.path.isdir(path):
                        os.makedirs(path)
                    saver.save(sess, path + "/dcgan_model" + str(epoch) + ".ckpt")

    def sample_images(self, row=5, col=12, inputs=None, epoch=None):
        """Return the generator output tensor for ``inputs``.

        ``row``, ``col`` and ``epoch`` are accepted but not used.  Every call
        adds another weight-sharing copy of the generator to the graph.
        """
        images = self.g(inputs, training=True)
        return images

In [None]:
# Build the complete training graph, then start the training loop
# (the loop resumes from the latest checkpoint when one is found).
gan = DRAGAN()
gan.train()

init DTD
datasize of texture:5641




epoch:0, d_loss:2.09367, g_loss:0.92711
