In [1]:
import tensorflow as tf
import os
import numpy as np
from model import *
from utility import *

model_name = "AC-DRAGAN_SRResNet_PixelCNN_for_DTD"

In [2]:
class Generator:
    def __init__(self):
        self.reuse = False
        self.g_bn0 = BatchNormalization(name = 'g_bn0')

        self.num_res_blocks = 32
        self.num_pixel_CNN_blocks = 3
        
        self.res_bns = []
        for i in range(int(self.num_res_blocks)):
            self.res_bns.append(BatchNormalization(name = "res_%d" % (2*i)))
            self.res_bns.append(BatchNormalization(name = "res_%d" % (2*i+1)))
        
        self.ps_bns = []
        for i in range(int(self.num_pixel_CNN_blocks)):
            self.ps_bns.append(BatchNormalization(name = "ps_%d" % i))
        
        self.g_bn1 = BatchNormalization(name = 'g_bn1')
        
    def __call__(self, z):
        with tf.variable_scope('g', reuse=self.reuse):
            
            # reshape from inputs
            with tf.variable_scope('fc0'):
                #z0 = tf.reshape(z, [-1, self.z_dim])
                fc0 = full_connection_layer(z, 64*16*16, name="fc0")
                fc0 = self.g_bn0(fc0)
                fc0 = tf.nn.relu(fc0)
                fc0 = tf.reshape(fc0, [-1,16,16,64])

            assert fc0.get_shape().as_list()[1:] == [16,16,64]
            
            layers = []
            layers.append(fc0)
            
            for i in range(int(self.num_res_blocks)):
                with tf.variable_scope('res_%d' % (i+1)):
                    res = conv2d_layer(layers[-1], 64, kernel_size=3, strides=1, name="g_conv_res_%d" % (2*i))
                    res = self.res_bns[2*i](res)
                    res = tf.nn.relu(res)

                    res = conv2d_layer(res, 64, kernel_size=3, strides=1, name="g_conv_res_%d" % (2*i+1))
                    res = self.res_bns[2*i+1](res)
                    res = layers[-1] + res
                    layers.append(res)                    

            assert layers[-1].get_shape().as_list()[1:] == [16,16,64]
            
            with tf.variable_scope('conv17'):
                conv17 = conv2d_layer(layers[-1], 64, kernel_size=3, strides=1, name="g_conv_17")
                conv17 = self.g_bn1(conv17)
                conv17 = tf.nn.relu(conv17)
                conv17 = layers[0] + conv17
                layers.append(conv17)

            assert layers[-1].get_shape().as_list()[1:] == [16, 16, 64]

            for i in range(int(self.num_pixel_CNN_blocks)):
                with tf.variable_scope('pixel_CNN_%d' % (i+1)):
                    ps = conv2d_layer(layers[-1], 256, kernel_size=3, strides=1, name="g_conv_ps_%d" % (i))
                    ps = pixel_shuffle_layer(ps, 2, 64)
                    ps = self.ps_bns[i](ps)
                    ps = tf.nn.relu(ps)
                    layers.append(ps)

            assert layers[-1].get_shape().as_list()[1:] == [128, 128, 64]
                    
            with tf.variable_scope('output'):
                output = conv2d_layer(layers[-1], 3, kernel_size=9, strides=1, name="output")
                output = tf.nn.sigmoid(output)

            assert output.get_shape().as_list()[1:] == [128, 128, 3]            
            
        self.reuse = True
        self.variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='g')
        return output

In [3]:
class Discriminator:
    def __init__(self, cat_size):
        self.reuse = False
        self.d_bn0 = BatchNormalization(name="d_bn0")
        self.d_bn1 = BatchNormalization(name="d_bn1")
        self.d_bn2 = BatchNormalization(name="d_bn2")
        self.d_bn3 = BatchNormalization(name="d_bn3")
        self.d_bn4 = BatchNormalization(name="d_bn4")
        
        self.cat_size = cat_size

    def __call__(self, x):
        with tf.variable_scope('d', reuse=self.reuse):
           
            # convolution x 4
            x = tf.reshape(x, [-1, 128, 128, 3])
            with tf.variable_scope('conv1'):
                conv1 = conv2d_layer(x, 32, kernel_size=4, strides=2, name="conv1")
                conv1 = lrelu(conv1, leak=0.2)

            with tf.variable_scope('res1'):                
                res1 = conv2d_layer(conv1, 32, kernel_size=3, strides=1, name="res1_conv0")
                res1 = lrelu(res1, leak=0.2)
                res1 = conv2d_layer(res1, 32, kernel_size=3, strides=1, name="res1_conv1")
                res1 = res1 + conv1
                res1 = lrelu(res1, leak=0.2)
                
            with tf.variable_scope('res2'):
                res2 = conv2d_layer(res1, 32, kernel_size=3, strides=1, name="res2_conv0")
                res2 = lrelu(res2, leak=0.2)
                res2 = conv2d_layer(res2, 32, kernel_size=3, strides=1, name="res2_conv1")
                res2 = res2 + res1
                res2 = lrelu(res2, leak=0.2)

            with tf.variable_scope('conv2'):
                conv2 = conv2d_layer(res2, 64, kernel_size=4, strides=2, name="conv2")
                conv2 = lrelu(conv2, leak=0.2)

            with tf.variable_scope('res3'):
                res3 = conv2d_layer(conv2, 64, kernel_size=3, strides=1, name="res3_conv0")
                res3 = lrelu(res3, leak=0.2)
                res3 = conv2d_layer(res3, 64, kernel_size=3, strides=1, name="res3_conv1")
                res3 = res3 + conv2
                res3 = lrelu(res3, leak=0.2)            

            with tf.variable_scope('res4'):
                res4 = conv2d_layer(res3, 64, kernel_size=3, strides=1, name="res4_conv0")
                res4 = lrelu(res4, leak=0.2)
                res4 = conv2d_layer(res4, 64, kernel_size=3, strides=1, name="res4_conv1")
                res4 = res4 + res3
                res4 = lrelu(res4, leak=0.2)                    
                
            with tf.variable_scope('conv3'):
                conv3 = conv2d_layer(res4, 128, kernel_size=4, strides=2, name="conv3")
                conv3 = lrelu(conv3, leak=0.2)

            num_res_itr = 3
            layers = []
            layers.append(conv3)
            
            depth = [128, 256, 512, 1024]
            for i in range(int(num_res_itr)):
                with tf.variable_scope('res_%d_1' % (i+1+4)):
                    res = conv2d_layer(layers[-1], depth[i], kernel_size=3, strides=1, name="res_%d" % (5*i+0))
                    res = lrelu(res)
                    res = conv2d_layer(res, depth[i], kernel_size=3, strides=1, name="res_%d" % (5*i+1))
                    res = lrelu(res)
                    res = layers[-1] + res
                    res = lrelu(res)
                layers.append(res)

                with tf.variable_scope('res_%d_2' % (i+1+4)):
                    res = conv2d_layer(layers[-1], depth[i], kernel_size=3, strides=1, name="res_%d" % (5*i+2))
                    res = lrelu(res)
                    res = conv2d_layer(res, depth[i], kernel_size=3, strides=1, name="res_%d" % (5*i+3))
                    res = lrelu(res)
                    res = layers[-1] + res
                    res = lrelu(res)                  
                    
                conv = conv2d_layer(res, depth[i+1], kernel_size=4, strides=2, name="res_%d" % (5*i+4))
                conv = lrelu(conv) 
                layers.append(conv)
                
            disc = full_connection_layer(layers[-1], 1, name="disc")
            aux = full_connection_layer(layers[-1], self.cat_size, name="aux")

        self.reuse = True
        self.variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='d')

        return disc, aux

In [None]:
class GAN:
    def __init__(self):
        self.batch_size = 16
        self.img_size = 128
        self.rand_size = 50
        self.cat_size = 47
        self.z_size = self.rand_size + self.cat_size
        
        self.epochs = 50000
        self.epoch_saveMetrics = 100
        self.epoch_saveSampleImg = 100
        self.epoch_saveParamter = 5000
        self.losses = {"d_loss":[], "g_loss":[]}

        # unrolled counts
        self.steps = 5

        #self.dataset = np.load("irasutoya_face_1813x96x96x3_jpg.npy")
        #self.dataset = (self.dataset/255)# - 0.5

        self.X_tr = tf.placeholder(tf.float32, shape=[None, self.img_size, self.img_size, 3])
        self.Y_tr = tf.placeholder(tf.float32, shape=[None, self.cat_size])
        self.z = tf.placeholder(tf.float32, [None, self.z_size])
        self.X_per = tf.placeholder(tf.float32, shape=[None, self.img_size, self.img_size, 3])

        self.g = Generator()
        self.d = Discriminator(self.cat_size)
        self.Xg = self.g(self.z)
        self.dtd = DTD()

    def loss(self):
        disc_tr, aux_tr = self.d(self.X_tr)
        disc_gen, aux_gen = self.d(self.Xg)
        
        lambda_adv = 34
        lambda_gp = 0.5

       
        loss_g = lambda_adv*tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=disc_gen, labels=tf.ones_like(disc_gen)))
        #loss_g = lambda_adv*tf.reduce_mean(1 - tf.log(disc_gen + TINY))

        diff = self.X_per - self.X_tr
        #print(g_outputs.shape[0])
        alpha = tf.random_uniform(shape=[self.batch_size,1,1,1], minval=0., maxval=1.)
        interpolates = self.X_tr + (alpha*diff)
        disc_interplates, _ = self.d(interpolates)
        gradients = tf.gradients(disc_interplates, [interpolates])[0]
        slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), reduction_indices=[1]))
        gradient_penalty = tf.reduce_mean((slopes-1.)**2)

        loss_d_tr = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=disc_tr, labels=tf.ones_like(disc_tr)))
        loss_d_gen = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=disc_gen, labels=tf.zeros_like(disc_gen)))
        loss_d = lambda_adv*(loss_d_tr + loss_d_gen)
        loss_d += lambda_gp*gradient_penalty

        loss_c_tr = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=aux_tr, labels=self.Y_tr))
        loss_c_gen = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=aux_gen, labels=self.Y_tr))
        loss_c = (loss_c_tr + loss_c_gen)

        loss_g += loss_c
        loss_d += loss_d
        return loss_g, loss_d

    def train(self):
        # Optimizer
        d_lr = 2e-4
        d_beta1 = 0.5
        g_lr = 2e-4
        g_beta1 = 0.5

        self.L_g, self.L_d = self.loss()

        d_opt = tf.train.AdamOptimizer(learning_rate=d_lr)
        d_train_op = d_opt.minimize(self.L_d, var_list=self.d.variables)
        g_opt = tf.train.AdamOptimizer(learning_rate=g_lr)
        g_train_op = g_opt.minimize(self.L_g, var_list=self.g.variables)

        saver = tf.train.Saver()
        
        config = tf.ConfigProto(
            gpu_options=tf.GPUOptions(
                visible_device_list= "1"
            )
        )
                
        with tf.Session(config=config) as sess:
            sess.run(tf.global_variables_initializer())

            # preparing noise vec for test
            bs = 100
            test_z = np.random.uniform(-1, 1, size=[bs, self.z_size])

            for epoch in range(self.epochs):
                for step in range(self.steps):
                    # extract images for training
                    #rand_index = np.random.randint(0, self.dataset.shape[0], size=self.batch_size)
                    #X_mb, Y_mb = self.dataset[rand_index, :].astype(np.float32)
                    X_mb, Y_mb = self.dtd.extract(self.batch_size, self.img_size)
                    X_mb = np.reshape(X_mb, [-1, self.img_size, self.img_size, 3])
                    X_mb_per = X_mb + 0.5*np.std(X_mb)*np.random.random(X_mb.shape)

                    rand = np.random.uniform(-1, 1, size=[self.batch_size, self.rand_size])
                    #print(rand.shape)
                    #print(Y_mb.shape)
                    z = np.hstack((rand, Y_mb))
                    #print(z.shape)

                    # train Discriminator
                    _, d_loss_value = sess.run([d_train_op, self.L_d], feed_dict={
                        self.X_tr: X_mb,
                        self.z:z,
                        self.Y_tr: Y_mb,
                        self.X_per: X_mb_per,
                    })

                    # train Generator
                    _, g_loss_value = sess.run([g_train_op, self.L_g], feed_dict={
                        self.X_tr: X_mb,
                        self.z:z,
                        self.Y_tr: Y_mb,
                        self.X_per: X_mb_per,
                    })

                # append loss value for visualizing
                self.losses["d_loss"].append(np.sum(d_loss_value))
                self.losses["g_loss"].append(np.sum(g_loss_value))
                
                # print epoch
                if epoch % 50 == 0:
                    print('epoch:{0}, d_loss:{1}, g_loss: {2} '.format(epoch, d_loss_value, g_loss_value))
                
                # visualize loss
                if epoch % self.epoch_saveMetrics == 0:
                    save_metrics(model_name, self.losses, epoch)

                # visualize generated images during training
                if epoch % self.epoch_saveSampleImg == 0:
                    img = sess.run(self.Xg, feed_dict={self.z: test_z})
                    img = (img)
                    save_imgs(model_name, img, name=str(epoch))

                # save model parameters 
                if epoch % self.epoch_saveParamter == 0:
                    dir_path = "model_" + model_name
                    if not os.path.isdir(dir_path):
                        os.makedirs(dir_path)

                    saver.save(sess, dir_path + "/" + str(epoch) + ".ckpt")

In [None]:
gan = GAN()
gan.train()

init DTD
datasize of texture:5641




epoch:0, d_loss:52.26744079589844, g_loss: -332.1894836425781 
epoch:50, d_loss:0.780781090259552, g_loss: -4156.552734375 
