# pix2pix

In [None]:
import tensorflow as tf
import os

from PIL import Image
import numpy as np
import matplotlib.pyplot as plt

#from util import *
#from ops import *

In [None]:
# convert imgs into .npy file
def ConvertIntoNpy(src, dst):
    """Stack all same-shaped images of a directory into one array and save it.

    The first entry returned by ``os.listdir(src)`` defines the reference
    shape; every file whose decoded array has a different shape is skipped.
    The result has shape ``(num_kept_images,) + reference_shape`` and is
    written with ``np.save(dst, ...)``.

    Args:
        src: directory containing the image files.
        dst: output path handed to ``np.save``.

    Raises:
        ValueError: if ``src`` contains no files.
    """
    files = os.listdir(src)
    if not files:
        raise ValueError("no files found in " + str(src))

    # Only the shape of the first image is needed up front; the pixel data is
    # picked up by the loop below, so the first image is stored exactly once.
    with Image.open(os.path.join(src, files[0])) as firstImg:
        ref_shape = np.asarray(firstImg).shape

    print("num of files: " + str(len(files)))
    print("file size: " + str(ref_shape))

    kept = []
    for i, file_name in enumerate(files):
        if i % 100 == 1:
            print(i)

        # The context manager closes the file even when the image is skipped.
        with Image.open(os.path.join(src, file_name)) as img:
            np_img = np.array(img)
        if np_img.shape != ref_shape:
            continue
        kept.append(np_img)

    # One stack at the end instead of re-copying the whole array per image.
    fileList = np.stack(kept)

    print("generated npy size: " + str(fileList.shape))
    np.save(dst, fileList)

#ConvertIntoNpy("ukiyoe2photo/trainA", "./ukiyoe2photo_trainA.npy")

In [None]:
# save metrics
def save_metrics(metrics, epoch=None):
    """Plot the recorded loss curves and write them as PNG files.

    Two figures are written into the ``metrics`` directory (created when
    missing): ``d_loss<epoch>.png`` with both discriminator losses and
    ``g_loss<epoch>.png`` with both generator losses. The y axis of each
    figure is fixed to the range 0..4.

    Args:
        metrics: mapping with the keys ``d_loss_X``, ``d_loss_Y``,
            ``g_loss_X2Y`` and ``g_loss_Y2X``, each holding a sequence of
            values to plot.
        epoch: value appended (via ``str``) to the output file names.
    """
    out_dir = "metrics"
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    # (file prefix, ((metrics key, legend label, line color), ...))
    charts = (
        ("d_loss", (("d_loss_X", "discriminative loss X", "r"),
                    ("d_loss_Y", "discriminative loss Y", "c"))),
        ("g_loss", (("g_loss_X2Y", "generative loss X2Y", "g"),
                    ("g_loss_Y2X", "generative loss Y2X", "b"))),
    )

    for prefix, curves in charts:
        plt.figure(figsize=(10,8))
        for key, legend_label, line_color in curves:
            plt.plot(metrics[key], label=legend_label, color=line_color)
        plt.ylim(0,4)
        plt.legend()
        plt.savefig(os.path.join(out_dir, prefix + str(epoch) + ".png"))
        plt.close()

In [None]:
# plot images
def save_imgs(images, plot_dim=(1,2), size=(8,4), epoch=None):
    """Save a 3-column grid of images to ``generated_figures/<epoch>.png``.

    Images are laid out row by row, three per row, so a caller passing
    triples (e.g. input / generated / target) gets one triple per row.

    Args:
        images: array whose first axis indexes the images; each entry must be
            reshapeable to ``(256, 256, 3)``.
        plot_dim: accepted for backward compatibility but ignored; the grid
            is always derived from the number of images.
        size: accepted for backward compatibility but ignored; each grid cell
            is 4x4 inches.
        epoch: value used (via ``str``) as the output file name.
    """
    # make directory if there is not
    path = "generated_figures"
    if not os.path.isdir(path):
        os.makedirs(path)

    num_examples = images.shape[0]
    num_cols = 3
    # Integer ceiling division: plt.subplot needs integer grid dimensions
    # (true division yields a float on Python 3), and rounding up keeps a
    # trailing partial row inside the grid. At least one row is kept so the
    # figure never has zero height.
    num_rows = max(1, -(-num_examples // num_cols))

    plt.figure(figsize=(num_cols*4, num_rows*4))
    for i in range(num_examples):
        plt.subplot(num_rows, num_cols, i+1)
        img = images[i, :]
        img = img.reshape((256, 256, 3))
        plt.imshow(img)
        plt.axis("off")
    # Lay the figure out once after all axes exist instead of once per image.
    plt.tight_layout()
    plt.subplots_adjust(wspace=0.1, hspace=0.1)
    plt.savefig(os.path.join(path, str(epoch) + ".png"))
    plt.close()

# model

In [None]:
def instance_norm(input, name):
    """Normalize ``input`` over axes 1 and 2 and apply a learned affine map.

    Mean and variance are computed per example and per index of axis 3
    (axes 1 and 2 are reduced), which is instance normalization if the
    tensor is NHWC -- the layout is assumed, not checked.

    Args:
        input: tensor with at least four dimensions; the size of axis 3
            determines the number of scale/offset parameters.
        name: variable scope under which the ``scale`` and ``offset``
            variables are created.

    Returns:
        ``scale * normalized + offset`` with the same shape as ``input``.
    """
    with tf.variable_scope(name):
        channels = input.get_shape()[3]
        gamma = tf.get_variable(
            "scale", [channels],
            initializer=tf.random_normal_initializer(1.0, 0.02, dtype=tf.float32))
        beta = tf.get_variable(
            "offset", [channels],
            initializer=tf.constant_initializer(0.0))
        mu, var = tf.nn.moments(input, axes=[1,2], keep_dims=True)
        centered = input - mu
        # 1e-5 keeps the reciprocal square root finite for zero variance.
        normalized = centered * tf.rsqrt(var + 1e-5)
    return gamma*normalized + beta

In [None]:
class Generator_UNet:
    """U-Net style generator: strided-conv encoder, transposed-conv decoder.

    The encoder applies eight stride-2 3x3 convolutions (e0..e7); the decoder
    applies seven stride-2 transposed convolutions (d0..d6), each concatenated
    along axis 3 with the encoder output of matching resolution, followed by a
    final transposed convolution to 3 channels and ``tanh``.

    All variables live under the variable scope ``g_<name>``. After each call
    ``self.variables`` holds the trainable variables of *this* generator only.
    """

    def __init__(self, name=""):
        # False until the first call has created the variables.
        self.reuse = False
        self.name = name
        print("init... name of g:" + str(self.name))

    def __call__(self, inputs, gf_dim=64):
        """Build the generator graph on ``inputs`` and return the output tensor.

        Args:
            inputs: 4-D image tensor. The size comments below assume
                256x256x3 input (as fed by the caller in this file).
            gf_dim: number of filters of the first encoder convolution; the
                other layers use multiples of it.

        Returns:
            Tensor produced by the final ``tanh`` (3 channels).
        """
        def leaky_relu(x, leak=0.2, name='outputs'):
            return tf.maximum(x, x * leak, name=name)

        with tf.variable_scope('g_' + self.name, reuse=self.reuse) as scope:
            # 256x256x3
            print("called... name of g:" + str(self.name))

            # ---- encoder -------------------------------------------------
            # e0 has no normalization; e1..e7 are leaky_relu -> conv -> norm.
            # Spatial size halves at every stage: 128 (e0), 64, 32, 16, 8,
            # 4, 2, 1 (e7).
            e0 = tf.layers.conv2d(inputs, gf_dim, [3,3], [2,2], padding="SAME")
            skips = [e0]
            for idx, mult in enumerate((2, 4, 8, 8, 8, 8, 8), 1):
                h = leaky_relu(skips[-1])
                h = tf.layers.conv2d(h, gf_dim*mult, [3,3], [2,2], padding="SAME")
                h = instance_norm(h, name="e%d" % idx)
                skips.append(h)

            # ---- decoder -------------------------------------------------
            # (filter multiplier, apply dropout) for d0..d6. Spatial size
            # doubles at every stage: 2 (d0), 4, 8, 16, 32, 64, 128 (d6).
            # Dropout is part of the graph unconditionally; there is no
            # train/inference switch.
            decoder_spec = (
                (8, True), (8, True), (8, True),
                (8, False), (4, False), (2, False), (1, False),
            )
            h = skips[-1]
            for idx, (mult, use_dropout) in enumerate(decoder_spec):
                h = tf.nn.relu(h)
                h = tf.layers.conv2d_transpose(h, gf_dim*mult, [3,3], [2,2], padding="SAME")
                h = instance_norm(h, name="d%d" % idx)
                if use_dropout:
                    h = tf.nn.dropout(h, 0.5)
                # d<idx> is joined with e<6-idx>, the encoder output of the
                # same spatial size.
                h = tf.concat([h, skips[6 - idx]], 3)

            d7 = tf.nn.relu(h)
            d7 = tf.layers.conv2d_transpose(d7, 3, [3,3], [2,2], padding="SAME")
            d7 = tf.nn.tanh(d7)
            # 256x256x3

        self.reuse = True
        # tf.get_collection treats `scope` as a regex prefix, so a bare 'g'
        # would also collect the variables of every other generator. Use the
        # full scope name plus '/' to select only this instance's variables.
        self.variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope.name + '/')
        return d7

In [None]:
class Generator_ResNet:
    """ResNet style generator: 3 convolutions, 9 residual blocks, 2 upsamplings.

    All variables live under the variable scope ``g_<name>``. After each call
    ``self.variables`` holds the trainable variables of *this* generator only.
    """

    def __init__(self, name=""):
        # False until the first call has created the variables.
        self.reuse = False
        self.name = name
        print("init... name of g:" + str(self.name))

    def __call__(self, inputs, gf_dim=64, training=True):
        """Build the generator graph on ``inputs`` and return the output tensor.

        Args:
            inputs: 4-D image tensor. The size comments below assume
                256x256x3 input.
            gf_dim: number of filters of the first convolution.
            training: forwarded to ``tf.layers.batch_normalization``. With
                ``True`` (default) the layers normalize with the statistics
                of the current batch. With ``False`` they use the moving
                averages, which are only meaningful if the ops in
                ``tf.GraphKeys.UPDATE_OPS`` were run during training; this
                file never runs them, so ``False`` makes every batch-norm
                layer a near-identity.

        Returns:
            Tensor produced by the final ``tanh`` (3 channels).
        """
        def batch_norm(x):
            return tf.layers.batch_normalization(x, training=training)

        def residual_block(x, dim):
            # hxwxdim
            y = tf.pad(x, [[0,0], [1,1], [1,1], [0,0]], "REFLECT") # (h+2)x(w+2)x(dim)
            y = tf.layers.conv2d(y, dim, [3,3], [1,1], padding="VALID") # (h)x(w)x(dim)
            y = tf.nn.relu(batch_norm(y))
            y = tf.pad(y, [[0,0], [1,1], [1,1], [0,0]], "REFLECT") # (h+2)x(w+2)x(dim)
            y = tf.layers.conv2d(y, dim, [3,3], [1,1], padding="VALID") # (h)x(w)x(dim)
            y = batch_norm(y)
            return y + x

        with tf.variable_scope('g_' + self.name, reuse=self.reuse) as scope:
            # 256x256x3
            print("called... name of g:" + str(self.name))
            c0 = tf.pad(inputs, [[0,0], [3,3], [3,3], [0,0]], "REFLECT")  # (262)x(262)x3
            c1 = tf.layers.conv2d(c0, gf_dim, [7,7], [1,1], padding="VALID") # (256)x(256)xgf_dim
            c1 = tf.nn.relu(batch_norm(c1))

            c2 = tf.layers.conv2d(c1, gf_dim*2, [3,3], [2,2], padding="SAME") # (128)x(128)x(gf_dim*2)
            c2 = tf.nn.relu(batch_norm(c2))

            c3 = tf.layers.conv2d(c2, gf_dim*4, [3,3], [2,2], padding="SAME") # (64x64xgf_dim*4)
            c3 = tf.nn.relu(batch_norm(c3))

            # Nine residual blocks at constant resolution and width.
            r = c3
            for _ in range(9):
                r = residual_block(r, gf_dim*4)
            # 64x64xgf_dim*4

            d1 = tf.layers.conv2d_transpose(r, gf_dim*2, [3,3], [2,2], padding="SAME") # 128x128
            d1 = tf.nn.relu(batch_norm(d1))
            d2 = tf.layers.conv2d_transpose(d1, gf_dim, [3,3], [2,2], padding="SAME") # 256x256
            d2 = tf.nn.relu(batch_norm(d2))
            d2 = tf.pad(d2, [[0,0], [3,3], [3,3], [0,0]], "REFLECT") # 262x262
            y = tf.layers.conv2d(d2, 3, [7,7], [1,1], padding="VALID") # 256x256x3
            y = tf.nn.tanh(y)

        self.reuse = True
        # tf.get_collection treats `scope` as a regex prefix, so a bare 'g'
        # would also collect the variables of every other generator. Use the
        # full scope name plus '/' to select only this instance's variables.
        self.variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope.name + '/')
        return y

In [None]:
class Discriminator:
    """Convolutional patch discriminator producing a 1-channel score map.

    All variables live under the variable scope ``d_<name>``; the first call
    creates them and later calls reuse them. After each call
    ``self.variables`` holds the trainable variables of *this* discriminator
    only.
    """

    def __init__(self, name=""):
        # False until the first call has created the variables.
        self.reuse = False
        self.name = name
        print("init... name of d:" + self.name)

    def __call__(self, x, df_dim=64):
        """Build the discriminator graph on ``x`` and return the score map.

        Args:
            x: 4-D image tensor. The size comments below assume 256x256x3
                input.
            df_dim: number of filters of the first convolution.

        Returns:
            Output of the last convolution (1 channel, no activation).
        """
        def leaky_relu(x, leak=0.2, name='outputs'):
            return tf.maximum(x, x * leak, name=name)

        with tf.variable_scope('d_' + self.name, reuse=self.reuse) as scope:
            # x: 256x256x3
            # tf.layers.conv2d defaults to padding="valid", so every 4x4
            # convolution also trims the borders; sizes below reflect that.
            print("called... name of d:" + self.name)
            conv1 = tf.layers.conv2d(x, df_dim, [4,4], [2,2], name="d_c1") # 127x127xdf_dim
            conv1 = leaky_relu(conv1)

            conv2 = tf.layers.conv2d(conv1, df_dim*2, [4,4], [2,2]) # 62x62xdf_dim*2
            conv2 = instance_norm(conv2, name="conv2")
            conv2 = leaky_relu(conv2)

            conv3 = tf.layers.conv2d(conv2, df_dim*4, [4,4], [2,2]) # 30x30xdf_dim*4
            conv3 = instance_norm(conv3, name="conv3")
            conv3 = leaky_relu(conv3)

            conv4 = tf.layers.conv2d(conv3, df_dim*8, [4,4], [1,1]) # 27x27xdf_dim*8
            conv4 = instance_norm(conv4, name="conv4")
            conv4 = leaky_relu(conv4)

            conv5 = tf.layers.conv2d(conv4, 1, [4,4], [1,1]) # 24x24x1

        self.reuse = True
        # tf.get_collection treats `scope` as a regex prefix, so a bare 'd'
        # would also collect the variables of every other discriminator. Use
        # the full scope name plus '/' to select only this instance's
        # variables.
        self.variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope.name + '/')
        return conv5

In [None]:
def l2_loss(x1, x2, weights=1.0):
    """Return the mean of the squared element-wise difference, times ``weights``."""
    squared_error = (x1 - x2) ** 2
    return tf.reduce_mean(squared_error) * weights

def l1_loss(x1, x2, weights=1.0):
    """Return the mean of the absolute element-wise difference, times ``weights``."""
    absolute_error = tf.abs(x1 - x2)
    return tf.reduce_mean(absolute_error) * weights

In [None]:
class cycleGAN:
    """Two generator/discriminator pairs trained on paired X and Y images.

    ``g_X2Y`` maps X images to Y and is judged by ``d_X2Y`` (which sees real
    Y images); ``g_Y2X`` maps Y to X and is judged by ``d_Y2X`` (which sees
    real X images). Each generator is trained only with a least-squares
    adversarial loss -- no cycle-consistency term is part of the graph,
    despite the class name.
    """

    def __init__(self):
        # params
        self.batch_size = 8
        img_size = 256

        # NOTE: "epoch" throughout this class counts single mini-batch
        # updates, not passes over the data set.
        self.epochs = 1000000
        self.epoch_saveMetrics = 2000
        self.epoch_saveSampleImg = 1000
        self.epoch_saveParamter = 10000
        self.loss = {"d_loss_X":[], "d_loss_Y":[], "g_loss_X2Y":[], "g_loss_Y2X":[]}

        # Directories of the training images. Files are looked up by the same
        # file name in both directories, so they must contain matching pairs.
        self.X_dir = "./../1_Sitting_pose_viewed_from_the_front_1/Training_data/Depth_jpg"
        self.Y_dir = "./../1_Sitting_pose_viewed_from_the_front_1/Training_data/Label_jpg"

        self.X_tr = tf.placeholder(tf.float32, shape=[None, img_size, img_size, 3])
        self.Y_tr = tf.placeholder(tf.float32, shape=[None, img_size, img_size, 3])

        # definition of G and D for each X2Y, Y2X
        # (Generator_ResNet is a drop-in alternative for Generator_UNet.)
        self.g_X2Y = Generator_UNet(name="X2Y")
        self.g_Y2X = Generator_UNet(name="Y2X")

        self.d_X2Y = Discriminator(name="X2Y")
        self.d_Y2X = Discriminator(name="Y2X")

        # the results of generation
        # generate imgs from training data
        self.X2Y = self.g_X2Y(self.X_tr) #X->Y
        self.Y2X = self.g_Y2X(self.Y_tr) #Y->X

        self.lr = 0.0002

        # the results of discrimination
        X_dis = self.d_Y2X(self.X_tr)
        Y2X_dis = self.d_Y2X(self.Y2X)

        Y_dis = self.d_X2Y(self.Y_tr)
        X2Y_dis = self.d_X2Y(self.X2Y)

        # g_losses: each generator wants its fakes to be scored as 1.
        self.g_loss_X2Y = tf.identity(l2_loss(X2Y_dis, tf.ones_like(X2Y_dis)))
        self.g_loss_Y2X = tf.identity(l2_loss(Y2X_dis, tf.ones_like(Y2X_dis)))

        # d_losses: real images should be scored as 1, generated ones as 0.
        d_loss_Xtr = l2_loss(X_dis, tf.ones_like(X_dis))
        d_loss_Y2X = l2_loss(Y2X_dis, tf.zeros_like(Y2X_dis))
        self.d_loss_X = d_loss_Xtr + d_loss_Y2X

        d_loss_Ytr = l2_loss(Y_dis, tf.ones_like(Y_dis))
        d_loss_X2Y = l2_loss(X2Y_dis, tf.zeros_like(X2Y_dis))
        self.d_loss_Y = d_loss_Ytr + d_loss_X2Y

    @staticmethod
    def _load_batch(directory, file_names):
        """Load the named images from ``directory`` as a float32 batch in [0, 1]."""
        arrays = []
        for file_name in file_names:
            # The context manager closes the file handle even on errors.
            with Image.open(directory + "/" + file_name) as img:
                arrays.append(np.array(img))
        return np.stack(arrays).astype(np.float32) / 255

    @staticmethod
    def _interleave(first, second, third):
        """Return a batch ordered first[0], second[0], third[0], first[1], ..."""
        stacked = np.stack([first, second, third], axis=1)
        return stacked.reshape((-1,) + first.shape[1:])

    def train(self):
        """Run the training loop, periodically saving metrics, samples and checkpoints.

        Per iteration a random batch of paired images is loaded, both
        generators are updated in one step, then each discriminator is
        updated. Loss plots go to ``metrics/``, sample grids to
        ``generated_figures/`` and checkpoints to ``pix2pix/``.
        """
        # Optimizer
        d_X_train_op = tf.train.AdamOptimizer(self.lr, beta1=0.5).minimize(self.d_loss_X, var_list=self.d_Y2X.variables)
        d_Y_train_op = tf.train.AdamOptimizer(self.lr, beta1=0.5).minimize(self.d_loss_Y, var_list=self.d_X2Y.variables)
        g_X2Y_train_op = tf.train.AdamOptimizer(self.lr, beta1=0.5).minimize(self.g_loss_X2Y, var_list=self.g_X2Y.variables)
        g_Y2X_train_op = tf.train.AdamOptimizer(self.lr, beta1=0.5).minimize(self.g_loss_Y2X, var_list=self.g_Y2X.variables)

        saver = tf.train.Saver()

        # NOTE: restricts TensorFlow to the GPU with index 1; change this on
        # hosts with a different GPU layout.
        config = tf.ConfigProto(
            gpu_options=tf.GPUOptions(
                visible_device_list="1"
            )
        )

        with tf.Session(config=config) as sess:
            sess.run(tf.global_variables_initializer())
            imgs_ = os.listdir(self.X_dir)
            for epoch in range(self.epochs):
                rand_id = np.random.randint(0, len(imgs_), size=self.batch_size)
                batch_names = [imgs_[i] for i in rand_id]

                # collect X_mb and Y_mb (same file names in both directories)
                X_mb = self._load_batch(self.X_dir, batch_names)
                Y_mb = self._load_batch(self.Y_dir, batch_names)
                feed = {self.X_tr: X_mb, self.Y_tr: Y_mb}

                # train G
                _, _, g_loss_X2Y_value, g_loss_Y2X_value = sess.run(
                    [g_X2Y_train_op, g_Y2X_train_op, self.g_loss_X2Y, self.g_loss_Y2X],
                    feed_dict=feed)

                # train D_X
                _, d_loss_X_value = sess.run([d_X_train_op, self.d_loss_X], feed_dict=feed)

                # train D_Y
                _, d_loss_Y_value = sess.run([d_Y_train_op, self.d_loss_Y], feed_dict=feed)

                # append the results
                self.loss["d_loss_X"].append(d_loss_X_value)
                self.loss["d_loss_Y"].append(d_loss_Y_value)
                self.loss["g_loss_X2Y"].append(g_loss_X2Y_value)
                self.loss["g_loss_Y2X"].append(g_loss_Y2X_value)

                print("epoch:" + str(epoch))

                # visualize the losses
                if epoch % self.epoch_saveMetrics == 0:
                    save_metrics(self.loss, epoch)

                # image translation test
                if epoch % self.epoch_saveSampleImg == 0:
                    # Samples are only generated when they are actually saved.
                    sampleX2Y, sampleY2X = sess.run([self.X2Y, self.Y2X], feed_dict=feed)

                    # Rows of (source, generated, paired target).
                    imgs_X = self._interleave(X_mb, sampleX2Y, Y_mb)
                    imgs_Y = self._interleave(Y_mb, sampleY2X, X_mb)
                    # The file label names the translation shown in the grid.
                    save_imgs(imgs_X, epoch="X2Y"+str(epoch))
                    save_imgs(imgs_Y, epoch="Y2X"+str(epoch))

                # save the parameters
                if epoch % self.epoch_saveParamter == 0:
                    ckpt_dir = "pix2pix"
                    if not os.path.isdir(ckpt_dir):
                        os.makedirs(ckpt_dir)

                    saver.save(sess, ckpt_dir + "/pix2pix_model" + str(epoch) + ".ckpt")

    def sample_images(self, row=5, col=12, inputs=None, epoch=None):
        """Return the X->Y generator output tensor for ``inputs``.

        Args:
            row, col, epoch: unused; kept for interface compatibility.
            inputs: image tensor to translate. When ``None`` the tensor
                already built on the ``X_tr`` placeholder is returned.

        Returns:
            Output tensor of ``g_X2Y`` (shares weights with the training
            graph).
        """
        if inputs is None:
            return self.X2Y
        return self.g_X2Y(inputs)

# main func

In [None]:
if __name__ == "__main__":
    # Constructing the model builds the whole TensorFlow graph
    # (placeholders, both generators, both discriminators and the losses).
    CGAN = cycleGAN()
    # Creates the optimizers and a session, then runs the training loop.
    CGAN.train()