# BEGAN (Boundary Equilibrium GAN) implementation in TensorFlow

In [1]:
import tensorflow as tf
import os
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt

# Utility functions


In [2]:
def save_metrics(metrics, epoch=None):
    """Plot every tracked loss history and save each one as a PNG in ``metrics/``.

    One figure is written per series. File names are built as
    ``<prefix> + str(epoch) + ".png"`` with the prefixes ``dloss``, ``g_loss``,
    ``dis_train_loss``, ``dis_gen_loss`` and ``global_loss``.

    Args:
        metrics: Mapping with the keys "dis_loss", "gen_loss",
            "dis_train_loss", "dis_gen_loss" and "m_global_loss". Each value
            is handed directly to ``plt.plot``.
        epoch: Value appended to every file name via ``str``. With the
            default ``None`` the files are named e.g. ``dlossNone.png``.

    Raises:
        KeyError: If ``metrics`` lacks one of the expected keys. Figures for
            the keys preceding the missing one have already been written.
    """
    out_dir = "metrics"
    # savefig does not create missing directories, so make sure the target
    # exists before the first figure is written.
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    # (metrics key, file-name prefix, legend label, line colour)
    series = (
        ("dis_loss", "dloss", "discriminative loss", "b"),
        ("gen_loss", "g_loss", "generative loss", "r"),
        ("dis_train_loss", "dis_train_loss", "discriminative loss(train_data)", "g"),
        ("dis_gen_loss", "dis_gen_loss", "discriminative loss(generated data)", "c"),
        ("m_global_loss", "global_loss", "global loss", "k"),
    )
    for key, prefix, label, color in series:
        plt.figure(figsize=(10, 8))
        plt.plot(metrics[key], label=label, color=color)
        plt.legend()
        plt.savefig(os.path.join(out_dir, prefix + str(epoch) + ".png"))
        # Close the figure so repeated calls do not accumulate open figures.
        plt.close()

In [3]:
def save_imgs(images, plot_dim=(5,12), size=(12,5), epoch=None, img_shape=(96, 96, 3)):
    """Save a grid of images as ``generated_figures/<epoch>.png``.

    Args:
        images: Array-like indexed along its first axis; every entry must
            support ``.reshape(img_shape)`` (e.g. a NumPy batch of images).
        plot_dim: ``(rows, columns)`` of the subplot grid (e.g. ``(4, 4)``).
        size: Figure size passed to ``plt.figure(figsize=...)``.
        epoch: Value used (via ``str``) as the file name stem. The default
            ``None`` yields ``None.png``.
        img_shape: Shape each image is reshaped to before ``plt.imshow``.
            Defaults to the previously hard-coded ``(96, 96, 3)``.
    """
    out_dir = "generated_figures"
    # savefig does not create missing directories.
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    rows, cols = plot_dim[0], plot_dim[1]
    # Never index past the end of the batch; surplus grid cells stay empty.
    examples = min(rows * cols, len(images))

    plt.figure(figsize=size)
    for i in range(examples):
        plt.subplot(rows, cols, i + 1)
        plt.imshow(images[i].reshape(img_shape))
        plt.axis("off")

    # Compute the layout once for the finished grid instead of once per
    # subplot, then apply the fixed spacing between cells.
    plt.tight_layout()
    plt.subplots_adjust(wspace=0.1, hspace=0.1)
    plt.savefig(os.path.join(out_dir, str(epoch) + ".png"))
    plt.close()


# Model

In [4]:
def conv2d(x, filter_shape, stride, padding, name):
    """Apply a 2-D convolution plus bias, with variables created under scope ``name``.

    Args:
        x: Input tensor handed to ``tf.nn.conv2d``.
        filter_shape: Shape of the "weight" variable; its last entry is also
            used as the shape of the "bias" variable.
        stride: Step used for both middle entries of the strides list
            ``[1, stride, stride, 1]``.
        padding: Padding mode forwarded to ``tf.nn.conv2d``.
        name: Variable scope that holds "weight" and "bias".

    Returns:
        The convolved tensor with the bias added (no activation applied).
    """
    strides = [1, stride, stride, 1]
    with tf.variable_scope(name):
        # Weights start from a normal initializer built with (0., 0.001);
        # the bias starts at zero.
        kernel = tf.get_variable(
            "weight",
            shape=filter_shape,
            initializer=tf.random_normal_initializer(0., 0.001),
        )
        offset = tf.get_variable(
            "bias",
            shape=filter_shape[-1],
            initializer=tf.constant_initializer(0.),
        )
        convolved = tf.nn.conv2d(x, kernel, strides, padding=padding)
        return tf.nn.bias_add(convolved, offset)

def fc(x, output_shape, name):
    """Flatten ``x`` to 2-D and apply a dense layer under scope ``name``.

    Args:
        x: Input tensor; every dimension after the first is collapsed into
            one, so those dimensions must be statically known.
        output_shape: Number of output units (second dimension of "weight",
            length of "bias").
        name: Variable scope that holds "weight" and "bias".

    Returns:
        ``matmul(flattened_x, weight) + bias`` (no activation applied).
    """
    # Product of all non-leading static dimensions = flattened feature width.
    flat_dim = np.prod(x.get_shape().as_list()[1:])
    flat = tf.reshape(x, [-1, flat_dim])

    with tf.variable_scope(name):
        w = tf.get_variable(
            "weight",
            shape=[flat_dim, output_shape],
            initializer=tf.random_normal_initializer(0., 0.001),
        )
        projected = tf.matmul(flat, w)

        b = tf.get_variable(
            "bias",
            shape=[output_shape],
            initializer=tf.constant_initializer(0.),
        )
        return tf.nn.bias_add(projected, b)

def l1_loss(x, y):
    """Return the mean absolute difference between ``x`` and ``y``."""
    abs_diff = tf.abs(x - y)
    return tf.reduce_mean(abs_diff)

In [5]:
class Generator:
    """Generator network: a dense layer followed by conv/ELU pairs with upsampling.

    Calling the instance builds the graph under the variable scope "gen_".
    """

    def __init__(self):
        # Stored for callers; __call__ takes its own `reuse` argument instead.
        self.reuse = False
        # Channel count used by every hidden convolution.
        self.n = 64
        # Side length of the generated image.
        self.size = 96

    def __call__(self, x, reuse=False):
        """Build the generator graph for input ``x``.

        Args:
            x: Input tensor passed to the first dense layer.
            reuse: Forwarded to ``tf.variable_scope("gen_", reuse=...)``.

        Returns:
            The output of the final 3-channel convolution (no activation).
        """
        n = self.n
        side = self.size
        hidden_kernel = [3, 3, n, n]
        # Target side length before each conv pair; None keeps the 6x6 map
        # produced by the dense layer for the first stage.
        stage_sides = [None, side//8, side//4, side//2, side]

        with tf.variable_scope("gen_", reuse=reuse):
            x = fc(x, 6*6*n, name="fc")
            x = tf.reshape(x, [-1, 6, 6, n])

            for stage, target in enumerate(stage_sides, 1):
                if target is not None:
                    x = tf.image.resize_nearest_neighbor(x, size=(target, target))
                # Two conv + ELU layers per stage: conv<stage>_a, conv<stage>_b.
                for suffix in ("a", "b"):
                    x = conv2d(x, hidden_kernel, stride=1, padding="SAME",
                               name="conv%d_%s" % (stage, suffix))
                    x = tf.nn.elu(x)

            # Project the feature map down to 3 output channels.
            x = conv2d(x, [3, 3, n, 3], stride=1, padding="SAME", name="conv6_a")
        return x


In [6]:
class AutoEncoder:
    """Convolutional auto-encoder built under the variable scope "ae_".

    The encoder (scope "encoder") stacks conv/ELU pairs with four average
    pooling steps and ends in a 64-unit dense layer; the decoder (scope
    "decoder") expands that code back through conv/ELU pairs with
    nearest-neighbour upsampling to a 3-channel output.
    """

    def __init__(self):
        # Stored for callers; __call__ takes its own `reuse` argument instead.
        self.reuse = False
        # Base channel count; encoder stages use multiples of it.
        self.n = 64
        # Side length of the reconstructed image.
        self.size = 96

    def __call__(self, x, reuse=False):
        """Build the auto-encoder graph for input ``x``.

        Args:
            x: Input tensor; the first convolution expects 3 input channels.
            reuse: Forwarded to ``tf.variable_scope("ae_", reuse=...)``.

        Returns:
            The output of the decoder's final 3-channel convolution
            (no activation).
        """
        n = self.n
        side = self.size

        with tf.variable_scope("ae_", reuse=reuse):
            with tf.variable_scope("encoder"):
                x = tf.nn.elu(conv2d(x, [3, 3, 3, n], stride=1, padding="SAME",
                                     name="conv1_enc_a"))

                for suffix in ("a", "b"):
                    x = tf.nn.elu(conv2d(x, [3, 3, n, n], stride=1, padding="SAME",
                                         name="conv2_enc_" + suffix))

                # Stages 3..6: a 1x1 conv widens the channels by n, a 2x2
                # average pool with stride 2 downsamples, then two 3x3
                # conv + ELU layers.
                for stage in range(3, 7):
                    ch_in = n * (stage - 2)
                    ch_out = n * (stage - 1)
                    x = conv2d(x, [1, 1, ch_in, ch_out], stride=1, padding="SAME",
                               name="conv%d_enc_0" % stage)
                    x = tf.nn.avg_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                                       padding="SAME")
                    for suffix in ("a", "b"):
                        x = tf.nn.elu(conv2d(x, [3, 3, ch_out, ch_out], stride=1,
                                             padding="SAME",
                                             name="conv%d_enc_%s" % (stage, suffix)))

                x = fc(x, 64, name="enc_fc")  # TODO: revise (code width is hard-coded to 64)

            with tf.variable_scope("decoder"):
                x = fc(x, 6*6*n, name="fc")
                x = tf.reshape(x, [-1, 6, 6, n])

                # Target side length before each conv pair; None keeps the
                # 6x6 map produced by the dense layer for the first stage.
                stage_sides = [None, side//8, side//4, side//2, side]
                for stage, target in enumerate(stage_sides, 1):
                    if target is not None:
                        x = tf.image.resize_nearest_neighbor(x, size=(target, target))
                    for suffix in ("a", "b"):
                        x = tf.nn.elu(conv2d(x, [3, 3, n, n], stride=1, padding="SAME",
                                             name="conv%d_%s" % (stage, suffix)))

                # Project the feature map down to 3 output channels.
                x = conv2d(x, [3, 3, n, 3], stride=1, padding="SAME", name="conv6_a")
        return x



In [7]:
# Data loading and hyper-parameters shared by the graph-building and training cells.
import numpy as np

# Training images; the file name suggests 1813 RGB images of 96x96 — TODO confirm.
X_train = np.load("irasutoya_face_1813x96x96x3_jpg.npy")
# Scale by 1/255 (presumably 8-bit pixel values mapped into [0, 1] — verify the file's dtype).
X_train = X_train/255

# Width of the noise vector fed to the generator's input placeholder.
z_dim = 64
# Number of noise vectors / training images drawn per step.
batch_size = 16
# Number of training-loop iterations; each iteration uses one random batch.
epochs = 400000
# Preview images and loss plots are written every `display_epoch` iterations.
display_epoch = 1000
# A checkpoint is written every `param_save_epoch` iterations.
param_save_epoch = 10000
# Per-iteration loss histories, appended to by the training loop and plotted by save_metrics.
loss = {"dis_loss":[],
        "gen_loss":[],
        "dis_train_loss":[],
        "dis_gen_loss":[],
        "m_global_loss":[]}

# Weight on the training-data loss in the kt update and in the global measure.
gamma = 0.5 ## TODO ##
# Step size of the kt update.
lambda_ = 0.001
# Initial learning rate; the training loop multiplies it by 0.95 every `nsnapshot` iterations.
lr_ = 1e-4 ## TODO ##
# Initial value of kt, the weight on the generated-data term of the discriminator loss.
kt_ = 0.
# Passed as the second positional argument of both AdamOptimizer instances.
mm = 0.5

# Interval (in iterations) between learning-rate decays.
nsnapshot = 2440
# NOTE(review): not referenced anywhere in the visible code.
niter = 50

In [None]:
# Graph inputs: noise batch, image batch, and two scalars fed fresh on every step
# (the kt weight and the learning rate).
z = tf.placeholder(tf.float32, shape=[None, z_dim], name="z")
x = tf.placeholder(tf.float32, shape=[None, 96, 96, 3], name="x")
kt = tf.placeholder(tf.float32, name="kt")
lr = tf.placeholder(tf.float32, name="lr")

Gen = Generator()
AE = AutoEncoder()

# The auto-encoder is applied to both the training images and the generator
# output; the second call reuses the variables created by the first.
AE_traindata = AE(x)
generateddata = Gen(z)
AE_generateddata = AE(generateddata, reuse=True)

# Reconstruction errors (mean absolute difference) of training and generated images.
d_traindata_loss = l1_loss(x, AE_traindata)
d_generateddata_loss = l1_loss(generateddata, AE_generateddata)
# Auto-encoder objective: reconstruct training images well while being pushed,
# with weight kt, towards a high reconstruction error on generated images.
d_loss = d_traindata_loss - kt * d_generateddata_loss
# Generator objective: make its images easy for the auto-encoder to reconstruct.
g_loss = d_generateddata_loss

# Split the variables by the scope prefix embedded in their names.
t_vars = tf.global_variables()
g_vars = [var for var in t_vars if "gen_" in var.name]
d_vars = [var for var in t_vars if "ae_" in var.name]

# Separate Adam optimizers so each loss only updates its own network's variables.
opt_g = tf.train.AdamOptimizer(lr, mm).minimize(g_loss, var_list=g_vars)
opt_d = tf.train.AdamOptimizer(lr, mm).minimize(d_loss, var_list=d_vars)

In [None]:
# Fixed noise batch reused for every preview grid; 60 entries fill the
# default 5x12 layout of save_imgs.
noise_check = np.random.uniform(-1, 1, size=[60, z_dim]).astype(np.float32)
# Generator output used for previews; built with reuse=True so it shares the
# variables of the training graph.
sample_imgs = Gen(z, reuse=True)

saver = tf.train.Saver()

# Make sure the checkpoint directory exists before training starts, so the
# first save cannot fail on a missing parent directory.
if not os.path.isdir("./model"):
    os.makedirs("./model")

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for epoch in range(epochs):
        # Draw a fresh noise batch for the generator.
        noise = np.random.uniform(-1, 1, size=[batch_size, z_dim])
        # Sample a random training batch (indices drawn with replacement).
        rand_index = np.random.randint(0, X_train.shape[0], size=batch_size)
        exImgs = X_train[rand_index, :].astype(np.float32)
        # Both optimizer steps of this iteration see the same inputs.
        feed = {z: noise, x: exImgs, kt: kt_, lr: lr_}

        # Generator step; the two reconstruction losses are fetched in the
        # same run for the kt update below.
        _, loss_g, d_train_loss, d_gen_loss = sess.run(
            [opt_g, g_loss, d_traindata_loss, d_generateddata_loss],
            feed_dict=feed)
        # Auto-encoder (discriminator) step.
        _, loss_d = sess.run([opt_d, d_loss], feed_dict=feed)

        # Update kt from the balance between the two losses and keep it in [0, 1].
        kt_ = np.maximum(np.minimum(1., kt_ + lambda_ * (gamma * d_train_loss - d_gen_loss)), 0)
        # Global measure tracked alongside the individual losses.
        m_global = d_train_loss + np.abs(gamma*d_train_loss - d_gen_loss)

        # Record this iteration's results.
        loss["dis_loss"].append(loss_d)
        loss["gen_loss"].append(loss_g)
        loss["dis_train_loss"].append(d_train_loss)
        loss["dis_gen_loss"].append(d_gen_loss)
        loss["m_global_loss"].append(m_global)

        # Decay the learning rate by 5% every `nsnapshot` iterations.
        if epoch % nsnapshot == nsnapshot-1:
            lr_ = lr_*0.95

        # Periodically save a preview grid and the loss plots.
        if epoch % display_epoch == 0:
            imgs_gen = sess.run(sample_imgs, feed_dict={z:noise_check})
            print("saving images")
            save_imgs(imgs_gen, epoch=epoch)
            save_metrics(loss, epoch)
            print("epoch:" + str(epoch))

        # Periodically checkpoint the model parameters.
        if epoch % param_save_epoch == 0:
            saver.save(sess, "./model/dcgan_model" + str(epoch) + ".ckpt")

saving images
epoch:0
saving images
epoch:1000
saving images
epoch:2000
saving images
epoch:3000
saving images
epoch:4000
saving images
epoch:5000
saving images
epoch:6000
saving images
epoch:7000
