In [27]:
import warnings
warnings.filterwarnings("ignore")

from ops import conv2d, deconv2d, lrelu, fc, batch_norm, init_embedding, embedding_lookup
from dataset import TrainDataProvider, InjectDataProvider, NeverEndingLoopingProvider
from utils import scale_back, merge, save_concat_images
import tensorflow as tf
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data import DataLoader, TensorDataset

### Load Data

In [23]:
data_dir = './data/'
batch_size = 32

# 23가지 폰트만 가져오기
data_provider = TrainDataProvider(data_dir, filter_by=range(23))
total_batches = data_provider.compute_total_batch_num(batch_size)
print(total_batches)

train_batch_iter = data_provider.get_train_iter(batch_size)

unpickled total 86423 examples
unpickled total 21677 examples
filter by label -> range(0, 23)
train examples -> 43193, val examples -> 10857
1350


In [24]:
count = 0
for bid, batch in enumerate(train_batch_iter):
    labels, codes, batch_images = batch
    count += 1
    break
# print(count)
print(len(labels), batch_images.shape)

32 torch.Size([32, 2, 64, 64])


---
## Tensorflow code test

In [4]:
def batch_norm(x, is_training=True, epsilon=1e-5, decay=0.9, scope="batch_norm"):
    return tf.contrib.layers.batch_norm(x, decay=decay, updates_collections=None, epsilon=epsilon,
                                        scale=True, is_training=is_training, scope=scope)


def conv2d(x, output_filters, kh=5, kw=5, sh=2, sw=2, stddev=0.02, scope="conv2d"):
    with tf.variable_scope(scope):
        shape = x.shape#.get_shape().as_list()
        W = tf.get_variable('W', [kh, kw, shape[-1], output_filters],
                            initializer=tf.truncated_normal_initializer(stddev=stddev))
        Wconv = tf.nn.conv2d(x, W, strides=[1, sh, sw, 1], padding='SAME')

        biases = tf.get_variable('b', [output_filters], initializer=tf.constant_initializer(0.0))
        Wconv_plus_b = tf.reshape(tf.nn.bias_add(Wconv, biases), Wconv.get_shape())

        return Wconv_plus_b


def deconv2d(x, output_shape, kh=5, kw=5, sh=2, sw=2, stddev=0.02, scope="deconv2d"):
    with tf.variable_scope(scope):
        # filter : [height, width, output_channels, in_channels]
        input_shape = x.get_shape().as_list()
        W = tf.get_variable('W', [kh, kw, output_shape[-1], input_shape[-1]],
                            initializer=tf.random_normal_initializer(stddev=stddev))

        deconv = tf.nn.conv2d_transpose(x, W, output_shape=output_shape,
                                        strides=[1, sh, sw, 1])

        biases = tf.get_variable('b', [output_shape[-1]], initializer=tf.constant_initializer(0.0))
        deconv_plus_b = tf.reshape(tf.nn.bias_add(deconv, biases), deconv.get_shape())

        return deconv_plus_b


def lrelu(x, leak=0.2):
    return tf.maximum(x, leak * x)


def fc(x, output_size, stddev=0.02, scope="fc"):
    with tf.variable_scope(scope):
        shape = x.get_shape().as_list()
        W = tf.get_variable("W", [shape[1], output_size], tf.float32,
                            tf.random_normal_initializer(stddev=stddev))
        b = tf.get_variable("b", [output_size],
                            initializer=tf.constant_initializer(0.0))
        return tf.matmul(x, W) + b
    
def tf_init_embedding(size, dimension, stddev=0.01, scope="embedding"):
    with tf.variable_scope(scope):
        return tf.get_variable("E", [size, 1, 1, dimension], tf.float32,
                               tf.random_normal_initializer(stddev=stddev))


def encoder(images, is_training=True, reuse=False):
    with tf.variable_scope("generator"):
        if reuse:
            tf.get_variable_scope().reuse_variables()

        encode_layers = dict()

        def encode_layer(x, output_filters, layer):
            act = lrelu(x)
            conv = conv2d(act, output_filters=output_filters, scope="g_e%d_conv" % layer)
            enc = batch_norm(conv, is_training, scope="g_e%d_bn" % layer)
            encode_layers["e%d" % layer] = enc
            return enc

        e1 = conv2d(images, generator_dim, scope="g_e1_conv")
        encode_layers["e1"] = e1
        e2 = encode_layer(e1, generator_dim * 2, 2)
        e3 = encode_layer(e2, generator_dim * 4, 3)
        e4 = encode_layer(e3, generator_dim * 8, 4)
        e5 = encode_layer(e4, generator_dim * 8, 5)
        e6 = encode_layer(e5, generator_dim * 8, 6)
        e7 = encode_layer(e6, generator_dim * 8, 7)
        e8 = encode_layer(e7, generator_dim * 8, 8)

        return e8, encode_layers

def decoder(encoded, encoding_layers, ids, inst_norm=False, is_training=True, reuse=False):
    with tf.variable_scope("generator"):
        if reuse:
            tf.get_variable_scope().reuse_variables()
        
        decode_layers = dict()
        output_width = 64
        output_filters = 2
        s = output_width
        s2, s4, s8, s16, s32, s64 = int(s / 2), int(s / 4), int(s / 8), int(s / 16), int(s / 32), int(
            s / 64)

        def decode_layer(x, output_width, output_filters, layer, enc_layer, dropout=False, do_concat=True):
            
            dec = deconv2d(tf.nn.relu(x), [batch_size, output_width,
                                           output_width, output_filters], scope="g_d%d_deconv" % layer)
            if layer != 8:
                # IMPORTANT: normalization for last layer
                # Very important, otherwise GAN is unstable
                # Trying conditional instance normalization to
                # overcome the fact that batch normalization offers
                # different train/test statistics
                if inst_norm:
                    dec = conditional_instance_norm(dec, ids, embedding_num, scope="g_d%d_inst_norm" % layer)
                else:
                    dec = batch_norm(dec, is_training, scope="g_d%d_bn" % layer)
            if dropout:
                dec = tf.nn.dropout(dec, 0.5)
            if do_concat:
                dec = tf.concat([dec, enc_layer], 3)
            decode_layers["d%d" % layer] = dec
            return dec

        d1 = decode_layer(encoded, s64, generator_dim * 8, layer=1, enc_layer=encoding_layers["e7"], dropout=True)
        print(d1.shape)
        d2 = decode_layer(d1, s64, generator_dim * 8, layer=2, enc_layer=encoding_layers["e6"], dropout=True)
        print(d2.shape)
        d3 = decode_layer(d2, s32, generator_dim * 8, layer=3, enc_layer=encoding_layers["e5"], dropout=True)
        print(d3.shape)
        d4 = decode_layer(d3, s16, generator_dim * 8, layer=4, enc_layer=encoding_layers["e4"])
        print(d4.shape)
        d5 = decode_layer(d4, s8, generator_dim * 4, layer=5, enc_layer=encoding_layers["e3"])
        print(d5.shape)
        d6 = decode_layer(d5, s4, generator_dim * 2, layer=6, enc_layer=encoding_layers["e2"])
        print(d6.shape)
        d7 = decode_layer(d6, s2, generator_dim, layer=7, enc_layer=encoding_layers["e1"])
        print(d7.shape)
        d8 = decode_layer(d7, s, output_filters, layer=8, enc_layer=None, do_concat=False)
        print(d8.shape)

        output = tf.nn.tanh(d8)  # scale to (-1, 1)
        return output, decode_layers

def generator(images, embeddings, embedding_ids, inst_norm, is_training, reuse=False):
    e8, enc_layers = encoder(images, is_training=is_training, reuse=reuse)
    local_embeddings = tf.nn.embedding_lookup(embeddings, ids=embedding_ids)
    local_embeddings = tf.reshape(local_embeddings, [batch_size, 1, 1, embedding_dim])
    embedded = tf.concat([e8, local_embeddings], 3)
    output, _ = decoder(embedded, enc_layers, embedding_ids, inst_norm, is_training=is_training, reuse=reuse)
    return output, e8

def discriminator(image):
    with tf.variable_scope("discriminator"):
        h0 = lrelu(conv2d(image, discriminator_dim, scope="d_h0_conv"))
        print(h0.shape)
        h1 = lrelu(batch_norm(conv2d(h0, discriminator_dim * 2, scope="d_h1_conv"),
                              is_training=True, scope="d_bn_1"))
        print(h1.shape)
        h2 = lrelu(batch_norm(conv2d(h1, discriminator_dim * 4, scope="d_h2_conv"),
                              is_training=True, scope="d_bn_2"))
        print(h2.shape)
        h3 = lrelu(batch_norm(conv2d(h2, discriminator_dim * 8, sh=1, sw=1, scope="d_h3_conv"),
                              is_training=True, scope="d_bn_3"))
        print(h3.shape)
        # real or fake binary loss
        fc1 = fc(tf.reshape(h3, [batch_size, -1]), 1, scope="d_fc1")
        print(fc1.shape)
        # category loss
        fc2 = fc(tf.reshape(h3, [batch_size, -1]), embedding_num, scope="d_fc2")
        print(fc2.shape)

        return tf.nn.sigmoid(fc1), fc1, fc2

In [5]:
generator_dim = 64
tf_images = batch_images.reshape((32, 64, 64, 2))
e8, tf_encode_layers = encoder(tf_images)
# batch_images.shape

Instructions for updating:
Colocations handled automatically by placer.

For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.



In [6]:
e8

<tf.Tensor 'generator/g_e8_bn/Identity:0' shape=(32, 1, 1, 512) dtype=float32>

In [7]:
tf_encode_layers

{'e1': <tf.Tensor 'generator/g_e1_conv/Reshape:0' shape=(32, 32, 32, 64) dtype=float32>,
 'e2': <tf.Tensor 'generator/g_e2_bn/Identity:0' shape=(32, 16, 16, 128) dtype=float32>,
 'e3': <tf.Tensor 'generator/g_e3_bn/Identity:0' shape=(32, 8, 8, 256) dtype=float32>,
 'e4': <tf.Tensor 'generator/g_e4_bn/Identity:0' shape=(32, 4, 4, 512) dtype=float32>,
 'e5': <tf.Tensor 'generator/g_e5_bn/Identity:0' shape=(32, 2, 2, 512) dtype=float32>,
 'e6': <tf.Tensor 'generator/g_e6_bn/Identity:0' shape=(32, 1, 1, 512) dtype=float32>,
 'e7': <tf.Tensor 'generator/g_e7_bn/Identity:0' shape=(32, 1, 1, 512) dtype=float32>,
 'e8': <tf.Tensor 'generator/g_e8_bn/Identity:0' shape=(32, 1, 1, 512) dtype=float32>}

In [8]:
batch_images.shape

torch.Size([32, 2, 64, 64])

In [9]:
embedding_num = 23
embedding_dim = 128
embedding_ids = labels
embeddings = tf_init_embedding(embedding_num, embedding_dim)
embeddings.shape

TensorShape([Dimension(23), Dimension(1), Dimension(1), Dimension(128)])

In [10]:
type(embeddings), type(embedding_ids)

(tensorflow.python.ops.variables.RefVariable, list)

In [11]:
local_embeddings = tf.nn.embedding_lookup(embeddings, ids=embedding_ids)
print(local_embeddings.shape)
local_embeddings = tf.reshape(local_embeddings, [batch_size, 1, 1, 128])

(32, 1, 1, 128)


In [12]:
local_embeddings

<tf.Tensor 'Reshape:0' shape=(32, 1, 1, 128) dtype=float32>

In [13]:
encoded = tf.concat([e8, local_embeddings], 3)
encoded

<tf.Tensor 'concat:0' shape=(32, 1, 1, 640) dtype=float32>

In [14]:
output, decode_layer = decoder(encoded, tf_encode_layers, ids=0, inst_norm=False, is_training=True, reuse=False)

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
(32, 1, 1, 1024)
(32, 1, 1, 1024)
(32, 2, 2, 1024)
(32, 4, 4, 1024)
(32, 8, 8, 512)
(32, 16, 16, 256)
(32, 32, 32, 128)
(32, 64, 64, 2)


In [15]:
discriminator_dim = 64
embedding_num
tf_loss, tf_loss_logit, category_loss = discriminator(tf_images)

(32, 32, 32, 64)
(32, 16, 16, 128)
(32, 8, 8, 256)
(32, 8, 8, 512)
(32, 1)
(32, 23)


---

### Functions

In [16]:
def batch_norm(c_out, momentum=0.1):
    return nn.BatchNorm2d(c_out, momentum=momentum)


def conv2d(c_in, c_out, k_size=5, stride=2, pad=2, dilation=2, bn=True, lrelu=True, leak=0.2):
    layers = []
    layers.append(nn.Conv2d(c_in, c_out, k_size, stride, pad))
    if bn:
        layers.append(nn.BatchNorm2d(c_out))
    if lrelu:
        layers.append(nn.LeakyReLU(leak))
    return nn.Sequential(*layers)


def deconv2d(c_in, c_out, k_size=3, stride=1, pad=1, dilation=1, bn=True, dropout=True, p=0.5):
    layers = []
    layers.append(nn.ConvTranspose2d(c_in, c_out, k_size, stride, pad))
    if bn:
        layers.append(nn.BatchNorm2d(c_out))
    if dropout:
        layers.append(nn.Dropout(p))
    layers.append(nn.LeakyReLU(0.1))
    return nn.Sequential(*layers)


def lrelu(leak=0.2):
    return nn.LeakyReLU(leak)


def dropout(p=0.2):
    return nn.Dropout(p)


def fc(input_size, output_size):
    return nn.Linear(input_size, output_size)

In [42]:
class Encoder(nn.Module):
    
    def __init__(self, img_dim=2, conv_dim=64):
        super(Encoder, self).__init__()
        self.conv1 = conv2d(img_dim, conv_dim)
        self.conv2 = conv2d(conv_dim, conv_dim*2)
        self.conv3 = conv2d(conv_dim*2, conv_dim*4)
        self.conv4 = conv2d(conv_dim*4, conv_dim*8)
        self.conv5 = conv2d(conv_dim*8, conv_dim*8)
        self.conv6 = conv2d(conv_dim*8, conv_dim*8)
        self.conv7 = conv2d(conv_dim*8, conv_dim*8)
        self.conv8 = conv2d(conv_dim*8, conv_dim*8)
    
    def forward(self, images):
        encode_layers = dict()
        
        e1 = self.conv1(images)
        encode_layers['e1'] = e1
        e2 = self.conv2(e1)
        encode_layers['e2'] = e2
        e3 = self.conv3(e2)
        encode_layers['e3'] = e3
        e4 = self.conv4(e3)
        encode_layers['e4'] = e4
        e5 = self.conv5(e4)
        encode_layers['e5'] = e5
        e6 = self.conv6(e5)
        encode_layers['e6'] = e6
        e7 = self.conv7(e6)
        encode_layers['e7'] = e7
        encoded_images = self.conv8(e7)
        encode_layers['e8'] = encoded_images
        
        return encoded_images, encode_layers

In [43]:
FONTS_NUM = 30
EMBEDDING_DIM = 128
BATCH_SIZE = 32
embedding_num = BATCH_SIZE

In [44]:
EMBEDDINGS = init_embedding(FONTS_NUM, EMBEDDING_DIM)
embedding_ids = labels
local_embeddings = embedding_lookup(EMBEDDINGS, embedding_ids)
local_embeddings.shape

torch.Size([32, 128, 1, 1])

In [45]:
img_dim = 2
En = Encoder()
En

Encoder(
  (conv1): Sequential(
    (0): Conv2d(2, 64, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): LeakyReLU(negative_slope=0.2)
  )
  (conv2): Sequential(
    (0): Conv2d(64, 128, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
    (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): LeakyReLU(negative_slope=0.2)
  )
  (conv3): Sequential(
    (0): Conv2d(128, 256, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
    (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): LeakyReLU(negative_slope=0.2)
  )
  (conv4): Sequential(
    (0): Conv2d(256, 512, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
    (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): LeakyReLU(negative_slope=0.2)
  )
  (conv5): Sequential(
    (0): Conv2d(512, 512, kernel_size=(5,

In [53]:
encoded_images, fake_images = En(batch_images)

In [47]:
encoded_images.shape, fake_images.shape

(torch.Size([32, 512, 1, 1]), torch.Size([32, 2, 64, 64]))

In [48]:
def init_embedding(embedding_num, embedding_dim, stddev=0.01):
    embedding = torch.randn(embedding_num, embedding_dim) * stddev
    embedding = embedding.reshape((embedding_num, 1, 1, embedding_dim))
    return embedding

In [49]:
embedding_num = 23
embedding_dim = 128
embeddings = init_embedding(embedding_num, embedding_dim)
embeddings.shape

torch.Size([23, 1, 1, 128])

In [50]:
embedding_ids = labels
len(embedding_ids)

32

In [51]:
def embedding_lookup(embeddings, embedding_ids):
    local_embeddings = []
    for id_ in embedding_ids:
        local_embeddings.append(embeddings[id_].data.numpy())
    local_embeddings = torch.from_numpy(np.array(local_embeddings))
    local_embeddings = local_embeddings.reshape(batch_size, embedding_dim, 1, 1)
    return local_embeddings

In [52]:
local_embeddings = embedding_lookup(embeddings, embedding_ids)
local_embeddings.shape

torch.Size([32, 128, 1, 1])

In [None]:
e8.shape, local_embeddings.shape

In [None]:
embedded = torch.cat((e8, local_embeddings), 1)
embedded.shape

In [None]:
class Decoder(nn.Module):
    
    def __init__(self, embedded_dim=640, conv_dim=64):
        super(Decoder, self).__init__()
        self.deconv1 = deconv2d(embedded_dim, conv_dim*8)
        self.deconv2 = deconv2d(conv_dim*16, conv_dim*8)
        self.deconv3 = deconv2d(conv_dim*16, conv_dim*8, k_size=4, dilation=2)
        self.deconv4 = deconv2d(conv_dim*16, conv_dim*8, k_size=5, dilation=2)
        self.deconv5 = deconv2d(conv_dim*16, conv_dim*4, k_size=4, dilation=2, stride=2)
        self.deconv6 = deconv2d(conv_dim*8, conv_dim*2, k_size=4, dilation=2, stride=2)
        self.deconv7 = deconv2d(conv_dim*4, conv_dim*1, k_size=4, dilation=2, stride=2)
        self.deconv8 = deconv2d(conv_dim*2, image_dim, k_size=4, dilation=2, stride=2, bn=False)
    
    
    def forward(self, embedded, encode_layers):
        decode_layers = dict()
        
        d1 = self.deconv1(embedded)
        d1 = torch.cat((d1, encode_layers['e7']), dim=1)
        d2 = self.deconv2(d1)
        d2 = torch.cat((d2, encode_layers['e6']), dim=1)
        d3 = self.deconv3(d2)
        d3 = torch.cat((d3, encode_layers['e5']), dim=1)
        d4 = self.deconv4(d3)
        d4 = torch.cat((d4, encode_layers['e4']), dim=1)
        d5 = self.deconv5(d4)
        d5 = torch.cat((d5, encode_layers['e3']), dim=1)
        d6 = self.deconv6(d5)
        d6 = torch.cat((d6, encode_layers['e2']), dim=1)
        d7 = self.deconv7(d6)
        d7 = torch.cat((d7, encode_layers['e1']), dim=1)
        d8 = self.deconv8(d7)        
        fake_images = torch.tanh(d8)
        
        decode_layers['d1'] = d1
        decode_layers['d2'] = d2
        decode_layers['d3'] = d3
        decode_layers['d4'] = d4
        decode_layers['d5'] = d5
        decode_layers['d6'] = d6
        decode_layers['d7'] = d7
        decode_layers['d8'] = d8
        
        return fake_images, decode_layers

In [None]:
De = Decoder()
De

In [None]:
d8, decode_layers = De(embedded, encode_layers)

In [None]:
print(embedded.shape)
for key, value in decode_layers.items():
    print(key, value.shape)

In [None]:
def generator(images, embeddings, embedding_ids):
    encoded_images, encode_layers = En(images)
    local_embeddings = embedding_lookup(embeddings, embedding_ids)
    embedded = torch.cat((e8, local_embeddings), 1)
    fake_images, decode_layers = De(embedded, encode_layers)
    return fake_images, encoded_images

In [None]:
fake_image, encoded_image = generator(images, embeddings, embedding_ids)

In [None]:
fake_image.shape

In [None]:
plt.figure(figsize=(1, 1))
plt.imshow(fake_image[0][0].data, cmap='gray')
plt.grid()
plt.show()

In [None]:
encoded_image.shape

In [None]:
fake_image.data.min()

In [None]:
fake_image[0][0]

In [None]:
e8.data.min()

In [None]:
class Discriminator(nn.Module):
    def __init__(self, embedding_num, img_dim=2, disc_dim=64):
        super(Discriminator, self).__init__()
        self.conv1 = conv2d(img_dim, disc_dim, bn=False)
        self.conv2 = conv2d(disc_dim, disc_dim*2)
        self.conv3 = conv2d(disc_dim*2, disc_dim*4)
        self.conv4 = conv2d(disc_dim*4, disc_dim*8, stride=1)
        self.fc1 = fc(disc_dim*8*8*8, 1)
        self.fc2 = fc(disc_dim*8*8*8, embedding_num)
        
    def forward(self, images):
        h1 = self.conv1(images)
        h2 = self.conv2(h1)
        h3 = self.conv3(h2)
        h4 = self.conv4(h3)
        
        tf_loss_logit = self.fc1(h4.reshape(batch_size, -1))
        tf_loss = torch.sigmoid(tf_loss_logit)
        cat_loss = self.fc2(h4.reshape(batch_size, -1))
        
        return tf_loss, tf_loss_logit, cat_loss

In [None]:
D = Discriminator(embedding_num=23)
D

In [None]:
tf_loss, tf_loss_logit, cat_loss = D(images)

In [None]:
tf_loss.shape, tf_loss_logit.shape, cat_loss.shape