# M2177.003100 Deep Learning <br> Final Project: Text to Image Synthesis (Tensorflow)

Copyright (C) Data Science & AI Laboratory, Seoul National University. This material is for educational uses only. Some contents are based on the material provided by other paper/book authors and may be copyrighted by them. 

**To understand this work, please read the provided PPT file carefully.**

**Note**: certain details are missing or ambiguous on purpose, in order to test your knowledge on the related materials. However, if you really feel that something essential is missing and cannot proceed to the next step, then contact the teaching staff with clear description of your problem.

### Submitting your work:
<font color=red>**DO NOT clear the training process**</font> so that TAs can grade both your code and results.  
**The TA will set the config file to 'eval_birds.yml' when evaluating the code on the 'hidden test dataset'. Thus, please make sure that your code can generate the data needed to measure the inception score and R-precision on the 'hidden test dataset'.**

## 1. Load datasets
The Birds dataset will be downloaded automatically if it is not located in the *data* directory. <br>

In [1]:
import tensorflow as tf
import matplotlib.pyplot as plt
import os, nltk
from miscc.config import cfg, cfg_from_file
import pprint
import datetime
import dateutil.tz
import numpy as np
import scipy
from utils.data_utils import CUBDataset
from utils.loss import cosine_similarity
import pandas as pd
from scipy.io import loadmat
import re
import string
import random
import time

#################################################
# DO NOT CHANGE 
from utils.model import CNN_ENCODER, RNN_ENCODER, GENERATOR, DISCRIMINATOR
#################################################

%matplotlib inline

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
# Use 'cfg/train_birds.yml' while training and 'cfg/eval_birds.yml' for evaluation.
cfg_from_file('cfg/train_birds.yml') # eval_birds.yml

print('Using config:')
pprint.pprint(cfg)

# Expose only the GPU id named in the config to CUDA.
os.environ['CUDA_VISIBLE_DEVICES'] = cfg.GPU_ID

# Build a timestamped output directory name: sample/<dataset>_<config>_<time>.
now = datetime.datetime.now(dateutil.tz.tzlocal())
timestamp = now.strftime('%Y_%m_%d_%H_%M_%S')
output_dir = 'sample/{}_{}_{}'.format(cfg.DATASET_NAME, cfg.CONFIG_NAME, timestamp)

Using config:
{'BATCH_SIZE': 64,
 'CHECKPOINT_DIR': './checkpoint',
 'CHECKPOINT_NAME': 'model.ckpt',
 'CNN': {'EMBEDDING_DIM': 0, 'H_DIM': 0},
 'CONFIG_NAME': 'text-to-image',
 'CUDA': False,
 'DATASET_NAME': 'birds',
 'DATA_DIR': 'data/birds',
 'EMBEDDING_TYPE': 'cnn-rnn',
 'GAN': {'B_ATTENTION': False,
         'B_CONDITION': False,
         'B_DCGAN': False,
         'CONDITION_DIM': 0,
         'DF_DIM': 0,
         'EMBEDDING_DIM': 0,
         'GF_DIM': 0,
         'R_NUM': 0,
         'Z_DIM': 512},
 'GPU_ID': '0',
 'IMAGE_SIZE': 256,
 'NUM_BATCH_FOR_TEST': 0,
 'RANDOM_SEED': 0,
 'RNN': {'EMBEDDING_DIM': 0,
         'H_DIM': 0,
         'TYPE': '',
         'VOCAB_SIZE': 0,
         'WORD_EMBEDDING_DIM': 0},
 'R_PRECISION_DIR': './evaluation',
 'R_PRECISION_FILE': 'r_precision.npz',
 'R_PRECISION_FILE_HIDDEN': 'r_precision_hidden.npz',
 'TEST': {'B_EXAMPLE': False,
          'GENERATED_HIDDEN_TEST_IMAGES': './evaluation/generated_images_hidden',
          'GENERATED_TEST_IMAGES'

  yaml_cfg = edict(yaml.load(f))


In [3]:
# Build the train and test splits from the configured data directory.
train_dataset = CUBDataset(cfg.DATA_DIR, split='train')
test_dataset = CUBDataset(cfg.DATA_DIR, split='test')

# Where each split lives on disk.
print(f'\ntrain data directory:\n{train_dataset.split_dir}')
print(f'test data directory:\n{test_dataset.split_dir}\n')

# Number of image filenames per split.
print(f'# of train filenames:{train_dataset.filenames.shape}')
print(f'# of test filenames:{test_dataset.filenames.shape}\n')

# First filename of each split (the second line is labelled "valid" but shows the test split).
print(f'example of filename of train image:{train_dataset.filenames[0]}')
print(f'example of filename of valid image:{test_dataset.filenames[0]}\n')

# First caption of each split, followed by its id encoding (train first, then test).
print(f'example of caption and its ids:\n{train_dataset.captions[0]}\n{train_dataset.captions_ids[0]}\n')
print(f'example of caption and its ids:\n{test_dataset.captions[0]}\n{test_dataset.captions_ids[0]}\n')

# Shapes of the raw captions.
print(f'# of train captions:{np.asarray(train_dataset.captions).shape}')
print(f'# of test captions:{np.asarray(test_dataset.captions).shape}\n')

# Shapes of the id-encoded captions.
print(f'# of train caption ids:{np.asarray(train_dataset.captions_ids).shape}')
print(f'# of test caption ids:{np.asarray(test_dataset.captions_ids).shape}\n')

# Shapes of the image arrays.
print(f'# of train images:{train_dataset.images.shape}')
print(f'# of test images:{test_dataset.images.shape}\n')

self.current_dir:
/home/chszerg/final-project-deep-learning-19-tf

self.data_dir:
/home/chszerg/final-project-deep-learning-19-tf/data/birds

self.image_dir:
/home/chszerg/final-project-deep-learning-19-tf/data/birds/CUB_200_2011.tgz

Downloading: CUB-200-2011 (birds images) Bytes: 1150585339
unzipping /home/chszerg/final-project-deep-learning-19-tf/data/birds/CUB_200_2011.tgz
self.image_dir:
/home/chszerg/final-project-deep-learning-19-tf/data/birds/CUB_200_2011/images

Save to:  data/birds/captions.pickle
self.current_dir:
/home/chszerg/final-project-deep-learning-19-tf

self.data_dir:
/home/chszerg/final-project-deep-learning-19-tf/data/birds

self.image_dir:
/home/chszerg/final-project-deep-learning-19-tf/data/birds/CUB_200_2011.tgz

Dataset already exists
self.image_dir:
/home/chszerg/final-project-deep-learning-19-tf/data/birds/CUB_200_2011/images

Load from:  data/birds/captions.pickle

train data directory:
/home/chszerg/final-project-deep-learning-19-tf/data/birds/train
test d

In [4]:
# Pull the image arrays out of the dataset objects and turn the caption ids
# into ndarrays, then report the shape of each of the four arrays.
train_images, test_images = train_dataset.images, test_dataset.images
train_captions = np.asarray(train_dataset.captions_ids)
test_captions = np.asarray(test_dataset.captions_ids)
for _array in (train_images, test_images, train_captions, test_captions):
    print(_array.shape)

(8855, 256, 256, 3)
(2933, 256, 256, 3)
(88550, 20)
(29330, 20)


In [6]:
from skimage.transform import resize

# Resize every training image to 64x64x3 and check that none was lost.
train_images_64 = np.asarray([resize(image, (64, 64, 3)) for image in train_images])
print(train_images_64.shape)
assert train_images_64.shape[0] == train_images.shape[0]

# Same treatment for the test images.
test_images_64 = np.asarray([resize(image, (64, 64, 3)) for image in test_images])
print(test_images_64.shape)
assert test_images_64.shape[0] == test_images.shape[0]

(8855, 64, 64, 3)
(2933, 64, 64, 3)


In [7]:
# From this cell on, train_images / test_images refer to the 64x64 versions.
train_images, test_images = train_images_64, test_images_64
n_captions_per_image = 10
n_captions_train = train_captions.shape[0]
n_images_train = train_images.shape[0]

In [8]:
import scipy.misc
import threading
import scipy.ndimage as ndi
from skimage import transform
from skimage import exposure
import skimage
from nltk.tokenize import RegexpTokenizer

def sent2ID(sample_sentence):
    """Encode one caption string as word ids using the training vocabulary.

    The sentence is lower-cased and split into word-character tokens,
    non-ASCII characters are stripped from each token, and every token found
    in ``train_dataset.wordtoix`` is replaced by its id. Tokens missing from
    the vocabulary are dropped. The id list is then passed through
    ``train_dataset.get_caption``.

    Args:
        sample_sentence: caption text (str).

    Returns:
        A one-element list holding the array returned by ``get_caption`` with
        its axis 1 squeezed out.

    Raises:
        ValueError: if ``sample_sentence`` is empty. (The previous ``exit()``
            call would shut the notebook kernel down instead.)
    """
    if len(sample_sentence) == 0:
        raise ValueError("sent2ID() received an empty sentence")

    # Pairs of U+FFFD replacement characters are treated as a word separator.
    cleaned = sample_sentence.replace("\ufffd\ufffd", " ")
    tokenizer = RegexpTokenizer(r'\w+')

    word_ids = []
    for token in tokenizer.tokenize(cleaned.lower()):
        token = token.encode('ascii', 'ignore').decode('ascii')
        # Skip tokens that became empty or are not in the vocabulary.
        if len(token) > 0 and token in train_dataset.wordtoix:
            word_ids.append(train_dataset.wordtoix[token])
    # An '<end>' token is deliberately not appended.

    # get_caption also returns a length, which is not needed here.
    # NOTE(review): presumably it pads/truncates to a fixed length - confirm in CUBDataset.
    x, _ = train_dataset.get_caption(word_ids)
    return [np.squeeze(x, axis=1)]

def ID2sent(sample_caption):
    """Decode a sequence of word ids into a list of words, skipping padding.

    Args:
        sample_caption: iterable of ids that are keys of
            ``train_dataset.ixtoword``.

    Returns:
        List of decoded words in order, without ``'<PAD>'`` entries.
    """
    sentence = []
    for ID in sample_caption:
        # ixtoword is indexed by id, so the pad check has to be made on the
        # decoded word; looking up ixtoword['<PAD>'] uses a word as the key.
        # NOTE(review): assumes the padding token is spelled '<PAD>' - confirm in CUBDataset.
        word = train_dataset.ixtoword[ID]
        if word != '<PAD>':
            sentence.append(word)
    return sentence

def get_random_int(min=0, max=10, number=5):
    """Draw ``number`` random integers, each in the closed range [min, max].

    Examples
    ---------
    >>> r = get_random_int(min=0, max=10, number=5)
    ... [10, 2, 3, 3, 7]
    """
    drawn = []
    for _ in range(number):
        drawn.append(random.randint(min, max))
    return drawn

## Save images
def merge(images, size):
    """Tile a batch of images into a single grid image.

    ``images`` is indexed as (n, height, width, ...); ``size`` is
    (rows, cols). Images are placed row by row, left to right, on a zero
    canvas with 3 channels; unused grid cells stay zero.
    """
    tile_h, tile_w = images.shape[1], images.shape[2]
    n_rows, n_cols = size[0], size[1]
    canvas = np.zeros((tile_h * n_rows, tile_w * n_cols, 3))
    for idx, tile in enumerate(images):
        row, col = divmod(idx, n_cols)
        top, left = row * tile_h, col * tile_w
        canvas[top:top + tile_h, left:left + tile_w, :] = tile
    return canvas

def imsave(images, size, path):
    """Merge ``images`` into a ``size`` grid and write it to ``path``.

    Returns whatever ``scipy.misc.imsave`` returns.
    """
    grid = merge(images, size)
    return scipy.misc.imsave(path, grid)

def save_images(images, size, image_path):
    """Write ``images`` as one grid image at ``image_path`` (delegates to ``imsave``)."""
    result = imsave(images, size, image_path)
    return result

# Data Augmentation reference: https://github.com/tensorlayer/tensorlayer/tree/master/tensorlayer
def threading_data(data=None, fn=None, **kwargs):
    """Apply ``fn(item, **kwargs)`` to every item of ``data``, one thread per item.

    All threads are started, then joined; the results are returned in input
    order as ``np.asarray(results)``.
    """
    outputs = [None] * len(data)  # one preallocated slot per item

    def _run(slot, item):
        outputs[slot] = fn(item, **kwargs)

    workers = []
    for slot in range(len(data)):
        worker = threading.Thread(
            name='threading_and_return',
            target=_run,
            args=(slot, data[slot]),
        )
        worker.start()
        workers.append(worker)

    # Wait for every worker before collecting the results.
    for worker in workers:
        worker.join()
    return np.asarray(outputs)

def apply_transform(x, transform_matrix, channel_index=2, fill_mode='nearest', cval=0., order=1):
    """Apply a 2-D affine transform to every channel of an image.

    Args:
        x: 3-D image array.
        transform_matrix: 3x3 homogeneous matrix; the top-left 2x2 part is
            used as the affine matrix and the first two entries of the last
            column as the offset.
        channel_index: axis of ``x`` that holds the channels (may be negative).
        fill_mode: boundary ``mode`` passed to ``affine_transform``.
        cval: fill value passed to ``affine_transform``.
        order: spline interpolation order passed to ``affine_transform``.

    Returns:
        Transformed image with the same axis layout as ``x``.
    """
    # Bring the channel axis to the front so each channel can be transformed alone.
    channels_first = np.moveaxis(x, channel_index, 0)
    affine_matrix = transform_matrix[:2, :2]
    offset = transform_matrix[:2, 2]
    # Call the public scipy.ndimage.affine_transform; the
    # scipy.ndimage.interpolation namespace is deprecated.
    transformed = [
        ndi.affine_transform(channel, affine_matrix, offset,
                             order=order, mode=fill_mode, cval=cval)
        for channel in channels_first
    ]
    stacked = np.stack(transformed, axis=0)
    # moveaxis puts the channels back correctly for negative channel_index too,
    # which the former rollaxis(x, 0, channel_index + 1) did not.
    return np.moveaxis(stacked, 0, channel_index)

def transform_matrix_offset_center(matrix, x, y):
    """Re-centre a 3x3 transform on the point (x/2 + 0.5, y/2 + 0.5).

    Returns ``T(+c) . matrix . T(-c)``, where ``T`` is a homogeneous
    translation and ``c`` is that centre point.
    """
    centre_x = float(x) / 2 + 0.5
    centre_y = float(y) / 2 + 0.5
    to_centre = np.array([[1, 0, centre_x],
                          [0, 1, centre_y],
                          [0, 0, 1]])
    from_centre = np.array([[1, 0, -centre_x],
                            [0, 1, -centre_y],
                            [0, 0, 1]])
    return np.dot(to_centre, matrix).dot(from_centre)

def rotation(x, rg=20, is_random=False, row_index=0, col_index=1, channel_index=2,
                    fill_mode='nearest', cval=0.):
    """Rotate an image about its centre.

    With ``is_random`` the angle is drawn uniformly from [-rg, rg) degrees;
    otherwise the image is rotated by exactly ``rg`` degrees. ``fill_mode``
    and ``cval`` are forwarded to ``apply_transform``.
    """
    degrees = np.random.uniform(-rg, rg) if is_random else rg
    theta = np.pi / 180 * degrees
    cos_t, sin_t = np.cos(theta), np.sin(theta)
    rotation_matrix = np.array([[cos_t, -sin_t, 0],
                                [sin_t, cos_t, 0],
                                [0, 0, 1]])
    height, width = x.shape[row_index], x.shape[col_index]
    centred = transform_matrix_offset_center(rotation_matrix, height, width)
    return apply_transform(x, centred, channel_index, fill_mode, cval)

def crop(x, wrg, hrg, is_random=False, row_index=0, col_index=1, channel_index=2):
    """Cut an ``hrg`` x ``wrg`` window out of an image.

    Args:
        x: image array; the window is taken from its first two axes.
        wrg: width of the crop.
        hrg: height of the crop.
        is_random: pick the window position uniformly at random when True,
            otherwise take the central window.
        row_index, col_index: axes used to read the image height and width.
        channel_index: unused, kept for interface compatibility.

    Returns:
        The cropped view ``x[top:top + hrg, left:left + wrg]``.

    Raises:
        AssertionError: if the crop is not strictly smaller than the image.
    """
    h, w = x.shape[row_index], x.shape[col_index]
    assert (h > hrg) and (w > wrg), "The size of cropping should smaller than the original image"
    if is_random:
        # Every valid offset 0..(h - hrg) is equally likely. The former
        # int(uniform(0, h - hrg) - 1) favoured offset 0, never reached the
        # last two offsets and could produce -1 (a wrongly sized slice).
        h_offset = int(np.random.randint(0, h - hrg + 1))
        w_offset = int(np.random.randint(0, w - wrg + 1))
    else:   # central crop
        h_offset = int(np.floor((h - hrg) / 2.))
        w_offset = int(np.floor((w - wrg) / 2.))
    return x[h_offset: h_offset + hrg, w_offset: w_offset + wrg]

def flip_axis(x, axis, is_random=False):
    """Reverse ``x`` along ``axis``.

    With ``is_random`` a value is drawn from uniform(-1, 1); the flip is
    applied only when it is positive, otherwise ``x`` is returned untouched.
    """
    if is_random and not (np.random.uniform(-1, 1) > 0):
        return x
    # Swap the target axis to the front, reverse it, then swap it back.
    flipped = np.asarray(x).swapaxes(axis, 0)
    flipped = flipped[::-1, ...]
    return flipped.swapaxes(0, axis)

def imresize(x, size=[100, 100], interp='bilinear', mode=None):
    """Resize a 1- or 3-channel image with ``scipy.misc.imresize``.

    Single-channel input is resized as a 2-D array and gets its trailing
    channel axis back. Any other channel count raises ``Exception``.
    """
    n_channels = x.shape[-1]
    if n_channels not in (1, 3):
        raise Exception("Unsupported channel %d" % n_channels)
    if n_channels == 3:
        # rgb, bgr ..
        return scipy.misc.imresize(x, size, interp=interp, mode=mode)
    # greyscale
    resized = scipy.misc.imresize(x[:, :, 0], size, interp=interp, mode=mode)
    return resized[:, :, np.newaxis]

def prepro_img(x, mode=None):
    """Augment and rescale one image for training.

    Only ``mode == 'train'`` does anything: random flip along axis 1, random
    rotation of up to 16 degrees, bilinear resize to 79x79, random 64x64
    crop, then ``x / 127.5 - 1`` (which maps 0 -> -1 and 255 -> 1).
    For any other mode the input is returned unchanged.
    """
    if mode != 'train':
        return x
    augmented = flip_axis(x, axis=1, is_random=True)
    augmented = rotation(augmented, rg=16, is_random=True, fill_mode='nearest')
    augmented = imresize(augmented, size=[64 + 15, 64 + 15], interp='bilinear', mode=None)
    augmented = crop(augmented, wrg=64, hrg=64, is_random=True)
    return augmented / (255. / 2.) - 1.

def combine_and_save_image_sets(image_sets, directory):
    """Save the i-th image of every set side by side as ``combined_<i>.jpg``.

    Each image is followed by a 5-pixel-wide, 3-channel zero strip. The
    number of output files equals the length of the first set.
    """
    for i in range(len(image_sets[0])):
        pieces = []
        for image_set in image_sets:
            image = image_set[i]
            pieces.append(image)
            pieces.append(np.zeros((image.shape[0], 5, 3)))  # separator strip
        combined = np.concatenate(pieces, axis=1)
        target = os.path.join(directory, 'combined_{}.jpg'.format(i))
        scipy.misc.imsave(target, combined)

def save(saver, sess, logdir, step):
    """Write a checkpoint named ``model.ckpt`` into ``logdir`` at ``step``.

    ``logdir`` is created first when it does not exist yet.
    """
    if not os.path.exists(logdir):
        os.makedirs(logdir)
    target = os.path.join(logdir, 'model.ckpt')
    saver.save(sess, target, global_step=step)
    print('The checkpoint has been created.')

def load(saver, sess, ckpt_path):
    """Restore the session's variables from ``ckpt_path`` and report it."""
    saver.restore(sess, ckpt_path)
    message = "Restored model parameters from {}".format(ckpt_path)
    print(message)

In [9]:
tf.reset_default_graph()

# Folder that receives the sample grid written after every epoch.
train_samples_dir = 'train_samples'
if not os.path.exists(train_samples_dir):
    os.makedirs(train_samples_dir)

# Optimiser schedule: Adam with a step-wise learning-rate decay.
lr = 0.0002
lr_decay = 0.5
decay_every = 100
beta1 = 0.5
checkpoint_dir = './checkpoint'

# Model / batch geometry.
z_dim = 512
image_size = 64
c_dim = 3
batch_size = 64
ni = int(np.ceil(np.sqrt(batch_size)))  # side length of the sample grid

# Fixed noise and captions used to render the per-epoch sample grid.
sample_size = batch_size
sample_seed = np.random.normal(loc=0.0, scale=1.0, size=(sample_size, z_dim)).astype(np.float32)
# Eight groups of int(sample_size / ni) copies of the same caption.
sample_sentence = ["a black bird with oily black feathers and rounded black beak."] * (8 * int(sample_size/ni))
sample_sentence = [sent2ID(sent) for sent in sample_sentence]
sample_sentence = np.reshape(np.asarray(sample_sentence), (sample_size, 20))
print(sample_sentence.shape)

(64, 20)


In [10]:
# Graph inputs: a batch of real images, a batch of mismatching ("wrong") images,
# the captions belonging to the real images, mismatching captions, and generator noise.
t_real_image = tf.placeholder('float32', [batch_size, image_size, image_size, 3], name = 'real_image')
t_wrong_image = tf.placeholder('float32', [batch_size ,image_size, image_size, 3], name = 'wrong_image')
t_real_caption = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name='real_caption_input')
t_wrong_caption = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name='wrong_caption_input')
t_z = tf.placeholder(tf.float32, [batch_size, z_dim], name='z_noise')

### Training Phase - CNN - RNN mapping
# x / v are the encodings of matching images and captions, x_w / v_w those of the
# mismatching ones. NOTE(review): reuse=True presumably shares the variables
# created by the first encoder instances - confirm in utils.model.
net_cnn = CNN_ENCODER(t_real_image, is_training=True, reuse=False)
x = net_cnn.outputs
v = RNN_ENCODER(t_real_caption, is_training=True, reuse=False).outputs
x_w = CNN_ENCODER(t_wrong_image, is_training=True, reuse=True).outputs
v_w = RNN_ENCODER(t_wrong_caption, is_training=True, reuse=True).outputs

# Hinge loss on cosine similarity: a matching (image, caption) pair must beat both
# (image, wrong caption) and (wrong image, caption) by at least the margin alpha.
alpha = 0.2 # margin alpha
rnn_loss = tf.reduce_mean(tf.maximum(0., alpha - cosine_similarity(x, v) + cosine_similarity(x, v_w))) + \
            tf.reduce_mean(tf.maximum(0., alpha - cosine_similarity(x, v) + cosine_similarity(x_w, v)))

### Training Phase - GAN
# The text encoder is used in inference mode here; the generator is conditioned
# on its output together with the noise vector.
net_rnn = RNN_ENCODER(t_real_caption, is_training=False, reuse=True)
net_fake_image = GENERATOR(t_z, net_rnn.outputs, is_training=True, reuse=False)

# Discriminator on (generated image, matching caption).
net_disc_fake = DISCRIMINATOR(net_fake_image.outputs, net_rnn.outputs, is_training=True, reuse=False)
disc_fake_logits = net_disc_fake.logits

# Discriminator on (real image, matching caption).
net_disc_real = DISCRIMINATOR(t_real_image, net_rnn.outputs, is_training=True, reuse=True)
disc_real_logits = net_disc_real.logits

# Discriminator on (real image, mismatching caption).
net_disc_mismatch = DISCRIMINATOR(t_real_image, RNN_ENCODER(t_wrong_caption, is_training=False, reuse=True).outputs,
                                is_training=True, reuse=True)
disc_mismatch_logits = net_disc_mismatch.logits

# Discriminator targets: real+matching -> 1, real+mismatching -> 0, fake+matching -> 0.
# The two "0" terms are averaged so they weigh as much as the single "1" term.
d_loss1 = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=disc_real_logits,     labels=tf.ones_like(disc_real_logits),      name='d1'))
d_loss2 = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=disc_mismatch_logits, labels=tf.zeros_like(disc_mismatch_logits), name='d2'))
d_loss3 = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=disc_fake_logits,     labels=tf.zeros_like(disc_fake_logits),     name='d3'))
d_loss = d_loss1 + (d_loss2 + d_loss3) * 0.5

# Generator target: the discriminator should label generated images as 1.
g_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=disc_fake_logits, labels=tf.ones_like(disc_fake_logits), name='g'))

### Testing Phase
# Generator in inference mode; used to render the sample grid during training.
net_g = GENERATOR(t_z, RNN_ENCODER(t_real_caption, is_training=False, reuse=True).outputs,
                    is_training=False, reuse=True)

# Split the trainable variables per sub-network by a substring of their name.
rnn_vars = [var for var in tf.trainable_variables() if 'rnnencoder' in var.name]
cnn_vars = [var for var in tf.trainable_variables() if 'cnnencoder' in var.name]
d_vars = [var for var in tf.trainable_variables() if 'discriminator' in var.name]
g_vars = [var for var in tf.trainable_variables() if 'generator' in var.name]

# Same split for the ops registered in UPDATE_OPS (the printed names below are
# batch-norm moving-average assignments).
update_ops_CNN = [var for var in tf.get_collection(tf.GraphKeys.UPDATE_OPS) if 'cnnencoder' in var.name]
update_ops_D = [var for var in tf.get_collection(tf.GraphKeys.UPDATE_OPS) if 'discriminator' in var.name]
update_ops_G = [var for var in tf.get_collection(tf.GraphKeys.UPDATE_OPS) if 'generator' in var.name]

print('----------Update_ops_D--------')
for var in update_ops_D:
    print(var.name)
print('----------Update_ops_G--------')
for var in update_ops_G:
    print(var.name)
print('----------Update_ops_CNN--------')
for var in update_ops_CNN:
    print(var.name)

# The learning rate lives in a non-trainable variable so it can be reassigned during training.
with tf.variable_scope('learning_rate'):
    lr_v = tf.Variable(lr, trainable=False)

# Encoder optimiser: Adam on the ranking loss, gradients clipped to a global norm of 10.
with tf.control_dependencies(update_ops_CNN):
    grads, _ = tf.clip_by_global_norm(tf.gradients(rnn_loss, rnn_vars + cnn_vars), 10)
    optimizer = tf.train.AdamOptimizer(lr_v, beta1=beta1)
    rnn_optim = optimizer.apply_gradients(zip(grads, rnn_vars + cnn_vars))

# Discriminator optimiser: only updates the discriminator variables.
with tf.control_dependencies(update_ops_D):
    d_optim = tf.train.AdamOptimizer(lr_v, beta1=beta1).minimize(d_loss, var_list=d_vars)

# Generator optimiser: only updates the generator variables.
with tf.control_dependencies(update_ops_G):
    g_optim = tf.train.AdamOptimizer(lr_v, beta1=beta1).minimize(g_loss, var_list=g_vars)

Instructions for updating:
This class is deprecated, please use tf.nn.rnn_cell.LSTMCell, which supports all the feature this cell currently has. Please replace the existing code with tf.nn.rnn_cell.LSTMCell(name='basic_lstm_cell').
----------Update_ops_D--------
discriminator/d_h1/batchnorm/AssignMovingAvg
discriminator/d_h1/batchnorm/AssignMovingAvg_1
discriminator/d_h2/batchnorm/AssignMovingAvg
discriminator/d_h2/batchnorm/AssignMovingAvg_1
discriminator/d_h3/batchnorm/AssignMovingAvg
discriminator/d_h3/batchnorm/AssignMovingAvg_1
discriminator/d_h4_res/batchnorm/AssignMovingAvg
discriminator/d_h4_res/batchnorm/AssignMovingAvg_1
discriminator/d_h4_res/batchnorm2/AssignMovingAvg
discriminator/d_h4_res/batchnorm2/AssignMovingAvg_1
discriminator/d_h4_res/batchnorm3/AssignMovingAvg
discriminator/d_h4_res/batchnorm3/AssignMovingAvg_1
discriminator/d_h3/batch_norm_2/AssignMovingAvg
discriminator/d_h3/batch_norm_2/AssignMovingAvg_1
discriminator_1/d_h1/batchnorm/AssignMovingAvg
discriminato

In [None]:
# Session that grows GPU memory on demand and falls back to another device when needed.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.allow_soft_placement = True
sess = tf.Session(config=config)
init = tf.global_variables_initializer()
sess.run(init)
saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=10)

# Resume from the newest checkpoint when one exists. The step number in the
# checkpoint name is the last finished epoch, so training continues with the
# next one instead of restarting at epoch 0 (which would re-run the encoder
# phase and overwrite newer checkpoints with lower step numbers).
start_epoch = 0
ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
if ckpt and ckpt.model_checkpoint_path:
    loader = tf.train.Saver(var_list=tf.global_variables())
    load_step = int(os.path.basename(ckpt.model_checkpoint_path).split('-')[1])
    load(loader, sess, ckpt.model_checkpoint_path)
    start_epoch = load_step + 1
    # Put the learning rate where the schedule below would have it at this epoch.
    sess.run(tf.assign(lr_v, lr * lr_decay ** (start_epoch // decay_every)))
    print(" ** resuming training at epoch %d" % start_epoch)
else:
    print('no checkpoints find.')

n_epoch = 1000
n_batch_epoch = int(n_images_train / batch_size)
for epoch in range(start_epoch, n_epoch):
    start_time = time.time()
    # Step-wise decay: multiply the base rate by lr_decay once per decay_every epochs.
    if epoch !=0 and (epoch % decay_every == 0):
        new_lr_decay = lr_decay ** (epoch // decay_every)
        sess.run(tf.assign(lr_v, lr * new_lr_decay))
        log = " ** new learning rate: %f" % (lr * new_lr_decay)
        print(log)
    elif epoch == 0:
        log = " ** init lr: %f  decay_every_epoch: %d, lr_decay: %f" % (lr, decay_every, lr_decay)
        print(log)
    for step in range(n_batch_epoch):
        step_time = time.time()
        # Matching pairs: random caption indices plus the image each caption belongs to
        # (caption index // n_captions_per_image).
        idexs = get_random_int(min=0, max=n_captions_train-1, number=batch_size)
        b_real_caption = train_captions[idexs]
        b_real_images = train_images[np.floor(np.asarray(idexs).astype('float') / n_captions_per_image).astype('int')]
        # Mismatching captions and images are drawn independently at random.
        idexs = get_random_int(min=0, max=n_captions_train-1, number=batch_size)
        b_wrong_caption = train_captions[idexs]
        idexs2 = get_random_int(min=0, max=n_images_train-1, number=batch_size)
        b_wrong_images = train_images[idexs2]
        b_z = np.random.normal(loc=0.0, scale=1.0, size=(batch_size, z_dim)).astype(np.float32)
        b_real_images = threading_data(b_real_images, prepro_img, mode='train')
        b_wrong_images = threading_data(b_wrong_images, prepro_img, mode='train')
        # The image/text encoders are only trained during the first 80 epochs.
        if epoch < 80:
            errRNN, _ = sess.run([rnn_loss, rnn_optim], feed_dict={
                                            t_real_image : b_real_images,
                                            t_wrong_image : b_wrong_images,
                                            t_real_caption : b_real_caption,
                                            t_wrong_caption : b_wrong_caption})
        else:
            errRNN = 0
        # One discriminator update followed by one generator update per step.
        errD, _ = sess.run([d_loss, d_optim], feed_dict={
                            t_real_image : b_real_images,
                            t_wrong_caption : b_wrong_caption,
                            t_real_caption : b_real_caption,
                            t_z : b_z})
        errG, _ = sess.run([g_loss, g_optim], feed_dict={
                            t_real_caption : b_real_caption,
                            t_z : b_z})
    # Losses and duration of the last step of the epoch.
    print("Epoch: [%2d/%2d] time: %4.4fs, d_loss: %.8f, g_loss: %.8f, rnn_loss: %.8f" \
                        % (epoch, n_epoch, time.time() - step_time, errD, errG, errRNN))
    if (epoch + 1) % 1 == 0:
        print(" ** Epoch %d took %fs" % (epoch, time.time()-start_time))
        # Render the fixed sample captions with the fixed noise to follow progress.
        img_gen, rnn_out = sess.run([net_g.outputs, net_rnn.outputs], feed_dict={
                                        t_real_caption : sample_sentence,
                                        t_z : sample_seed})
        save_images(img_gen, [ni, ni], 'train_samples/train_{:02d}.png'.format(epoch))
    if (epoch != 0) and (epoch % 10) == 0:
        save(saver, sess, checkpoint_dir, epoch)
        print("[*] Save checkpoints SUCCESS!")

if start_epoch < n_epoch:
    # Final checkpoint under the name the evaluation config expects.
    checkpoint_path = os.path.join(cfg.CHECKPOINT_DIR, cfg.CHECKPOINT_NAME)
    saver.save(sess, checkpoint_path, global_step=epoch)
    print('The checkpoint has been created.')
else:
    # Nothing was trained in this run, so there is no new state to save.
    print(" ** checkpoint already covers %d epochs; nothing left to train" % n_epoch)

no checkpoints find.
 ** init lr: 0.000200  decay_every_epoch: 100, lr_decay: 0.500000


`imresize` is deprecated in SciPy 1.0.0, and will be removed in 1.3.0.
Use Pillow instead: ``numpy.array(Image.fromarray(arr).resize())``.


Epoch: [ 0/1000] time: 0.3975s, d_loss: 1.31753385, g_loss: 2.28362513, rnn_loss: 0.36068851
 ** Epoch 0 took 75.582170s


`imsave` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imwrite`` instead.


Epoch: [ 1/1000] time: 0.4020s, d_loss: 1.11479640, g_loss: 2.01116872, rnn_loss: 0.24469408
 ** Epoch 1 took 55.629773s
Epoch: [ 2/1000] time: 0.4004s, d_loss: 1.36073446, g_loss: 1.45685649, rnn_loss: 0.24112934
 ** Epoch 2 took 55.422214s
Epoch: [ 3/1000] time: 0.4016s, d_loss: 1.25851989, g_loss: 1.65432346, rnn_loss: 0.23627174
 ** Epoch 3 took 55.444503s
Epoch: [ 4/1000] time: 0.4067s, d_loss: 1.18874502, g_loss: 1.00608802, rnn_loss: 0.25265095
 ** Epoch 4 took 55.700873s
Epoch: [ 5/1000] time: 0.4046s, d_loss: 1.27227426, g_loss: 2.22307515, rnn_loss: 0.25973552
 ** Epoch 5 took 55.748305s
Epoch: [ 6/1000] time: 0.4067s, d_loss: 1.00639820, g_loss: 1.52219772, rnn_loss: 0.23359019
 ** Epoch 6 took 56.257428s
Epoch: [ 7/1000] time: 0.4040s, d_loss: 0.80261439, g_loss: 1.42177761, rnn_loss: 0.21547946
 ** Epoch 7 took 55.695372s
Epoch: [ 8/1000] time: 0.4021s, d_loss: 1.27246976, g_loss: 1.76366532, rnn_loss: 0.22362310
 ** Epoch 8 took 56.014816s
Epoch: [ 9/1000] time: 0.4061s, 

Epoch: [66/1000] time: 0.3901s, d_loss: 0.85955441, g_loss: 1.68345225, rnn_loss: 0.11777584
 ** Epoch 66 took 55.067351s
Epoch: [67/1000] time: 0.3996s, d_loss: 0.43970105, g_loss: 2.08832574, rnn_loss: 0.08496506
 ** Epoch 67 took 55.086880s
Epoch: [68/1000] time: 0.3918s, d_loss: 0.67033273, g_loss: 1.92664027, rnn_loss: 0.14513549
 ** Epoch 68 took 56.223854s
Epoch: [69/1000] time: 0.3962s, d_loss: 0.67560726, g_loss: 2.04417634, rnn_loss: 0.16112304
 ** Epoch 69 took 54.875975s
Epoch: [70/1000] time: 0.4006s, d_loss: 0.66614962, g_loss: 2.11093903, rnn_loss: 0.13497972
 ** Epoch 70 took 54.873067s
The checkpoint has been created.
[*] Save checkpoints SUCCESS!
Epoch: [71/1000] time: 0.3958s, d_loss: 1.15587115, g_loss: 0.74617231, rnn_loss: 0.15278263
 ** Epoch 71 took 54.889474s
Epoch: [72/1000] time: 0.3997s, d_loss: 0.98668432, g_loss: 2.45380425, rnn_loss: 0.12557690
 ** Epoch 72 took 54.964957s
Epoch: [73/1000] time: 0.4005s, d_loss: 0.48215815, g_loss: 2.60706091, rnn_loss: 0

Epoch: [130/1000] time: 0.3579s, d_loss: 0.23242274, g_loss: 2.48273349, rnn_loss: 0.00000000
 ** Epoch 130 took 49.048067s
The checkpoint has been created.
[*] Save checkpoints SUCCESS!
Epoch: [131/1000] time: 0.3542s, d_loss: 0.72508621, g_loss: 0.63650680, rnn_loss: 0.00000000
 ** Epoch 131 took 48.951739s
Epoch: [132/1000] time: 0.3549s, d_loss: 0.17070550, g_loss: 2.67076015, rnn_loss: 0.00000000
 ** Epoch 132 took 48.794750s
Epoch: [133/1000] time: 0.3555s, d_loss: 0.11932614, g_loss: 3.50745010, rnn_loss: 0.00000000
 ** Epoch 133 took 48.914103s
Epoch: [134/1000] time: 0.3528s, d_loss: 0.21271195, g_loss: 2.32354784, rnn_loss: 0.00000000
 ** Epoch 134 took 48.895077s
Epoch: [135/1000] time: 0.3778s, d_loss: 1.01847219, g_loss: 0.25741497, rnn_loss: 0.00000000
 ** Epoch 135 took 49.565865s
Epoch: [136/1000] time: 0.3575s, d_loss: 0.41197455, g_loss: 1.53678966, rnn_loss: 0.00000000
 ** Epoch 136 took 49.180703s
Epoch: [137/1000] time: 0.3548s, d_loss: 0.17219006, g_loss: 3.393815

Epoch: [193/1000] time: 0.3524s, d_loss: 0.24226700, g_loss: 3.65074825, rnn_loss: 0.00000000
 ** Epoch 193 took 49.370805s
Epoch: [194/1000] time: 0.3533s, d_loss: 0.47249928, g_loss: 4.24387074, rnn_loss: 0.00000000
 ** Epoch 194 took 49.354253s
Epoch: [195/1000] time: 0.3563s, d_loss: 0.18483321, g_loss: 2.44830441, rnn_loss: 0.00000000
 ** Epoch 195 took 49.353141s
Epoch: [196/1000] time: 0.3556s, d_loss: 0.17762324, g_loss: 4.23771763, rnn_loss: 0.00000000
 ** Epoch 196 took 49.350209s
Epoch: [197/1000] time: 0.3545s, d_loss: 0.14515185, g_loss: 4.47289276, rnn_loss: 0.00000000
 ** Epoch 197 took 49.664229s
Epoch: [198/1000] time: 0.3592s, d_loss: 0.36177701, g_loss: 3.76405382, rnn_loss: 0.00000000
 ** Epoch 198 took 49.838691s
Epoch: [199/1000] time: 0.3588s, d_loss: 0.16638196, g_loss: 2.72192311, rnn_loss: 0.00000000
 ** Epoch 199 took 49.337843s
 ** new learning rate: 0.000050
Epoch: [200/1000] time: 0.3515s, d_loss: 0.07343477, g_loss: 4.79296875, rnn_loss: 0.00000000
 ** Ep

Epoch: [256/1000] time: 0.3587s, d_loss: 0.02929157, g_loss: 6.30452108, rnn_loss: 0.00000000
 ** Epoch 256 took 49.119458s
Epoch: [257/1000] time: 0.3564s, d_loss: 0.02269468, g_loss: 4.36610508, rnn_loss: 0.00000000
 ** Epoch 257 took 49.034746s
Epoch: [258/1000] time: 0.3545s, d_loss: 0.07928374, g_loss: 4.06223392, rnn_loss: 0.00000000
 ** Epoch 258 took 49.259016s
Epoch: [259/1000] time: 0.3552s, d_loss: 0.02693177, g_loss: 4.85687828, rnn_loss: 0.00000000
 ** Epoch 259 took 49.064179s
Epoch: [260/1000] time: 0.3590s, d_loss: 0.01402922, g_loss: 4.55659819, rnn_loss: 0.00000000
 ** Epoch 260 took 49.049698s
The checkpoint has been created.
[*] Save checkpoints SUCCESS!
Epoch: [261/1000] time: 0.3571s, d_loss: 0.02790941, g_loss: 5.06291628, rnn_loss: 0.00000000
 ** Epoch 261 took 49.571910s
Epoch: [262/1000] time: 0.3573s, d_loss: 0.04623189, g_loss: 5.50077295, rnn_loss: 0.00000000
 ** Epoch 262 took 49.152594s
Epoch: [263/1000] time: 0.3434s, d_loss: 0.21928494, g_loss: 2.908119

Epoch: [319/1000] time: 0.3563s, d_loss: 0.07026637, g_loss: 5.01712894, rnn_loss: 0.00000000
 ** Epoch 319 took 49.130380s
Epoch: [320/1000] time: 0.3576s, d_loss: 0.05841497, g_loss: 3.61343002, rnn_loss: 0.00000000
 ** Epoch 320 took 49.419571s
The checkpoint has been created.
[*] Save checkpoints SUCCESS!
Epoch: [321/1000] time: 0.3556s, d_loss: 0.03998955, g_loss: 3.84873462, rnn_loss: 0.00000000
 ** Epoch 321 took 49.243620s
Epoch: [322/1000] time: 0.3572s, d_loss: 0.08237834, g_loss: 4.10806656, rnn_loss: 0.00000000
 ** Epoch 322 took 49.279921s
Epoch: [323/1000] time: 0.3826s, d_loss: 0.05141971, g_loss: 3.29020929, rnn_loss: 0.00000000
 ** Epoch 323 took 49.479067s
Epoch: [324/1000] time: 0.3518s, d_loss: 0.07603260, g_loss: 3.38384199, rnn_loss: 0.00000000
 ** Epoch 324 took 49.430808s
Epoch: [325/1000] time: 0.3590s, d_loss: 0.14649418, g_loss: 2.48432660, rnn_loss: 0.00000000
 ** Epoch 325 took 49.217727s
Epoch: [326/1000] time: 0.3577s, d_loss: 0.11269337, g_loss: 3.389093

Epoch: [382/1000] time: 0.3590s, d_loss: 0.02136238, g_loss: 4.00496674, rnn_loss: 0.00000000
 ** Epoch 382 took 49.590140s
Epoch: [383/1000] time: 0.3580s, d_loss: 0.02855474, g_loss: 5.09882355, rnn_loss: 0.00000000
 ** Epoch 383 took 49.291048s
Epoch: [384/1000] time: 0.3563s, d_loss: 0.03658107, g_loss: 4.08952618, rnn_loss: 0.00000000
 ** Epoch 384 took 49.313726s
Epoch: [385/1000] time: 0.3552s, d_loss: 0.11482263, g_loss: 4.05130100, rnn_loss: 0.00000000
 ** Epoch 385 took 49.278081s
Epoch: [386/1000] time: 0.3573s, d_loss: 0.02786632, g_loss: 4.03718090, rnn_loss: 0.00000000
 ** Epoch 386 took 49.810915s
Epoch: [387/1000] time: 0.3522s, d_loss: 0.09083295, g_loss: 4.01690912, rnn_loss: 0.00000000
 ** Epoch 387 took 49.070339s
Epoch: [388/1000] time: 0.3525s, d_loss: 0.02575754, g_loss: 4.29257965, rnn_loss: 0.00000000
 ** Epoch 388 took 49.037310s
Epoch: [389/1000] time: 0.3494s, d_loss: 0.03619905, g_loss: 6.42648792, rnn_loss: 0.00000000
 ** Epoch 389 took 48.836993s
Epoch: [

Epoch: [445/1000] time: 0.3544s, d_loss: 0.04542149, g_loss: 6.32717323, rnn_loss: 0.00000000
 ** Epoch 445 took 49.176899s
Epoch: [446/1000] time: 0.3541s, d_loss: 0.04810356, g_loss: 3.42695999, rnn_loss: 0.00000000
 ** Epoch 446 took 49.247024s
Epoch: [447/1000] time: 0.3561s, d_loss: 0.05019562, g_loss: 3.74522758, rnn_loss: 0.00000000
 ** Epoch 447 took 49.050780s
Epoch: [448/1000] time: 0.3562s, d_loss: 0.01146937, g_loss: 6.31071138, rnn_loss: 0.00000000
 ** Epoch 448 took 49.013345s
Epoch: [449/1000] time: 0.3543s, d_loss: 0.02502973, g_loss: 4.64680243, rnn_loss: 0.00000000
 ** Epoch 449 took 49.672044s
Epoch: [450/1000] time: 0.3573s, d_loss: 0.03071034, g_loss: 3.83420420, rnn_loss: 0.00000000
 ** Epoch 450 took 49.413311s
The checkpoint has been created.
[*] Save checkpoints SUCCESS!
Epoch: [451/1000] time: 0.3532s, d_loss: 0.04982337, g_loss: 9.35076714, rnn_loss: 0.00000000
 ** Epoch 451 took 49.027356s
Epoch: [452/1000] time: 0.3570s, d_loss: 0.01733837, g_loss: 7.219695

Epoch: [508/1000] time: 0.3554s, d_loss: 0.03068328, g_loss: 6.31540823, rnn_loss: 0.00000000
 ** Epoch 508 took 49.199937s
Epoch: [509/1000] time: 0.3537s, d_loss: 0.05395509, g_loss: 4.65782547, rnn_loss: 0.00000000
 ** Epoch 509 took 49.190490s
Epoch: [510/1000] time: 0.3564s, d_loss: 0.02360021, g_loss: 3.60581541, rnn_loss: 0.00000000
 ** Epoch 510 took 49.197985s
The checkpoint has been created.
[*] Save checkpoints SUCCESS!
Epoch: [511/1000] time: 0.3558s, d_loss: 0.01498693, g_loss: 4.44567108, rnn_loss: 0.00000000
 ** Epoch 511 took 49.586478s
Epoch: [512/1000] time: 0.3624s, d_loss: 0.06969298, g_loss: 3.68043685, rnn_loss: 0.00000000
 ** Epoch 512 took 50.201531s
Epoch: [513/1000] time: 0.3598s, d_loss: 0.00316942, g_loss: 6.02050352, rnn_loss: 0.00000000
 ** Epoch 513 took 49.676297s
Epoch: [514/1000] time: 0.3576s, d_loss: 0.00934483, g_loss: 5.00657988, rnn_loss: 0.00000000
 ** Epoch 514 took 49.412100s
Epoch: [515/1000] time: 0.3599s, d_loss: 0.05223550, g_loss: 3.391418

Epoch: [571/1000] time: 0.3568s, d_loss: 0.03198540, g_loss: 3.89867258, rnn_loss: 0.00000000
 ** Epoch 571 took 49.468383s
Epoch: [572/1000] time: 0.3805s, d_loss: 0.00715349, g_loss: 7.39869070, rnn_loss: 0.00000000
 ** Epoch 572 took 49.508871s
Epoch: [573/1000] time: 0.3581s, d_loss: 0.01178999, g_loss: 5.52654743, rnn_loss: 0.00000000
 ** Epoch 573 took 49.331195s
Epoch: [574/1000] time: 0.3609s, d_loss: 0.03817191, g_loss: 5.97525644, rnn_loss: 0.00000000
 ** Epoch 574 took 49.953021s
Epoch: [575/1000] time: 0.3543s, d_loss: 0.00561364, g_loss: 5.31503677, rnn_loss: 0.00000000
 ** Epoch 575 took 49.230999s
Epoch: [576/1000] time: 0.3604s, d_loss: 0.01203582, g_loss: 5.05919743, rnn_loss: 0.00000000
 ** Epoch 576 took 49.328655s
Epoch: [577/1000] time: 0.3597s, d_loss: 0.03509007, g_loss: 7.09956264, rnn_loss: 0.00000000
 ** Epoch 577 took 49.496251s
Epoch: [578/1000] time: 0.3542s, d_loss: 0.01853709, g_loss: 5.90747261, rnn_loss: 0.00000000
 ** Epoch 578 took 49.283321s
Epoch: [

Epoch: [634/1000] time: 0.3568s, d_loss: 0.02254153, g_loss: 5.42447662, rnn_loss: 0.00000000
 ** Epoch 634 took 49.505482s
Epoch: [635/1000] time: 0.3560s, d_loss: 0.05986887, g_loss: 5.43906975, rnn_loss: 0.00000000
 ** Epoch 635 took 49.211157s
Epoch: [636/1000] time: 0.3703s, d_loss: 0.00843725, g_loss: 5.88516331, rnn_loss: 0.00000000
 ** Epoch 636 took 49.212684s
Epoch: [637/1000] time: 0.3604s, d_loss: 0.01342466, g_loss: 4.67422056, rnn_loss: 0.00000000
 ** Epoch 637 took 49.656508s


In [None]:
def test(captions):
    """Generate images for `captions` with the checkpointed generator and save them.

    A fresh inference graph (RNN_ENCODER -> GENERATOR, both built with
    ``is_training=False``) is created, weights are restored from the latest
    checkpoint found in ``checkpoint_dir`` (if any), and every batch of
    generated images is written to 'inference/test_samples.png'.

    Args:
        captions: non-empty, indexable sequence of captions. It is converted
            with ``np.asarray`` and fed to an int64 placeholder of shape
            ``[batch_size, None]`` (presumably equal-length rows of token
            ids -- TODO confirm against the caller).

    Raises:
        ValueError: if `captions` is empty.

    Relies on names defined elsewhere in the notebook: ``batch_size``,
    ``z_dim``, ``ni``, ``checkpoint_dir``, ``load`` and ``save_images``.
    """
    tf.reset_default_graph()
    caption = np.asarray([captions[i] for i in range(len(captions))])
    n_caption_test = len(caption)
    if n_caption_test == 0:
        raise ValueError('captions must contain at least one caption.')

    t_real_caption = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name='real_caption_input')
    t_z = tf.placeholder(tf.float32, [batch_size, z_dim], name='z_noise')
    net_g = GENERATOR(t_z, RNN_ENCODER(t_real_caption, is_training=False, reuse=False).outputs,
                    is_training=False, reuse=False)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.allow_soft_placement = True

    n_batch_epoch = int(n_caption_test / batch_size) + 1
    # The placeholder has a fixed batch dimension, so the captions are repeated
    # until every batch (including the last, partial one) can be filled.
    # At least two repetitions are kept, which matches the previous padding
    # whenever that padding was already sufficient.
    rows_needed = n_batch_epoch * batch_size
    repetitions = max(2, -(-rows_needed // n_caption_test))  # ceil division
    caption = np.tile(caption, (repetitions, 1))

    # The context manager guarantees the session (and its GPU memory) is released.
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())

        ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            loader = tf.train.Saver(var_list=tf.global_variables())
            load(loader, sess, ckpt.model_checkpoint_path)
        else:
            # Inference continues with freshly initialised weights in this case.
            print('no checkpoints find.')

        for i in range(n_batch_epoch):
            test_cap = caption[i * batch_size: (i + 1) * batch_size]
            z = np.random.normal(loc=0.0, scale=1.0, size=(batch_size, z_dim)).astype(np.float32)
            gen = sess.run(net_g.outputs, feed_dict={t_real_caption: test_cap, t_z: z})
            # NOTE(review): every batch is written to the same path, so only the
            # last batch remains on disk when there is more than one batch.
            save_images(gen, [ni, ni], 'inference/test_samples.png')

In [None]:
# Run inference on `sample_sentence` (defined outside this section); `test`
# writes the generated images to 'inference/test_samples.png'.
test(sample_sentence)

## 3. Evaluation metric

In [None]:
def generate_r_precision_data():
    """Compute and save the feature arrays consumed by the R-precision evaluation.

    For every full batch of test captions this runs the RNN encoder on the
    true captions, generates images from them, runs the CNN encoder on the
    generated images, and runs the RNN encoder on each of the
    ``cfg.WRONG_CAPTION`` mismatched captions. The results are stored with
    ``np.savez`` at ``cfg.R_PRECISION_DIR/cfg.R_PRECISION_FILE`` under the keys:

        true_cnn:  (num_batches, BATCH_SIZE, EMBEDDING_DIM)
        true_rnn:  (num_batches, BATCH_SIZE, EMBEDDING_DIM)
        wrong_rnn: (num_batches, WRONG_CAPTION, BATCH_SIZE, EMBEDDING_DIM)

    Captions beyond the last full batch are dropped (integer division by
    ``cfg.BATCH_SIZE``).

    The checkpoint is read from ``cfg.CHECKPOINT_DIR/cfg.CHECKPOINT_NAME``,
    the same location used by ``generate_inception_score_data``.

    Relies on names defined elsewhere in the notebook: ``test_dataset``,
    ``rnn_encoder``, ``cnn_encoder``, ``generator``, ``t_real_caption``,
    ``t_real_image`` and ``t_z``.
    """
    caption_ids = np.reshape(np.asarray(test_dataset.captions_ids), (-1, cfg.TEXT.WORDS_NUM))
    captions_ids_wrong = np.reshape(test_dataset.random_wrong_captions(), (-1, cfg.WRONG_CAPTION, cfg.TEXT.WORDS_NUM))

    n_caption_test = len(caption_ids)
    num_batches = n_caption_test // cfg.BATCH_SIZE

    true_cnn_features = np.zeros((num_batches, cfg.BATCH_SIZE, cfg.TEXT.EMBEDDING_DIM), dtype=float)
    true_rnn_features = np.zeros((num_batches, cfg.BATCH_SIZE, cfg.TEXT.EMBEDDING_DIM), dtype=float)
    wrong_rnn_features = np.zeros((num_batches, cfg.WRONG_CAPTION, cfg.BATCH_SIZE, cfg.TEXT.EMBEDDING_DIM), dtype=float)

    # The context manager guarantees the session is closed once the features are computed.
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        sess.run(tf.global_variables_initializer())

        # load the trained checkpoint
        checkpoint_dir = cfg.CHECKPOINT_DIR
        if checkpoint_dir is not None:
            loader = tf.train.Saver(var_list=tf.global_variables())
            ckpt_path = os.path.join(checkpoint_dir, cfg.CHECKPOINT_NAME)
            loader.restore(sess, ckpt_path)
            print("Restored model parameters from {}".format(ckpt_path))
        else:
            print('no checkpoints find.')

        for i in range(num_batches):
            batch = slice(i * cfg.BATCH_SIZE, (i + 1) * cfg.BATCH_SIZE)
            test_cap = caption_ids[batch]

            z = np.random.normal(loc=0.0, scale=1.0, size=(cfg.BATCH_SIZE, cfg.GAN.Z_DIM)).astype(np.float32)

            rnn_features = sess.run(rnn_encoder.outputs, feed_dict={t_real_caption: test_cap})
            gen = sess.run(generator.outputs, feed_dict={t_real_caption: test_cap, t_z: z})
            cnn_features = sess.run(cnn_encoder.outputs, feed_dict={t_real_image: gen})

            true_cnn_features[i] = cnn_features
            true_rnn_features[i] = rnn_features

            # The batch of mismatched captions does not depend on the inner loop index.
            wrong_cap = captions_ids_wrong[batch]
            for per_wrong_caption in range(cfg.WRONG_CAPTION):
                wrong_rnn_features[i, per_wrong_caption] = sess.run(
                    rnn_encoder.outputs,
                    feed_dict={t_real_caption: wrong_cap[:, per_wrong_caption]})

    output_path = os.path.join(cfg.R_PRECISION_DIR, cfg.R_PRECISION_FILE)
    # if exists, remove the existing file first (best effort: a missing file is fine)
    try:
        os.remove(output_path)
    except OSError:
        pass
    np.savez(output_path, true_cnn=true_cnn_features, true_rnn=true_rnn_features,
             wrong_rnn=wrong_rnn_features)

In [None]:
def generate_inception_score_data():
    """Generate one image per test caption and save them for the Inception-score evaluation.

    The test caption ids are reshaped to
    ``(-1, cfg.TEXT.CAPTIONS_PER_IMAGE, cfg.TEXT.WORDS_NUM)``. For every full
    batch and every caption index, the generator is run and each output is
    saved as ``<cfg.TEST.GENERATED_TEST_IMAGES>/<filename>_<caption index>.png``,
    where ``<filename>`` comes from ``test_dataset.filenames``. Entries beyond
    the last full batch are dropped (integer division by ``cfg.BATCH_SIZE``).

    The checkpoint is read from ``cfg.CHECKPOINT_DIR/cfg.CHECKPOINT_NAME``.

    Relies on names defined elsewhere in the notebook: ``test_dataset``,
    ``generator``, ``t_real_caption`` and ``t_z``.
    """
    caption_ids = np.reshape(np.asarray(test_dataset.captions_ids),
                             (-1, cfg.TEXT.CAPTIONS_PER_IMAGE, cfg.TEXT.WORDS_NUM))

    n_caption_test = len(caption_ids)
    num_batches = n_caption_test // cfg.BATCH_SIZE
    output_root = cfg.TEST.GENERATED_TEST_IMAGES

    # The context manager guarantees the session is closed once all images are written.
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        sess.run(tf.global_variables_initializer())

        checkpoint_dir = cfg.CHECKPOINT_DIR
        if checkpoint_dir is not None:
            loader = tf.train.Saver(var_list=tf.global_variables())
            ckpt_path = os.path.join(checkpoint_dir, cfg.CHECKPOINT_NAME)
            loader.restore(sess, ckpt_path)
            print("Restored model parameters from {}".format(ckpt_path))
        else:
            print('no checkpoints find.')

        for i in range(num_batches):
            batch = slice(i * cfg.BATCH_SIZE, (i + 1) * cfg.BATCH_SIZE)
            # File names depend only on the batch, not on the caption index.
            test_directory = test_dataset.filenames[batch]

            # Create the per-image sub-directories once per batch; makedirs also
            # creates the output root if it does not exist yet.
            for j in range(cfg.BATCH_SIZE):
                os.makedirs(os.path.join(output_root, test_directory[j].split('/')[0]), exist_ok=True)

            for per_caption in range(cfg.TEXT.CAPTIONS_PER_IMAGE):
                test_cap = caption_ids[batch, per_caption]

                z = np.random.normal(loc=0.0, scale=1.0, size=(cfg.BATCH_SIZE, cfg.GAN.Z_DIM)).astype(np.float32)
                gen = sess.run(generator.outputs, feed_dict={t_real_caption: test_cap, t_z: z})

                for j in range(cfg.BATCH_SIZE):
                    # NOTE(review): scipy.misc.imsave was removed in SciPy 1.2, so this
                    # call requires an older SciPy -- confirm the environment.
                    scipy.misc.imsave(
                        os.path.join(output_root, test_directory[j] + '_{}.png'.format(per_caption)),
                        gen[j])

In [None]:
# Compute the caption/image features and save them for the R-precision measurement.
generate_r_precision_data()

In [None]:
# Generate and save the synthesized test images for the Inception-score measurement.
generate_inception_score_data()

## 4. Measure the Inception score and R-precision of the given test dataset

After setting the config file to 'eval_birds.yml' and running 'generate_inception_score_data()' and 'generate_r_precision_data()', the images synthesized from the given captions and the set of image and caption features should be saved inside the 'evaluation' folder, specifically in 'evaluation/generated_images/..' and as 'evaluation/r_precision.npz', respectively.

**Then, go to the 'evaluation' folder and run 'inception_score.ipynb' and 'r_precision.ipynb' to measure the Inception score and the R-precision score, respectively.**