# M2177.003100 Deep Learning <br> Final Project: Text to Image Synthesis (TensorFlow)

Copyright (C) Data Science & AI Laboratory, Seoul National University. This material is for educational uses only. Some contents are based on the material provided by other paper/book authors and may be copyrighted by them. 

**To understand this work, please read the provided PPT file carefully.**

**Note**: certain details are missing or ambiguous on purpose, in order to test your knowledge of the related materials. However, if you really feel that something essential is missing and you cannot proceed to the next step, then contact the teaching staff with a clear description of your problem.

### Submitting your work:
<font color=red>**DO NOT clear the training process**</font> so that TAs can grade both your code and results.  
**The TAs will set the config file to 'eval_birds.yml' when evaluating the code on the hidden test dataset. Please make sure that your code can generate the data needed to measure the inception score and R-precision on the hidden test dataset.**

## 1. Load datasets
The Birds dataset will be downloaded automatically if it is not located in the *data* directory. <br>

In [2]:
import tensorflow as tf
import matplotlib.pyplot as plt
import os, nltk
from miscc.config import cfg, cfg_from_file
import pprint
import datetime
import dateutil.tz
import numpy as np
import scipy
from utils.data_utils import CUBDataset
from utils.loss import cosine_similarity
import pandas as pd
from scipy.io import loadmat
import re
import string
import random
import time

#################################################
# DO NOT CHANGE 
from utils.model_1215_a import CNN_ENCODER, RNN_ENCODER, GENERATOR, DISCRIMINATOR
#################################################

%matplotlib inline

In [3]:
# Select the experiment configuration: use 'train_birds.yml' for training and
# 'eval_birds.yml' for evaluation (see the submission notes at the top).
cfg_from_file('cfg/train_birds.yml') # eval_birds.yml

# Echo the loaded configuration so the run records which settings were used.
print('Using config:')
pprint.pprint(cfg)

# Expose only the configured GPU id(s) to CUDA.
os.environ['CUDA_VISIBLE_DEVICES'] = cfg.GPU_ID

# Build a timestamped directory name of the form
# sample/<dataset>_<config>_<YYYY_MM_DD_HH_MM_SS> using the local time zone.
now = datetime.datetime.now(dateutil.tz.tzlocal())
timestamp = now.strftime('%Y_%m_%d_%H_%M_%S')
output_dir = 'sample/%s_%s_%s' % (cfg.DATASET_NAME, cfg.CONFIG_NAME, timestamp)

Using config:
{'BATCH_SIZE': 64,
 'CHECKPOINT_DIR': './checkpoint',
 'CHECKPOINT_NAME': 'model.ckpt',
 'CNN': {'EMBEDDING_DIM': 0, 'H_DIM': 0},
 'CONFIG_NAME': 'text-to-image',
 'CUDA': False,
 'DATASET_NAME': 'birds',
 'DATA_DIR': 'data/birds',
 'EMBEDDING_TYPE': 'cnn-rnn',
 'GAN': {'B_ATTENTION': False,
         'B_CONDITION': False,
         'B_DCGAN': False,
         'CONDITION_DIM': 0,
         'DF_DIM': 0,
         'EMBEDDING_DIM': 0,
         'GF_DIM': 0,
         'R_NUM': 0,
         'Z_DIM': 512},
 'GPU_ID': '0',
 'IMAGE_SIZE': 256,
 'NUM_BATCH_FOR_TEST': 0,
 'RANDOM_SEED': 0,
 'RNN': {'EMBEDDING_DIM': 0,
         'H_DIM': 0,
         'TYPE': '',
         'VOCAB_SIZE': 0,
         'WORD_EMBEDDING_DIM': 0},
 'R_PRECISION_DIR': './evaluation',
 'R_PRECISION_FILE': 'r_precision.npz',
 'R_PRECISION_FILE_HIDDEN': 'r_precision_hidden.npz',
 'TEST': {'B_EXAMPLE': False,
          'GENERATED_HIDDEN_TEST_IMAGES': './evaluation/generated_images_hidden',
          'GENERATED_TEST_IMAGES'

  yaml_cfg = edict(yaml.load(f))


In [4]:
# Load both splits of the birds dataset from the configured data directory.
train_dataset = CUBDataset(cfg.DATA_DIR, split='train')
test_dataset = CUBDataset(cfg.DATA_DIR, split='test')

# Where each split lives on disk.
print(f'\ntrain data directory:\n{train_dataset.split_dir}')
print(f'test data directory:\n{test_dataset.split_dir}\n')

# Number of image filenames per split.
print(f'# of train filenames:{train_dataset.filenames.shape}')
print(f'# of test filenames:{test_dataset.filenames.shape}\n')

# One example filename per split. The second label used to say "valid" although
# it prints the test split; it now matches the other labels.
print(f'example of filename of train image:{train_dataset.filenames[0]}')
print(f'example of filename of test image:{test_dataset.filenames[0]}\n')

# One example caption (tokens) together with its vocabulary ids, train then test.
print(f'example of caption and its ids:\n{train_dataset.captions[0]}\n{train_dataset.captions_ids[0]}\n')
print(f'example of caption and its ids:\n{test_dataset.captions[0]}\n{test_dataset.captions_ids[0]}\n')

# Caption counts (tokenised form).
print(f'# of train captions:{np.asarray(train_dataset.captions).shape}')
print(f'# of test captions:{np.asarray(test_dataset.captions).shape}\n')

# Caption counts (id form).
print(f'# of train caption ids:{np.asarray(train_dataset.captions_ids).shape}')
print(f'# of test caption ids:{np.asarray(test_dataset.captions_ids).shape}\n')

# Image tensor shapes.
print(f'# of train images:{train_dataset.images.shape}')
print(f'# of test images:{test_dataset.images.shape}\n')

self.current_dir:
/home/chszerg/final-project-deep-learning-19-tf

self.data_dir:
/home/chszerg/final-project-deep-learning-19-tf/data/birds

self.image_dir:
/home/chszerg/final-project-deep-learning-19-tf/data/birds/CUB_200_2011.tgz

Dataset already exists
self.image_dir:
/home/chszerg/final-project-deep-learning-19-tf/data/birds/CUB_200_2011/images

Load from:  data/birds/captions.pickle
self.current_dir:
/home/chszerg/final-project-deep-learning-19-tf

self.data_dir:
/home/chszerg/final-project-deep-learning-19-tf/data/birds

self.image_dir:
/home/chszerg/final-project-deep-learning-19-tf/data/birds/CUB_200_2011.tgz

Dataset already exists
self.image_dir:
/home/chszerg/final-project-deep-learning-19-tf/data/birds/CUB_200_2011/images

Load from:  data/birds/captions.pickle

train data directory:
/home/chszerg/final-project-deep-learning-19-tf/data/birds/train
test data directory:
/home/chszerg/final-project-deep-learning-19-tf/data/birds/test

# of train filenames:(8855,)
# of test f

In [5]:
# Pull the image arrays out of the dataset wrappers and turn the caption-id
# lists into ndarrays, then report the four shapes (train images, test images,
# train captions, test captions).
train_images, test_images = train_dataset.images, test_dataset.images
train_captions, test_captions = (
    np.asarray(split.captions_ids) for split in (train_dataset, test_dataset)
)
for loaded in (train_images, test_images, train_captions, test_captions):
    print(loaded.shape)

(8855, 256, 256, 3)
(2933, 256, 256, 3)
(88550, 20)
(29330, 20)


In [6]:
from skimage.transform import resize
def _resize_all(images, shape=(64, 64, 3)):
    """Resize every image in ``images`` to ``shape`` and stack the results."""
    return np.asarray([resize(image, shape) for image in images])


# Down-sample both splits to 64x64x3 and check that no image was lost.
train_images_64 = _resize_all(train_images)
print(train_images_64.shape)
assert train_images_64.shape[0] == train_images.shape[0]

test_images_64 = _resize_all(test_images)
print(test_images_64.shape)
assert test_images_64.shape[0] == test_images.shape[0]

(8855, 64, 64, 3)
(2933, 64, 64, 3)


In [7]:
# From here on the notebook works with the 64x64 copies of the images.
train_images = train_images_64
test_images = test_images_64
n_captions_train = len(train_captions)
n_captions_per_image = 10
n_images_train = len(train_images)

# The training loop maps a caption index to its image index by integer division
# with n_captions_per_image, so the caption array must hold exactly that many
# captions per image. Fail early if the layout differs.
assert n_captions_train == n_images_train * n_captions_per_image, (
    "expected %d captions per image, got %d captions for %d images"
    % (n_captions_per_image, n_captions_train, n_images_train)
)

In [8]:
import scipy.misc
import threading
import scipy.ndimage as ndi
from skimage import transform
from skimage import exposure
import skimage
from nltk.tokenize import RegexpTokenizer

def sent2ID(sample_sentence):
    """Convert one caption string into the id sequence used by the dataset.

    The sentence is lower-cased, split into ``\\w+`` tokens, stripped of
    non-ASCII characters, and every token found in
    ``train_dataset.wordtoix`` is replaced by its id. Unknown tokens are
    dropped. The id list is then passed to ``train_dataset.get_caption``.

    Args:
        sample_sentence: the caption text; must be non-empty.

    Returns:
        A one-element list holding the array returned by ``get_caption`` with
        its axis 1 squeezed out.
        NOTE(review): presumably a fixed-length padded id vector (the notebook
        later reshapes to 20 columns) - confirm in ``CUBDataset.get_caption``.

    Raises:
        ValueError: if ``sample_sentence`` is empty. The previous code called
            ``exit()`` here, which shuts down the interpreter/kernel rather
            than reporting the problem to the caller.
    """
    if len(sample_sentence) == 0:
        raise ValueError("sample_sentence must be a non-empty string")

    # Pairs of U+FFFD replacement characters are treated as a word separator.
    cap = sample_sentence.replace("\ufffd\ufffd", " ")
    tokenizer = RegexpTokenizer(r'\w+')

    tokens = []
    for raw_token in tokenizer.tokenize(cap.lower()):
        # Drop any non-ASCII characters; skip tokens that become empty.
        ascii_token = raw_token.encode('ascii', 'ignore').decode('ascii')
        if len(ascii_token) > 0:
            tokens.append(ascii_token)

    # Keep only in-vocabulary words.
    word_ids = [train_dataset.wordtoix[w] for w in tokens if w in train_dataset.wordtoix]

    x, _ = train_dataset.get_caption(word_ids)
    return [np.squeeze(x, axis=1)]

def ID2sent(sample_caption):
    """Map a sequence of vocabulary ids back to a list of words.

    Every id that differs from ``train_dataset.ixtoword['<PAD>']`` is looked
    up in ``train_dataset.ixtoword``; the rest are skipped.

    NOTE(review): the padding value is read from ``ixtoword`` (id -> word)
    with a word as key; ``sent2ID`` uses ``wordtoix`` for word -> id lookups,
    so this may have been meant to be ``wordtoix['<PAD>']`` - confirm against
    the dataset's vocabulary tables.
    """
    return [
        train_dataset.ixtoword[ID]
        for ID in sample_caption
        if ID != train_dataset.ixtoword['<PAD>']
    ]

def get_random_int(min=0, max=10, number=5):
    """Return ``number`` integers drawn with ``random.randint(min, max)``.

    Both bounds are inclusive, as with ``random.randint``.
    """
    draws = []
    for _ in range(number):
        draws.append(random.randint(min, max))
    return draws

def merge(images, size):
    """Tile a batch of images into one grid image.

    Args:
        images: array indexed as (image, row, column, channel); the canvas is
            allocated with 3 channels.
        size: ``(grid_rows, grid_cols)``. Images are placed row by row, left
            to right.

    Returns:
        A float array of shape ``(h * grid_rows, w * grid_cols, 3)``.
    """
    tile_h, tile_w = images.shape[1], images.shape[2]
    grid_rows, grid_cols = size[0], size[1]
    canvas = np.zeros((tile_h * grid_rows, tile_w * grid_cols, 3))
    for idx, image in enumerate(images):
        row, col = divmod(idx, grid_cols)
        top, left = row * tile_h, col * tile_w
        canvas[top:top + tile_h, left:left + tile_w, :] = image
    return canvas

def imsave(images, size, path):
    """Tile ``images`` into a ``size`` grid with ``merge`` and write it to ``path``.

    The grid is written with ``scipy.misc.imsave``; the notebook output shows
    that SciPy reports this function as deprecated.
    """
    grid = merge(images, size)
    return scipy.misc.imsave(path, grid)

def save_images(images, size, image_path):
    """Write ``images`` as one ``size`` grid to ``image_path`` (delegates to ``imsave``)."""
    return imsave(images=images, size=size, path=image_path)

def threading_data(data=None, fn=None, **kwargs):
    """Apply ``fn`` to every element of ``data``, one thread per element.

    Args:
        data: an indexable collection supporting ``len``.
        fn: callable invoked as ``fn(element, **kwargs)``.
        **kwargs: forwarded unchanged to every ``fn`` call.

    Returns:
        ``np.asarray`` of the per-element results, in input order.
    """
    outputs = [None] * len(data)

    def run_one(slot, item):
        # Each worker writes only to its own slot.
        outputs[slot] = fn(item, **kwargs)

    workers = []
    for slot in range(len(data)):
        worker = threading.Thread(
            name='threading_and_return',
            target=run_one,
            args=(slot, data[slot]),
        )
        worker.start()
        workers.append(worker)

    # Wait for every worker before collecting the results.
    for worker in workers:
        worker.join()
    return np.asarray(outputs)

def apply_transform(x, transform_matrix, channel_index=2, fill_mode='nearest', cval=0., order=1):
    """Apply a 3x3 homogeneous 2-D affine transform to every channel of an image.

    Args:
        x: image array whose channel axis is ``channel_index``.
        transform_matrix: 3x3 array; the top-left 2x2 block is used as the
            affine matrix and the first two entries of the last column as the
            offset.
        channel_index: position of the channel axis in ``x``.
        fill_mode: ``mode`` passed to ``scipy.ndimage.affine_transform``.
        cval: fill value passed to ``scipy.ndimage.affine_transform``.
        order: spline interpolation order passed to the same call.

    Returns:
        The transformed image with the channel axis back at ``channel_index``.
    """
    # Move channels to the front so each 2-D plane can be transformed alone.
    x = np.rollaxis(x, channel_index, 0)
    final_affine_matrix = transform_matrix[:2, :2]
    final_offset = transform_matrix[:2, 2]
    # Call ndi.affine_transform directly: the scipy.ndimage.interpolation
    # sub-namespace the original went through is deprecated.
    channel_images = [
        ndi.affine_transform(x_channel, final_affine_matrix, final_offset,
                             order=order, mode=fill_mode, cval=cval)
        for x_channel in x
    ]
    x = np.stack(channel_images, axis=0)
    # Restore the original channel position.
    x = np.rollaxis(x, 0, channel_index + 1)
    return x

def transform_matrix_offset_center(matrix, x, y):
    """Conjugate ``matrix`` with a translation by ``(x / 2 + 0.5, y / 2 + 0.5)``.

    Returns ``offset . matrix . reset`` where ``offset`` translates by that
    amount and ``reset`` translates by its negative (3x3 homogeneous form).
    """
    shift_x = float(x) / 2 + 0.5
    shift_y = float(y) / 2 + 0.5
    shift_fwd = np.array([[1, 0, shift_x],
                          [0, 1, shift_y],
                          [0, 0, 1]])
    shift_back = np.array([[1, 0, -shift_x],
                           [0, 1, -shift_y],
                           [0, 0, 1]])
    return shift_fwd.dot(matrix).dot(shift_back)

def rotation(x, rg=20, is_random=False, row_index=0, col_index=1, channel_index=2,
                    fill_mode='nearest', cval=0.):
    """Rotate an image by ``rg`` degrees, or by a random angle in ``[-rg, rg)``.

    Args:
        x: image array; its sizes along ``row_index`` and ``col_index`` are
            handed to ``transform_matrix_offset_center``.
        rg: angle in degrees (fixed angle, or half-width of the random range).
        is_random: draw the angle with ``np.random.uniform(-rg, rg)`` when true.
        row_index, col_index, channel_index: axis positions in ``x``.
        fill_mode, cval: forwarded to ``apply_transform``.

    Returns:
        The result of ``apply_transform`` for the rotation matrix.
    """
    degrees = np.random.uniform(-rg, rg) if is_random else rg
    theta = np.pi / 180 * degrees
    cos_t, sin_t = np.cos(theta), np.sin(theta)
    rotation_matrix = np.array([[cos_t, -sin_t, 0],
                                [sin_t, cos_t, 0],
                                [0, 0, 1]])
    rows, cols = x.shape[row_index], x.shape[col_index]
    centred = transform_matrix_offset_center(rotation_matrix, rows, cols)
    return apply_transform(x, centred, channel_index, fill_mode, cval)

def crop(x, wrg, hrg, is_random=False, row_index=0, col_index=1, channel_index=2):
    """Cut an ``hrg`` x ``wrg`` window out of an image.

    Args:
        x: image array; height and width are read from ``row_index`` and
            ``col_index``. The slice itself is taken on axes 0 and 1.
        wrg: width of the crop.
        hrg: height of the crop.
        is_random: pick the window position uniformly at random when true,
            otherwise take the centred window.
        row_index, col_index: axes used to read the image size.
        channel_index: unused; kept for interface compatibility.

    Returns:
        The cropped view ``x[h0:h0 + hrg, w0:w0 + wrg]``.

    Raises:
        AssertionError: if the crop is not strictly smaller than the image.
    """
    h, w = x.shape[row_index], x.shape[col_index]
    assert (h > hrg) and (w > wrg), "The size of cropping should smaller than the original image"
    if is_random:
        # Every valid offset 0..(size - crop) is equally likely. The previous
        # int(uniform(0, d) - 1) could produce -1 (an empty crop), favoured
        # offset 0, and never reached the last two offsets.
        h_offset = np.random.randint(0, h - hrg + 1)
        w_offset = np.random.randint(0, w - wrg + 1)
    else:
        h_offset = int(np.floor((h - hrg) / 2.))
        w_offset = int(np.floor((w - wrg) / 2.))
    return x[h_offset: h_offset + hrg, w_offset: w_offset + wrg]

def flip_axis(x, axis, is_random=False):
    """Reverse ``x`` along ``axis``.

    With ``is_random`` set, one draw from ``np.random.uniform(-1, 1)`` decides:
    a positive draw flips, otherwise ``x`` is returned untouched.
    """
    if is_random and np.random.uniform(-1, 1) <= 0:
        return x
    # Bring the target axis to the front, reverse it, and put it back.
    reversed_front = np.asarray(x).swapaxes(axis, 0)[::-1, ...]
    return reversed_front.swapaxes(0, axis)

def imresize(x, size=[100, 100], interp='bilinear', mode=None):
    """Resize a 1- or 3-channel image with ``scipy.misc.imresize``.

    A single-channel input is resized as a 2-D plane and gets its trailing
    channel axis back; a 3-channel input is passed through as is. The notebook
    output shows that SciPy reports ``imresize`` as deprecated.

    Raises:
        Exception: if the last axis of ``x`` has a size other than 1 or 3.
    """
    channels = x.shape[-1]
    if channels not in (1, 3):
        raise Exception("Unsupported channel %d" % x.shape[-1])
    if channels == 3:
        return scipy.misc.imresize(x, size, interp=interp, mode=mode)
    plane = scipy.misc.imresize(x[:, :, 0], size, interp=interp, mode=mode)
    return plane[:, :, np.newaxis]

def prepro_img(x, mode=None):
    """Augment and rescale one image when ``mode == 'train'``.

    Training pipeline, in order: random flip along axis 1, random rotation
    within +/-16 degrees, resize to 79x79, random 64x64 crop, then the affine
    rescale ``x / 127.5 - 1``. For any other ``mode`` the input is returned
    unchanged.
    """
    if mode != 'train':
        return x
    x = flip_axis(x, axis=1, is_random=True)
    x = rotation(x, rg=16, is_random=True, fill_mode='nearest')
    # Enlarge by 15 pixels per side length so the random crop has room to move.
    x = imresize(x, size=[64 + 15, 64 + 15], interp='bilinear', mode=None)
    x = crop(x, wrg=64, hrg=64, is_random=True)
    return x / (255. / 2.) - 1.

def combine_and_save_image_sets(image_sets, directory):
    """Save one side-by-side comparison image per sample index.

    For index ``i`` the ``i``-th image of every set is laid out left to right,
    each followed by a 5-pixel-wide black separator, and written to
    ``<directory>/combined_<i>.jpg`` with ``scipy.misc.imsave``.

    Args:
        image_sets: sequence of image collections; the length of the first one
            determines how many files are written.
        directory: output directory (must already exist).
    """
    for i in range(len(image_sets[0])):
        strip = []
        for image_set in image_sets:
            tile = image_set[i]
            separator = np.zeros((tile.shape[0], 5, 3))
            strip.extend([tile, separator])
        combined_image = np.concatenate(strip, axis = 1)
        target = os.path.join(directory, 'combined_{}.jpg'.format(i))
        scipy.misc.imsave(target, combined_image)

def save(saver, sess, logdir, step):
    """Write a checkpoint named ``model.ckpt`` for ``step`` into ``logdir``.

    Args:
        saver: object with a ``save(sess, path, global_step=...)`` method.
        sess: the session handed through to ``saver.save``.
        logdir: target directory; created (with parents) if missing.
        step: value passed as ``global_step``.
    """
    model_name = 'model.ckpt'
    checkpoint_path = os.path.join(logdir, model_name)
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(logdir, exist_ok=True)
    saver.save(sess, checkpoint_path, global_step=step)
    print('The checkpoint has been created.')

def load(saver, sess, ckpt_path):
    """Restore ``sess`` from ``ckpt_path`` via ``saver.restore`` and report it."""
    saver.restore(sess, ckpt_path)
    message = "Restored model parameters from {}".format(ckpt_path)
    print(message)

In [16]:
# Directory that receives the per-epoch sample grids.
train_samples_dir = 'train_samples_1215_a'
os.makedirs(train_samples_dir, exist_ok=True)

# Optimisation settings used by the training loop.
lr = 0.0002
lr_decay = 0.5      # multiplicative learning-rate decay factor
decay_every = 200   # apply the decay every this many epochs
beta1 = 0.5         # Adam beta1
checkpoint_dir = './checkpoint_1215_a'
z_dim = 512
image_size = 64
c_dim = 3
batch_size = 64
# Side length of the square grid used when saving a batch of samples.
ni = int(np.ceil(np.sqrt(batch_size)))

# Fixed noise and captions, reused every epoch when saving sample grids.
sample_size = batch_size
sample_seed = np.random.normal(loc=0.0, scale=1.0, size=(sample_size, z_dim)).astype(np.float32)
# One caption per sample. The previous eight copy-pasted rows of
# int(sample_size / ni) captions only added up to sample_size for batch_size == 64.
sample_sentence = ["a black bird with oily black feathers and rounded black beak."] * sample_size
sample_sentence = np.asarray([sent2ID(sent) for sent in sample_sentence])
# 20 is the caption length seen in the (N, 20) caption-id arrays printed above.
sample_sentence = np.reshape(sample_sentence, (sample_size, 20))
print(sample_sentence.shape)

(64, 20)


In [19]:
class Text2Img:
    """Text-to-image training graph (TensorFlow 1.x, default graph).

    Constructing an instance creates:
      * placeholders for real/wrong images, real/wrong captions and noise;
      * a CNN/RNN encoder pair trained with a hinge loss (margin ``alpha``)
        on cosine similarities of matching vs. mismatching pairs;
      * a caption-conditioned generator and discriminator with sigmoid
        cross-entropy losses;
      * one Adam training op per loss, sharing a non-trainable learning-rate
        variable ``lr_v``.

    Attributes read by the training loop: ``t_real_image``, ``t_wrong_image``,
    ``t_real_caption``, ``t_wrong_caption``, ``t_z``, ``rnn_loss``, ``d_loss``,
    ``g_loss``, ``rnn_optim``, ``d_optim``, ``g_optim``, ``lr_v``, ``net_rnn``
    and ``net_g``.
    """

    def __init__(self):
        """Build placeholders, losses and optimizers; takes no arguments."""
        # --- Hyper-parameters ---
        self.lr = 0.0002
        self.lr_decay = 0.5
        self.decay_every = 200
        self.beta1 = 0.5
        self.z_dim = 512
        self.image_size = 64
        self.c_dim = 3
        self.batch_size = 64
        self.alpha = 0.2  # margin of the encoder hinge loss

        # --- Placeholders ---
        # Shapes come from the instance attributes; the previous version read
        # the notebook-global `image_size` and a literal 3, which tied the
        # class to an earlier cell having been run.
        image_shape = [self.batch_size, self.image_size, self.image_size, self.c_dim]
        self.t_real_image = tf.placeholder('float32', image_shape, name = 'real_image')
        self.t_wrong_image = tf.placeholder('float32', image_shape, name = 'wrong_image')
        self.t_real_caption = tf.placeholder(dtype=tf.int64, shape=[self.batch_size, None], name='real_caption_input')
        self.t_wrong_caption = tf.placeholder(dtype=tf.int64, shape=[self.batch_size, None], name='wrong_caption_input')
        self.t_z = tf.placeholder(tf.float32, [self.batch_size, self.z_dim], name='z_noise')

        # --- Training phase: CNN/RNN embedding alignment ---
        net_cnn = CNN_ENCODER(self.t_real_image, is_training=True, reuse=False)
        x = net_cnn.outputs
        v = RNN_ENCODER(self.t_real_caption, is_training=True, reuse=False).outputs
        x_w = CNN_ENCODER(self.t_wrong_image, is_training=True, reuse=True).outputs
        v_w = RNN_ENCODER(self.t_wrong_caption, is_training=True, reuse=True).outputs
        # Hinge terms: the matching pair (x, v) should beat (x, wrong caption)
        # and (wrong image, v) by at least alpha in cosine similarity.
        self.rnn_loss = tf.reduce_mean(tf.maximum(0., self.alpha - cosine_similarity(x, v) + cosine_similarity(x, v_w))) + \
                    tf.reduce_mean(tf.maximum(0., self.alpha - cosine_similarity(x, v) + cosine_similarity(x_w, v)))

        # --- Training phase: GAN ---
        # The caption encoder is reused with is_training=False for the GAN part.
        self.net_rnn = RNN_ENCODER(self.t_real_caption, is_training=False, reuse=True)
        net_fake_image = GENERATOR(self.t_z, self.net_rnn.outputs, is_training=True, reuse=False)
        net_disc_fake = DISCRIMINATOR(net_fake_image.outputs, self.net_rnn.outputs, is_training=True, reuse=False)
        disc_fake_logits = net_disc_fake.logits
        net_disc_real = DISCRIMINATOR(self.t_real_image, self.net_rnn.outputs, is_training=True, reuse=True)
        disc_real_logits = net_disc_real.logits
        net_disc_mismatch = DISCRIMINATOR(self.t_real_image, RNN_ENCODER(self.t_wrong_caption, is_training=False, reuse=True).outputs,
                                        is_training=True, reuse=True)
        disc_mismatch_logits = net_disc_mismatch.logits
        # Discriminator targets: real image + real caption -> 1,
        # real image + wrong caption -> 0, generated image + real caption -> 0.
        d_loss1 = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=disc_real_logits,     labels=tf.ones_like(disc_real_logits),      name='d1'))
        d_loss2 = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=disc_mismatch_logits, labels=tf.zeros_like(disc_mismatch_logits), name='d2'))
        d_loss3 = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=disc_fake_logits,     labels=tf.zeros_like(disc_fake_logits),     name='d3'))
        self.d_loss = d_loss1 + (d_loss2 + d_loss3) * 0.5
        # Generator target: the discriminator should output 1 for generated images.
        self.g_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=disc_fake_logits, labels=tf.ones_like(disc_fake_logits), name='g'))

        # --- Testing phase ---
        # Same generator weights, built with is_training=False for sampling.
        self.net_g = GENERATOR(self.t_z, RNN_ENCODER(self.t_real_caption, is_training=False, reuse=True).outputs,
                            is_training=False, reuse=True)

        # --- Optimizers ---
        # Variables and update ops are partitioned by the scope substring in
        # their names.
        rnn_vars = [var for var in tf.trainable_variables() if 'rnnencoder' in var.name]
        cnn_vars = [var for var in tf.trainable_variables() if 'cnnencoder' in var.name]
        d_vars = [var for var in tf.trainable_variables() if 'discriminator' in var.name]
        g_vars = [var for var in tf.trainable_variables() if 'generator' in var.name]
        update_ops_CNN = [var for var in tf.get_collection(tf.GraphKeys.UPDATE_OPS) if 'cnnencoder' in var.name]
        update_ops_D = [var for var in tf.get_collection(tf.GraphKeys.UPDATE_OPS) if 'discriminator' in var.name]
        update_ops_G = [var for var in tf.get_collection(tf.GraphKeys.UPDATE_OPS) if 'generator' in var.name]
        with tf.variable_scope('learning_rate'):
            # Non-trainable so the training loop can overwrite it for decay.
            self.lr_v = tf.Variable(self.lr, trainable=False)
        with tf.control_dependencies(update_ops_CNN):
            # Encoder gradients are clipped to a global norm of 10.
            grads, _ = tf.clip_by_global_norm(tf.gradients(self.rnn_loss, rnn_vars + cnn_vars), 10)
            optimizer = tf.train.AdamOptimizer(self.lr_v, beta1=self.beta1)
            self.rnn_optim = optimizer.apply_gradients(zip(grads, rnn_vars + cnn_vars))
        with tf.control_dependencies(update_ops_D):
            self.d_optim = tf.train.AdamOptimizer(self.lr_v, beta1=self.beta1).minimize(self.d_loss, var_list=d_vars)
        with tf.control_dependencies(update_ops_G):
            self.g_optim = tf.train.AdamOptimizer(self.lr_v, beta1=self.beta1).minimize(self.g_loss, var_list=g_vars)

In [21]:
# Build a fresh graph and model.
tf.reset_default_graph()
model = Text2Img()

# Session that allocates GPU memory on demand and allows soft placement.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.allow_soft_placement = True
sess = tf.Session(config=config)
init = tf.global_variables_initializer()
sess.run(init)
saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=10)

# Restore the latest checkpoint from checkpoint_dir, if there is one.
ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
if ckpt and ckpt.model_checkpoint_path:
    loader = tf.train.Saver(var_list=tf.global_variables())
    # NOTE(review): load_step is parsed but the epoch loop below still starts
    # at 0 after a restore - confirm whether resuming at load_step was intended.
    load_step = int(os.path.basename(ckpt.model_checkpoint_path).split('-')[1])
    load(loader, sess, ckpt.model_checkpoint_path)
else:
    print('no checkpoints find.')

n_epoch = 1000
n_batch_epoch = int(n_images_train / batch_size)
for epoch in range(n_epoch):
    start_time = time.time()
    # Step decay: multiply the base learning rate by lr_decay every decay_every epochs.
    if epoch !=0 and (epoch % decay_every == 0):
        new_lr_decay = lr_decay ** (epoch // decay_every)
        sess.run(tf.assign(model.lr_v, lr * new_lr_decay))
        log = " ** new learning rate: %f" % (lr * new_lr_decay)
        print(log)
    elif epoch == 0:
        log = " ** init lr: %f  decay_every_epoch: %d, lr_decay: %f" % (lr, decay_every, lr_decay)
        print(log)
    for step in range(n_batch_epoch):
        step_time = time.time()
        # Matching pairs: random captions and the images they belong to
        # (caption k belongs to image k // n_captions_per_image).
        idexs = get_random_int(min=0, max=n_captions_train-1, number=batch_size)
        b_real_caption = train_captions[idexs]
        b_real_images = train_images[np.asarray(idexs) // n_captions_per_image]
        # Mismatching captions and images, drawn independently at random.
        idexs = get_random_int(min=0, max=n_captions_train-1, number=batch_size)
        b_wrong_caption = train_captions[idexs]
        idexs2 = get_random_int(min=0, max=n_images_train-1, number=batch_size)
        b_wrong_images = train_images[idexs2]
        b_z = np.random.normal(loc=0.0, scale=1.0, size=(batch_size, z_dim)).astype(np.float32)
        # Augment and rescale both image batches.
        b_real_images = threading_data(b_real_images, prepro_img, mode='train')
        b_wrong_images = threading_data(b_wrong_images, prepro_img, mode='train')
        # The encoders are only updated during the first 300 epochs.
        if epoch < 300:
            errRNN, _ = sess.run([model.rnn_loss, model.rnn_optim], feed_dict={
                                            model.t_real_image : b_real_images,
                                            model.t_wrong_image : b_wrong_images,
                                            model.t_real_caption : b_real_caption,
                                            model.t_wrong_caption : b_wrong_caption})
        else:
            errRNN = 0
        # One discriminator update followed by one generator update.
        errD, _ = sess.run([model.d_loss, model.d_optim], feed_dict={
                            model.t_real_image : b_real_images,
                            model.t_wrong_caption : b_wrong_caption,
                            model.t_real_caption : b_real_caption,
                            model.t_z : b_z})
        errG, _ = sess.run([model.g_loss, model.g_optim], feed_dict={
                            model.t_real_caption : b_real_caption,
                            model.t_z : b_z})
    # "time" here is the duration of the last step of the epoch; the losses are
    # those of that last step as well.
    print("Epoch: [%d/%d] time: %4.4fs, d_loss: %.8f, g_loss: %.8f, rnn_loss: %.8f" \
                        % (epoch, n_epoch, time.time() - step_time, errD, errG, errRNN))
    if (epoch + 1) % 1 == 0:
        print(" ** Epoch %d took %fs" % (epoch, time.time()-start_time))
        # Render the fixed sample captions/noise to track progress visually.
        img_gen, rnn_out = sess.run([model.net_g.outputs, model.net_rnn.outputs], feed_dict={
                                        model.t_real_caption : sample_sentence,
                                        model.t_z : sample_seed})
        # Write into train_samples_dir instead of repeating its name as a literal.
        save_images(img_gen, [ni, ni], os.path.join(train_samples_dir, 'train_{:02d}.png'.format(epoch)))
    if (epoch != 0) and (epoch % 50) == 0:
        save(saver, sess, checkpoint_dir, epoch)
        print("[*] Save checkpoints SUCCESS!")

# Final checkpoint at the location named in the config. Make sure the directory
# exists first, as the save() helper does, so the last write of a long run
# does not depend on the directory having been created by hand.
os.makedirs(cfg.CHECKPOINT_DIR, exist_ok=True)
checkpoint_path = os.path.join(cfg.CHECKPOINT_DIR, cfg.CHECKPOINT_NAME)
saver.save(sess, checkpoint_path, global_step=epoch)
print('The checkpoint has been created.')

no checkpoints find.
 ** init lr: 0.000200  decay_every_epoch: 200, lr_decay: 0.500000


`imresize` is deprecated in SciPy 1.0.0, and will be removed in 1.3.0.
Use Pillow instead: ``numpy.array(Image.fromarray(arr).resize())``.


Epoch: [0/1000] time: 0.4526s, d_loss: 1.39677835, g_loss: 3.25727654, rnn_loss: 0.28294724
 ** Epoch 0 took 70.973406s


`imsave` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imwrite`` instead.


Epoch: [1/1000] time: 0.4483s, d_loss: 1.08312941, g_loss: 2.02957177, rnn_loss: 0.27835143
 ** Epoch 1 took 62.322615s
Epoch: [2/1000] time: 0.4504s, d_loss: 1.34336734, g_loss: 1.85429072, rnn_loss: 0.22829971
 ** Epoch 2 took 62.223853s
Epoch: [3/1000] time: 0.4546s, d_loss: 1.36559641, g_loss: 0.82441747, rnn_loss: 0.27085733
 ** Epoch 3 took 62.359390s
Epoch: [4/1000] time: 0.4489s, d_loss: 1.65723884, g_loss: 2.21429324, rnn_loss: 0.22263236
 ** Epoch 4 took 62.305847s
Epoch: [5/1000] time: 0.4511s, d_loss: 1.22167110, g_loss: 1.37054646, rnn_loss: 0.17334834
 ** Epoch 5 took 62.724688s
Epoch: [6/1000] time: 0.4441s, d_loss: 1.39337754, g_loss: 0.97370875, rnn_loss: 0.22804393
 ** Epoch 6 took 62.049126s
Epoch: [7/1000] time: 0.4447s, d_loss: 1.01644659, g_loss: 1.34860778, rnn_loss: 0.16401580
 ** Epoch 7 took 61.415032s
Epoch: [8/1000] time: 0.4460s, d_loss: 1.08504283, g_loss: 2.37551737, rnn_loss: 0.23385154
 ** Epoch 8 took 61.242856s
Epoch: [9/1000] time: 0.4406s, d_loss: 1

Epoch: [68/1000] time: 0.4477s, d_loss: 0.57435745, g_loss: 1.41203547, rnn_loss: 0.11899552
 ** Epoch 68 took 61.307806s
Epoch: [69/1000] time: 0.4488s, d_loss: 0.43365258, g_loss: 0.83381140, rnn_loss: 0.10913260
 ** Epoch 69 took 61.338626s
Epoch: [70/1000] time: 0.4446s, d_loss: 0.36439633, g_loss: 3.25707245, rnn_loss: 0.16765557
 ** Epoch 70 took 61.409520s
Epoch: [71/1000] time: 0.4385s, d_loss: 0.72157705, g_loss: 0.54475248, rnn_loss: 0.13295031
 ** Epoch 71 took 62.009081s
Epoch: [72/1000] time: 0.4397s, d_loss: 1.06414485, g_loss: 2.86718917, rnn_loss: 0.12723973
 ** Epoch 72 took 61.140329s
Epoch: [73/1000] time: 0.4367s, d_loss: 0.65720475, g_loss: 0.22930384, rnn_loss: 0.13412008
 ** Epoch 73 took 61.219543s
Epoch: [74/1000] time: 0.4429s, d_loss: 1.04844832, g_loss: 1.85211611, rnn_loss: 0.11930907
 ** Epoch 74 took 61.316349s
Epoch: [75/1000] time: 0.4402s, d_loss: 0.72206068, g_loss: 0.97092807, rnn_loss: 0.10329174
 ** Epoch 75 took 61.432708s
Epoch: [76/1000] time: 0

Epoch: [135/1000] time: 0.4455s, d_loss: 0.82348806, g_loss: 5.56333351, rnn_loss: 0.14405340
 ** Epoch 135 took 61.898005s
Epoch: [136/1000] time: 0.4423s, d_loss: 0.31512851, g_loss: 2.63049364, rnn_loss: 0.07639179
 ** Epoch 136 took 61.942282s
Epoch: [137/1000] time: 0.4457s, d_loss: 0.59404492, g_loss: 3.58719969, rnn_loss: 0.11104029
 ** Epoch 137 took 61.466860s
Epoch: [138/1000] time: 0.4461s, d_loss: 0.40660211, g_loss: 3.83510685, rnn_loss: 0.13480872
 ** Epoch 138 took 61.892293s
Epoch: [139/1000] time: 0.4455s, d_loss: 0.35278887, g_loss: 1.96575153, rnn_loss: 0.12190742
 ** Epoch 139 took 61.608941s
Epoch: [140/1000] time: 0.4448s, d_loss: 0.80812228, g_loss: 3.07921481, rnn_loss: 0.12482641
 ** Epoch 140 took 61.838987s
Epoch: [141/1000] time: 0.4462s, d_loss: 0.27204043, g_loss: 2.68164277, rnn_loss: 0.06444436
 ** Epoch 141 took 62.299920s
Epoch: [142/1000] time: 0.4437s, d_loss: 0.21372288, g_loss: 4.26452017, rnn_loss: 0.08651169
 ** Epoch 142 took 61.595062s
Epoch: [

The checkpoint has been created.
[*] Save checkpoints SUCCESS!
Epoch: [201/1000] time: 0.4428s, d_loss: 0.20356029, g_loss: 1.44445634, rnn_loss: 0.10186975
 ** Epoch 201 took 61.337691s
Epoch: [202/1000] time: 0.4427s, d_loss: 0.28181460, g_loss: 2.66722584, rnn_loss: 0.08691559
 ** Epoch 202 took 60.868838s
Epoch: [203/1000] time: 0.4403s, d_loss: 0.31179506, g_loss: 1.86932135, rnn_loss: 0.06049445
 ** Epoch 203 took 60.949792s
Epoch: [204/1000] time: 0.4407s, d_loss: 0.26664892, g_loss: 2.55959177, rnn_loss: 0.06957857
 ** Epoch 204 took 61.289769s
Epoch: [205/1000] time: 0.4426s, d_loss: 0.22687076, g_loss: 2.31266308, rnn_loss: 0.07165289
 ** Epoch 205 took 61.053741s
Epoch: [206/1000] time: 0.4481s, d_loss: 0.14329326, g_loss: 1.83651352, rnn_loss: 0.12971431
 ** Epoch 206 took 61.842380s
Epoch: [207/1000] time: 0.4486s, d_loss: 0.20269768, g_loss: 2.39168310, rnn_loss: 0.07360142
 ** Epoch 207 took 61.509089s
Epoch: [208/1000] time: 0.4464s, d_loss: 0.30105093, g_loss: 2.686451

Epoch: [267/1000] time: 0.4420s, d_loss: 0.23477641, g_loss: 3.05454922, rnn_loss: 0.09012578
 ** Epoch 267 took 61.657087s
Epoch: [268/1000] time: 0.4426s, d_loss: 0.16186604, g_loss: 2.68859339, rnn_loss: 0.08733658
 ** Epoch 268 took 61.562716s
Epoch: [269/1000] time: 0.4455s, d_loss: 0.23740953, g_loss: 2.86203480, rnn_loss: 0.08120967
 ** Epoch 269 took 61.186738s
Epoch: [270/1000] time: 0.4492s, d_loss: 0.36440700, g_loss: 1.41070676, rnn_loss: 0.07211971
 ** Epoch 270 took 61.411415s
Epoch: [271/1000] time: 0.4477s, d_loss: 0.08237178, g_loss: 4.20739460, rnn_loss: 0.08904888
 ** Epoch 271 took 61.274730s
Epoch: [272/1000] time: 0.4423s, d_loss: 1.07236958, g_loss: 4.15467501, rnn_loss: 0.09542677
 ** Epoch 272 took 61.710705s
Epoch: [273/1000] time: 0.4375s, d_loss: 0.12813896, g_loss: 4.14169264, rnn_loss: 0.10771483
 ** Epoch 273 took 61.404680s
Epoch: [274/1000] time: 0.4443s, d_loss: 0.08710026, g_loss: 3.42265701, rnn_loss: 0.08133576
 ** Epoch 274 took 61.217034s
Epoch: [

Epoch: [333/1000] time: 0.3814s, d_loss: 0.13486296, g_loss: 4.46625614, rnn_loss: 0.00000000
 ** Epoch 333 took 52.518762s
Epoch: [334/1000] time: 0.3760s, d_loss: 0.01855983, g_loss: 4.58971882, rnn_loss: 0.00000000
 ** Epoch 334 took 51.981394s
Epoch: [335/1000] time: 0.3770s, d_loss: 0.04642688, g_loss: 3.96350312, rnn_loss: 0.00000000
 ** Epoch 335 took 52.190089s
Epoch: [336/1000] time: 0.3699s, d_loss: 0.06923918, g_loss: 4.02246761, rnn_loss: 0.00000000
 ** Epoch 336 took 52.162308s
Epoch: [337/1000] time: 0.3763s, d_loss: 0.05042071, g_loss: 4.43398571, rnn_loss: 0.00000000
 ** Epoch 337 took 51.625433s
Epoch: [338/1000] time: 0.3735s, d_loss: 0.12528276, g_loss: 4.07130289, rnn_loss: 0.00000000
 ** Epoch 338 took 52.605779s
Epoch: [339/1000] time: 0.3760s, d_loss: 0.12030324, g_loss: 3.92543316, rnn_loss: 0.00000000
 ** Epoch 339 took 52.375752s
Epoch: [340/1000] time: 0.3764s, d_loss: 0.23278978, g_loss: 3.32425833, rnn_loss: 0.00000000
 ** Epoch 340 took 52.110970s
Epoch: [

Epoch: [399/1000] time: 0.3761s, d_loss: 0.26067021, g_loss: 2.92448235, rnn_loss: 0.00000000
 ** Epoch 399 took 51.856039s
 ** new learning rate: 0.000050
Epoch: [400/1000] time: 0.3787s, d_loss: 0.01210347, g_loss: 5.44188404, rnn_loss: 0.00000000
 ** Epoch 400 took 52.491945s
The checkpoint has been created.
[*] Save checkpoints SUCCESS!
Epoch: [401/1000] time: 0.3842s, d_loss: 0.00938686, g_loss: 6.57713699, rnn_loss: 0.00000000
 ** Epoch 401 took 52.225013s
Epoch: [402/1000] time: 0.3762s, d_loss: 0.02594681, g_loss: 4.57750082, rnn_loss: 0.00000000
 ** Epoch 402 took 52.184524s
Epoch: [403/1000] time: 0.3770s, d_loss: 0.04175401, g_loss: 4.29874420, rnn_loss: 0.00000000
 ** Epoch 403 took 53.218473s
Epoch: [404/1000] time: 0.3784s, d_loss: 0.02436842, g_loss: 4.51301193, rnn_loss: 0.00000000
 ** Epoch 404 took 52.300745s
Epoch: [405/1000] time: 0.3768s, d_loss: 0.01791300, g_loss: 4.86187267, rnn_loss: 0.00000000
 ** Epoch 405 took 52.274415s
Epoch: [406/1000] time: 0.3760s, d_lo

Epoch: [464/1000] time: 0.3756s, d_loss: 0.03644995, g_loss: 6.32729292, rnn_loss: 0.00000000
 ** Epoch 464 took 52.063684s
Epoch: [465/1000] time: 0.3743s, d_loss: 0.04405097, g_loss: 6.02743816, rnn_loss: 0.00000000
 ** Epoch 465 took 52.034441s
Epoch: [466/1000] time: 0.3751s, d_loss: 0.07276414, g_loss: 3.67867112, rnn_loss: 0.00000000
 ** Epoch 466 took 52.022748s
Epoch: [467/1000] time: 0.3796s, d_loss: 0.02548708, g_loss: 4.85992622, rnn_loss: 0.00000000
 ** Epoch 467 took 52.502822s
Epoch: [468/1000] time: 0.3791s, d_loss: 0.06704424, g_loss: 4.85019779, rnn_loss: 0.00000000
 ** Epoch 468 took 52.721846s
Epoch: [469/1000] time: 0.3833s, d_loss: 0.00483389, g_loss: 6.78397083, rnn_loss: 0.00000000
 ** Epoch 469 took 52.148387s
Epoch: [470/1000] time: 0.3824s, d_loss: 0.04015246, g_loss: 3.82990909, rnn_loss: 0.00000000
 ** Epoch 470 took 52.389028s
Epoch: [471/1000] time: 0.3796s, d_loss: 0.00693915, g_loss: 5.88411140, rnn_loss: 0.00000000
 ** Epoch 471 took 52.366512s
Epoch: [

Epoch: [530/1000] time: 0.3814s, d_loss: 0.01022718, g_loss: 5.21137142, rnn_loss: 0.00000000
 ** Epoch 530 took 52.543090s
Epoch: [531/1000] time: 0.3765s, d_loss: 0.03206986, g_loss: 4.11842251, rnn_loss: 0.00000000
 ** Epoch 531 took 52.669337s
Epoch: [532/1000] time: 0.3801s, d_loss: 0.00363065, g_loss: 6.58688545, rnn_loss: 0.00000000
 ** Epoch 532 took 52.430453s
Epoch: [533/1000] time: 0.3805s, d_loss: 0.04057636, g_loss: 4.11376238, rnn_loss: 0.00000000
 ** Epoch 533 took 53.293386s
Epoch: [534/1000] time: 0.3798s, d_loss: 0.06685898, g_loss: 3.11948967, rnn_loss: 0.00000000
 ** Epoch 534 took 52.428019s
Epoch: [535/1000] time: 0.3836s, d_loss: 0.05296512, g_loss: 4.02168798, rnn_loss: 0.00000000
 ** Epoch 535 took 52.405428s
Epoch: [536/1000] time: 0.3800s, d_loss: 0.12641668, g_loss: 4.83500671, rnn_loss: 0.00000000
 ** Epoch 536 took 52.642856s
Epoch: [537/1000] time: 0.3814s, d_loss: 0.02969495, g_loss: 3.06520033, rnn_loss: 0.00000000
 ** Epoch 537 took 52.843877s
Epoch: [

Epoch: [596/1000] time: 0.3829s, d_loss: 0.02649551, g_loss: 5.67996407, rnn_loss: 0.00000000
 ** Epoch 596 took 53.450839s
Epoch: [597/1000] time: 0.4067s, d_loss: 0.00289816, g_loss: 9.80435371, rnn_loss: 0.00000000
 ** Epoch 597 took 53.449812s
Epoch: [598/1000] time: 0.3788s, d_loss: 0.06167121, g_loss: 2.86602068, rnn_loss: 0.00000000
 ** Epoch 598 took 53.305729s
Epoch: [599/1000] time: 0.3785s, d_loss: 0.03551814, g_loss: 4.02850008, rnn_loss: 0.00000000
 ** Epoch 599 took 52.552694s
 ** new learning rate: 0.000025
Epoch: [600/1000] time: 0.3801s, d_loss: 0.05294380, g_loss: 4.22082615, rnn_loss: 0.00000000
 ** Epoch 600 took 53.349433s
The checkpoint has been created.
[*] Save checkpoints SUCCESS!
Epoch: [601/1000] time: 0.3893s, d_loss: 0.01994598, g_loss: 4.28329897, rnn_loss: 0.00000000
 ** Epoch 601 took 52.854660s
Epoch: [602/1000] time: 0.3841s, d_loss: 0.03840312, g_loss: 3.96128368, rnn_loss: 0.00000000
 ** Epoch 602 took 53.092351s
Epoch: [603/1000] time: 0.3858s, d_lo

Epoch: [661/1000] time: 0.4078s, d_loss: 0.00214471, g_loss: 9.45944977, rnn_loss: 0.00000000
 ** Epoch 661 took 53.563015s
Epoch: [662/1000] time: 0.3850s, d_loss: 0.00138806, g_loss: 8.84240341, rnn_loss: 0.00000000
 ** Epoch 662 took 53.522699s
Epoch: [663/1000] time: 0.3853s, d_loss: 0.03717446, g_loss: 4.10726500, rnn_loss: 0.00000000
 ** Epoch 663 took 53.430427s
Epoch: [664/1000] time: 0.3757s, d_loss: 0.01496491, g_loss: 4.46892262, rnn_loss: 0.00000000
 ** Epoch 664 took 52.815218s
Epoch: [665/1000] time: 0.3908s, d_loss: 0.00917734, g_loss: 6.07565308, rnn_loss: 0.00000000
 ** Epoch 665 took 53.360471s
Epoch: [666/1000] time: 0.3927s, d_loss: 0.00341166, g_loss: 8.30986595, rnn_loss: 0.00000000
 ** Epoch 666 took 53.743068s
Epoch: [667/1000] time: 0.3879s, d_loss: 0.13887776, g_loss: 3.93472195, rnn_loss: 0.00000000
 ** Epoch 667 took 54.144067s
Epoch: [668/1000] time: 0.3917s, d_loss: 0.04819794, g_loss: 3.88288212, rnn_loss: 0.00000000
 ** Epoch 668 took 53.994199s
Epoch: [

Epoch: [727/1000] time: 0.3973s, d_loss: 0.07473493, g_loss: 5.97950840, rnn_loss: 0.00000000
 ** Epoch 727 took 54.119787s
Epoch: [728/1000] time: 0.3896s, d_loss: 0.03140585, g_loss: 4.68763924, rnn_loss: 0.00000000
 ** Epoch 728 took 54.194714s
Epoch: [729/1000] time: 0.3947s, d_loss: 0.00069258, g_loss: 11.97150803, rnn_loss: 0.00000000
 ** Epoch 729 took 54.220334s
Epoch: [730/1000] time: 0.4176s, d_loss: 0.04136960, g_loss: 4.27302265, rnn_loss: 0.00000000
 ** Epoch 730 took 54.399343s
Epoch: [731/1000] time: 0.3985s, d_loss: 0.03883731, g_loss: 3.87454748, rnn_loss: 0.00000000
 ** Epoch 731 took 54.891918s
Epoch: [732/1000] time: 0.3891s, d_loss: 0.07054738, g_loss: 6.52570820, rnn_loss: 0.00000000
 ** Epoch 732 took 54.304127s
Epoch: [733/1000] time: 0.3991s, d_loss: 0.00872412, g_loss: 5.67878580, rnn_loss: 0.00000000
 ** Epoch 733 took 54.126649s
Epoch: [734/1000] time: 0.3945s, d_loss: 0.50476122, g_loss: 1.14347339, rnn_loss: 0.00000000
 ** Epoch 734 took 54.203458s
Epoch: 

Epoch: [793/1000] time: 0.3882s, d_loss: 0.00750701, g_loss: 6.54685545, rnn_loss: 0.00000000
 ** Epoch 793 took 54.966171s
Epoch: [794/1000] time: 0.3952s, d_loss: 0.01478549, g_loss: 5.31592274, rnn_loss: 0.00000000
 ** Epoch 794 took 54.570474s
Epoch: [795/1000] time: 0.3996s, d_loss: 0.00228576, g_loss: 9.25747967, rnn_loss: 0.00000000
 ** Epoch 795 took 54.496722s
Epoch: [796/1000] time: 0.3947s, d_loss: 0.08866515, g_loss: 5.54889297, rnn_loss: 0.00000000
 ** Epoch 796 took 54.461194s
Epoch: [797/1000] time: 0.3988s, d_loss: 0.01574985, g_loss: 6.81949139, rnn_loss: 0.00000000
 ** Epoch 797 took 54.526120s
Epoch: [798/1000] time: 0.3963s, d_loss: 0.00373009, g_loss: 6.60390615, rnn_loss: 0.00000000
 ** Epoch 798 took 54.825537s
Epoch: [799/1000] time: 0.3928s, d_loss: 0.01348102, g_loss: 6.43580675, rnn_loss: 0.00000000
 ** Epoch 799 took 55.224668s
 ** new learning rate: 0.000013
Epoch: [800/1000] time: 0.4037s, d_loss: 0.29555830, g_loss: 2.98319912, rnn_loss: 0.00000000
 ** Ep

KeyboardInterrupt: 

## 3. Evaluation metric

In [None]:
def generate_r_precision_data():
    """Compute and save the feature arrays used to measure R-precision.

    For every full batch of test captions this function:
      1. encodes the true captions with ``rnn_encoder``,
      2. synthesizes images from those captions with ``generator``,
      3. encodes the synthesized images with ``cnn_encoder``,
      4. encodes ``cfg.WRONG_CAPTION`` mismatched captions per sample with
         ``rnn_encoder``.

    The results are written with ``np.savez`` to
    ``cfg.R_PRECISION_DIR/cfg.R_PRECISION_FILE`` under the keys ``true_cnn``,
    ``true_rnn`` and ``wrong_rnn``.

    Uses the notebook-level objects ``test_dataset``, ``rnn_encoder``,
    ``cnn_encoder``, ``generator``, ``t_real_caption``, ``t_real_image`` and
    ``t_z``. Captions that do not fill a whole batch of ``cfg.BATCH_SIZE`` are
    skipped.
    """
    caption_ids = np.reshape(np.asarray(test_dataset.captions_ids), (-1, cfg.TEXT.WORDS_NUM))
    captions_ids_wrong = np.reshape(test_dataset.random_wrong_captions(), (-1, cfg.WRONG_CAPTION, cfg.TEXT.WORDS_NUM))

    n_caption_test = len(caption_ids)
    num_batches = n_caption_test // cfg.BATCH_SIZE

    true_cnn_features = np.zeros((num_batches, cfg.BATCH_SIZE, cfg.TEXT.EMBEDDING_DIM), dtype=float)
    true_rnn_features = np.zeros((num_batches, cfg.BATCH_SIZE, cfg.TEXT.EMBEDDING_DIM), dtype=float)
    wrong_rnn_features = np.zeros((num_batches, cfg.WRONG_CAPTION, cfg.BATCH_SIZE, cfg.TEXT.EMBEDDING_DIM), dtype=float)

    # The context manager closes the session (and frees its resources) even if
    # restoring the checkpoint or running the graph raises.
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        sess.run(tf.global_variables_initializer())

        # load the trained checkpoint
        checkpoint_dir = cfg.CHECKPOINT_DIR
        if checkpoint_dir is not None:
            loader = tf.train.Saver(var_list=tf.global_variables())
            # The checkpoint name comes from the config file, exactly as in
            # generate_inception_score_data().
            ckpt_path = os.path.join(checkpoint_dir, cfg.CHECKPOINT_NAME)
            loader.restore(sess, ckpt_path)
            print("Restored model parameters from {}".format(ckpt_path))
        else:
            print('no checkpoints find.')

        for i in range(num_batches):
            batch = slice(i * cfg.BATCH_SIZE, (i + 1) * cfg.BATCH_SIZE)
            test_cap = caption_ids[batch]

            z = np.random.normal(loc=0.0, scale=1.0, size=(cfg.BATCH_SIZE, cfg.GAN.Z_DIM)).astype(np.float32)

            rnn_features = sess.run(rnn_encoder.outputs, feed_dict={t_real_caption: test_cap})
            gen = sess.run(generator.outputs, feed_dict={t_real_caption: test_cap, t_z: z})
            # The image features are taken from the *generated* images.
            cnn_features = sess.run(cnn_encoder.outputs, feed_dict={t_real_image: gen})

            true_cnn_features[i] = cnn_features
            true_rnn_features[i] = rnn_features

            # Slice the mismatched captions once per batch, then encode one
            # "wrong caption" column at a time.
            wrong_cap = captions_ids_wrong[batch]
            for per_wrong_caption in range(cfg.WRONG_CAPTION):
                wrong_rnn_features[i, per_wrong_caption] = sess.run(
                    rnn_encoder.outputs,
                    feed_dict={t_real_caption: wrong_cap[:, per_wrong_caption]})

    # if exists, remove the existing file first (best effort: a missing file is fine)
    output_path = os.path.join(cfg.R_PRECISION_DIR, cfg.R_PRECISION_FILE)
    try:
        os.remove(output_path)
    except OSError:
        pass
    np.savez(output_path, true_cnn=true_cnn_features, true_rnn=true_rnn_features,
             wrong_rnn=wrong_rnn_features)

In [None]:
def generate_inception_score_data():
    """Synthesize test images and save them for the inception-score notebook.

    The test captions are grouped as ``(image, caption index, word)``. For each
    full batch of ``cfg.BATCH_SIZE`` images and each of the
    ``cfg.TEXT.CAPTIONS_PER_IMAGE`` captions, one image per sample is generated
    with ``generator`` from fresh Gaussian noise and written to
    ``cfg.TEST.GENERATED_TEST_IMAGES/<filename>_<caption index>.png``.

    Uses the notebook-level objects ``test_dataset``, ``generator``,
    ``t_real_caption`` and ``t_z``. Images that do not fill a whole batch are
    skipped.
    """
    caption_ids = np.reshape(np.asarray(test_dataset.captions_ids),
                             (-1, cfg.TEXT.CAPTIONS_PER_IMAGE, cfg.TEXT.WORDS_NUM))

    n_caption_test = len(caption_ids)
    num_batches = n_caption_test // cfg.BATCH_SIZE

    # The context manager closes the session (and frees its resources) even if
    # restoring the checkpoint or running the graph raises.
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        sess.run(tf.global_variables_initializer())

        # load the trained checkpoint
        checkpoint_dir = cfg.CHECKPOINT_DIR
        if checkpoint_dir is not None:
            loader = tf.train.Saver(var_list=tf.global_variables())
            ckpt_path = os.path.join(checkpoint_dir, cfg.CHECKPOINT_NAME)
            loader.restore(sess, ckpt_path)
            print("Restored model parameters from {}".format(ckpt_path))
        else:
            print('no checkpoints find.')

        for i in range(num_batches):
            batch = slice(i * cfg.BATCH_SIZE, (i + 1) * cfg.BATCH_SIZE)
            # The file names depend only on the batch, not on the caption index.
            test_directory = test_dataset.filenames[batch]

            for per_caption in range(cfg.TEXT.CAPTIONS_PER_IMAGE):
                test_cap = caption_ids[batch, per_caption]

                z = np.random.normal(loc=0.0, scale=1.0, size=(cfg.BATCH_SIZE, cfg.GAN.Z_DIM)).astype(np.float32)
                gen = sess.run(generator.outputs, feed_dict={t_real_caption: test_cap, t_z: z})

                for j in range(cfg.BATCH_SIZE):
                    # The first path component of the file name is used as the
                    # output sub-directory. makedirs also creates the
                    # GENERATED_TEST_IMAGES root when it does not exist yet.
                    class_dir = os.path.join(cfg.TEST.GENERATED_TEST_IMAGES, test_directory[j].split('/')[0])
                    if not os.path.isdir(class_dir):
                        os.makedirs(class_dir)

                    # NOTE(review): scipy.misc.imsave was removed in SciPy 1.2;
                    # this call requires the older SciPy this notebook targets.
                    scipy.misc.imsave(os.path.join(cfg.TEST.GENERATED_TEST_IMAGES, test_directory[j] + '_{}.png'.format(per_caption)), gen[j])

In [None]:
# Compute the true/wrong caption and generated-image features and save them
# to cfg.R_PRECISION_DIR/cfg.R_PRECISION_FILE for the R-precision notebook.
generate_r_precision_data()

In [None]:
# Synthesize images from the batched test captions and save them as PNG files
# under cfg.TEST.GENERATED_TEST_IMAGES for the inception-score notebook.
generate_inception_score_data()

## 4. Measure Inception score and R-precision of the given test dataset

After setting the config file to 'eval_birds.yml' and running 'generate_inception_score_data()' and 'generate_r_precision_data()', the images synthesized from the given captions and the set of image and caption features should be saved inside the 'evaluation' folder: the images under 'evaluation/generated_images/..' and the features as 'evaluation/r_precision.npz'.

**Then, go to the 'evaluation' folder and run 'inception_score.ipynb' and 'r_precision.ipynb' to measure the inception score and the R-precision score, respectively.**