# M2177.003100 Deep Learning <br> Final Project: Text to Image Synthesis (Tensorflow)

Copyright (C) Data Science & AI Laboratory, Seoul National University. This material is for educational uses only. Some contents are based on the material provided by other paper/book authors and may be copyrighted by them. 

**To understand this work, please read the provided PPT file carefully.**

**Note**: certain details are missing or ambiguous on purpose, in order to test your knowledge of the related materials. However, if you really feel that something essential is missing and you cannot proceed to the next step, then contact the teaching staff with a clear description of your problem.

### Submitting your work:
<font color=red>**DO NOT clear the training process**</font> so that TAs can grade both your code and results.  
**The TA will set the config file to 'eval_birds.yml' when evaluating the code on the hidden test dataset. Please make sure that your code can generate the data needed to measure the inception score and R-precision on the hidden test dataset.**

## 1. Load datasets
The Birds dataset will be downloaded automatically if it is not located in the *data* directory. <br>

In [12]:
import tensorflow as tf
import matplotlib.pyplot as plt
import os, nltk
from miscc.config import cfg, cfg_from_file
import pprint
import datetime
import dateutil.tz
import numpy as np
import scipy
from utils.data_utils import CUBDataset
from utils.loss import cosine_similarity
import pandas as pd
from scipy.io import loadmat
import re
import string
import random
import time

#################################################
# DO NOT CHANGE 
from utils.model_1215_a import CNN_ENCODER, RNN_ENCODER, GENERATOR, DISCRIMINATOR
#################################################

%matplotlib inline

In [2]:
# Set a config file as 'train_birds.yml' in training, as 'eval_birds.yml' for evaluation
cfg_from_file('cfg/train_birds.yml') # eval_birds.yml

# Echo the effective configuration so it is recorded in the cell output.
print('Using config:')
pprint.pprint(cfg)

# Set CUDA_VISIBLE_DEVICES from the GPU_ID entry of the config.
os.environ['CUDA_VISIBLE_DEVICES'] = cfg.GPU_ID

# Build a timestamped output path string of the form
# sample/<DATASET_NAME>_<CONFIG_NAME>_<YYYY_MM_DD_HH_MM_SS> (local time zone).
# Only the string is built here; no directory is created in this cell.
now = datetime.datetime.now(dateutil.tz.tzlocal())
timestamp = now.strftime('%Y_%m_%d_%H_%M_%S')
output_dir = 'sample/%s_%s_%s' % (cfg.DATASET_NAME, cfg.CONFIG_NAME, timestamp)

Using config:
{'BATCH_SIZE': 64,
 'CHECKPOINT_DIR': './checkpoint',
 'CHECKPOINT_NAME': 'model.ckpt',
 'CNN': {'EMBEDDING_DIM': 0, 'H_DIM': 0},
 'CONFIG_NAME': 'text-to-image',
 'CUDA': False,
 'DATASET_NAME': 'birds',
 'DATA_DIR': 'data/birds',
 'EMBEDDING_TYPE': 'cnn-rnn',
 'GAN': {'B_ATTENTION': False,
         'B_CONDITION': False,
         'B_DCGAN': False,
         'CONDITION_DIM': 0,
         'DF_DIM': 0,
         'EMBEDDING_DIM': 0,
         'GF_DIM': 0,
         'R_NUM': 0,
         'Z_DIM': 512},
 'GPU_ID': '0',
 'IMAGE_SIZE': 256,
 'NUM_BATCH_FOR_TEST': 0,
 'RANDOM_SEED': 0,
 'RNN': {'EMBEDDING_DIM': 0,
         'H_DIM': 0,
         'TYPE': '',
         'VOCAB_SIZE': 0,
         'WORD_EMBEDDING_DIM': 0},
 'R_PRECISION_DIR': './evaluation',
 'R_PRECISION_FILE': 'r_precision.npz',
 'R_PRECISION_FILE_HIDDEN': 'r_precision_hidden.npz',
 'TEST': {'B_EXAMPLE': False,
          'GENERATED_HIDDEN_TEST_IMAGES': './evaluation/generated_images_hidden',
          'GENERATED_TEST_IMAGES'

  yaml_cfg = edict(yaml.load(f))


In [3]:
# Build the train and test splits from the data directory named in the config.
train_dataset = CUBDataset(cfg.DATA_DIR, split='train')
test_dataset = CUBDataset(cfg.DATA_DIR, split='test')

# The remaining lines only print a summary of what was loaded.
print(f'\ntrain data directory:\n{train_dataset.split_dir}')
print(f'test data directory:\n{test_dataset.split_dir}\n')

print(f'# of train filenames:{train_dataset.filenames.shape}')
print(f'# of test filenames:{test_dataset.filenames.shape}\n')

# NOTE: the second label says "valid image" but the value printed comes from test_dataset.
print(f'example of filename of train image:{train_dataset.filenames[0]}')
print(f'example of filename of valid image:{test_dataset.filenames[0]}\n')

# First caption of each split, shown both as text and as vocabulary ids
# (the first print is for the train split, the second for the test split).
print(f'example of caption and its ids:\n{train_dataset.captions[0]}\n{train_dataset.captions_ids[0]}\n')
print(f'example of caption and its ids:\n{test_dataset.captions[0]}\n{test_dataset.captions_ids[0]}\n')

print(f'# of train captions:{np.asarray(train_dataset.captions).shape}')
print(f'# of test captions:{np.asarray(test_dataset.captions).shape}\n')

print(f'# of train caption ids:{np.asarray(train_dataset.captions_ids).shape}')
print(f'# of test caption ids:{np.asarray(test_dataset.captions_ids).shape}\n')

print(f'# of train images:{train_dataset.images.shape}')
print(f'# of test images:{test_dataset.images.shape}\n')

self.current_dir:
/home/chszerg/final-project-deep-learning-19-tf

self.data_dir:
/home/chszerg/final-project-deep-learning-19-tf/data/birds

self.image_dir:
/home/chszerg/final-project-deep-learning-19-tf/data/birds/CUB_200_2011.tgz

Dataset already exists
self.image_dir:
/home/chszerg/final-project-deep-learning-19-tf/data/birds/CUB_200_2011/images

Load from:  data/birds/captions.pickle
self.current_dir:
/home/chszerg/final-project-deep-learning-19-tf

self.data_dir:
/home/chszerg/final-project-deep-learning-19-tf/data/birds

self.image_dir:
/home/chszerg/final-project-deep-learning-19-tf/data/birds/CUB_200_2011.tgz

Dataset already exists
self.image_dir:
/home/chszerg/final-project-deep-learning-19-tf/data/birds/CUB_200_2011/images

Load from:  data/birds/captions.pickle

train data directory:
/home/chszerg/final-project-deep-learning-19-tf/data/birds/train
test data directory:
/home/chszerg/final-project-deep-learning-19-tf/data/birds/test

# of train filenames:(8855,)
# of test f

In [4]:
# Pull the image arrays and caption-id sequences out of the dataset objects;
# the caption ids are converted to NumPy arrays so they can be fancy-indexed later.
train_images = train_dataset.images
test_images = test_dataset.images
train_captions = np.asarray(train_dataset.captions_ids)
test_captions = np.asarray(test_dataset.captions_ids)
# Sanity-print the four shapes (images first, then captions).
print(train_images.shape)
print(test_images.shape)
print(train_captions.shape)
print(test_captions.shape)

(8855, 256, 256, 3)
(2933, 256, 256, 3)
(88550, 20)
(29330, 20)


In [5]:
from skimage.transform import resize
# Resize every training image to 64x64x3 one at a time and stack the results.
train_images_64 = []
for train_image in train_images:
    train_images_64.append(resize(train_image, (64, 64, 3)))
train_images_64 = np.asarray(train_images_64)
print(train_images_64.shape)
# The resized set must contain exactly as many images as the original.
assert train_images_64.shape[0] == train_images.shape[0]
# Same procedure for the test images.
test_images_64 = []
for test_image in test_images:
    test_images_64.append(resize(test_image, (64, 64, 3)))
test_images_64 = np.asarray(test_images_64)
print(test_images_64.shape)
assert test_images_64.shape[0] == test_images.shape[0]

(8855, 64, 64, 3)
(2933, 64, 64, 3)


In [6]:
# From here on the 64x64 copies replace the full-resolution arrays under the same names.
train_images = train_images_64
test_images = test_images_64
n_captions_train = len(train_captions)
# The training loop divides a caption index by this value to get its image index.
n_captions_per_image = 10
n_images_train = len(train_images)

In [7]:
import scipy.misc
import threading
import scipy.ndimage as ndi
from skimage import transform
from skimage import exposure
import skimage
from nltk.tokenize import RegexpTokenizer

def sent2ID(sample_sentence):
    """Convert a raw caption string into a sequence of vocabulary ids.

    The sentence is lower-cased, split into word-character tokens, stripped
    of non-ASCII characters and mapped through ``train_dataset.wordtoix``.
    Words that are not in the vocabulary are dropped silently.  The id list
    is then passed to ``train_dataset.get_caption``.

    Args:
        sample_sentence: caption text.

    Returns:
        A one-element list containing the first value returned by
        ``get_caption`` with its axis 1 squeezed out.

    Raises:
        ValueError: if ``sample_sentence`` is empty.
    """
    if len(sample_sentence) == 0:
        # Raise instead of calling exit(): exit() would terminate the whole
        # interpreter/kernel rather than report a bad argument to the caller.
        raise ValueError("sample_sentence must not be empty")

    cap = sample_sentence.replace("\ufffd\ufffd", " ")
    tokenizer = RegexpTokenizer(r'\w+')

    # Keep only tokens that still have content once non-ASCII characters are removed.
    ascii_tokens = []
    for token in tokenizer.tokenize(cap.lower()):
        token = token.encode('ascii', 'ignore').decode('ascii')
        if len(token) > 0:
            ascii_tokens.append(token)

    # Out-of-vocabulary words are skipped rather than mapped to a placeholder id.
    ids = [train_dataset.wordtoix[w] for w in ascii_tokens if w in train_dataset.wordtoix]

    # get_caption returns a pair; only the first element is used here.
    x, _ = train_dataset.get_caption(ids)
    return [np.squeeze(x, axis=1)]

def ID2sent(sample_caption):
    """Translate a sequence of vocabulary ids into a list of words.

    Every id that differs from ``train_dataset.ixtoword['<PAD>']`` is looked
    up in ``train_dataset.ixtoword``; ids equal to that value are skipped.

    Args:
        sample_caption: iterable of ids.

    Returns:
        List of the looked-up words, in input order.
    """
    # NOTE(review): the padding value is read from ``ixtoword`` using the string
    # key '<PAD>'. If ``ixtoword`` is keyed by integer ids, ``wordtoix`` may have
    # been intended — confirm against CUBDataset.
    return [
        train_dataset.ixtoword[token_id]
        for token_id in sample_caption
        if token_id != train_dataset.ixtoword['<PAD>']
    ]

def get_random_int(min=0, max=10, number=5):
    """Draw ``number`` random integers from the closed range [min, max].

    Args:
        min: smallest value that may be drawn.
        max: largest value that may be drawn (inclusive, as in ``random.randint``).
        number: how many values to draw.

    Returns:
        List of ``number`` integers.
    """
    draws = []
    for _ in range(number):
        draws.append(random.randint(min, max))
    return draws

def merge(images, size):
    """Tile a batch of 3-channel images into one grid image, filling row by row.

    Args:
        images: array whose axes 1 and 2 give each tile's height and width.
        size: ``(rows, cols)`` of the grid.

    Returns:
        Float array of shape ``(h * rows, w * cols, 3)``; grid cells with no
        corresponding image stay zero.
    """
    tile_h, tile_w = images.shape[1], images.shape[2]
    n_rows, n_cols = size[0], size[1]
    canvas = np.zeros((tile_h * n_rows, tile_w * n_cols, 3))
    for idx, tile in enumerate(images):
        # Row-major placement: idx -> (grid row, grid column).
        grid_row, grid_col = divmod(idx, n_cols)
        top = grid_row * tile_h
        left = grid_col * tile_w
        canvas[top:top + tile_h, left:left + tile_w, :] = tile
    return canvas

def imsave(images, size, path):
    """Tile ``images`` into a grid with ``merge`` and write the grid to ``path``.

    Args:
        images: batch of images, passed to ``merge``.
        size: grid layout, passed to ``merge``.
        path: destination file path.

    Returns:
        Whatever ``scipy.misc.imsave`` returns.

    NOTE(review): the recorded training output in this notebook shows SciPy
    warning that ``imsave`` is deprecated and will be removed in SciPy 1.2.0,
    so this only works with an older SciPy.
    """
    return scipy.misc.imsave(path, merge(images, size))

def save_images(images, size, image_path):
    """Save a batch of images as a single grid image; a thin alias for ``imsave``.

    Args:
        images: batch of images.
        size: grid layout forwarded to ``imsave``.
        image_path: destination file path.

    Returns:
        The return value of ``imsave``.
    """
    return imsave(images, size, image_path)

def threading_data(data=None, fn=None, **kwargs):
    """Apply ``fn`` to every element of ``data``, one thread per element.

    Args:
        data: indexable collection with a length.
        fn: callable invoked as ``fn(item, **kwargs)`` for each item.
        **kwargs: extra keyword arguments forwarded to every ``fn`` call.

    Returns:
        ``np.asarray`` of the per-item results, in the same order as ``data``.
    """
    outputs = [None] * len(data)

    def worker(slot, item):
        # Each thread writes only to its own slot, so results keep input order.
        outputs[slot] = fn(item, **kwargs)

    workers = []
    for slot in range(len(data)):
        worker_thread = threading.Thread(
            name='threading_and_return',
            target=worker,
            args=(slot, data[slot]),
        )
        worker_thread.start()
        workers.append(worker_thread)

    # Wait for every thread before collecting the results.
    for worker_thread in workers:
        worker_thread.join()
    return np.asarray(outputs)

def apply_transform(x, transform_matrix, channel_index=2, fill_mode='nearest', cval=0., order=1):
    """Apply one affine transform to every channel of an image.

    Args:
        x: image array whose channel axis is ``channel_index``.
        transform_matrix: 3x3 homogeneous matrix. Its upper-left 2x2 block is
            used as the affine matrix and the first two entries of its last
            column as the offset given to ``scipy.ndimage.affine_transform``.
        channel_index: position of the channel axis in ``x`` (may be negative).
        fill_mode: ``mode`` argument forwarded to ``affine_transform``.
        cval: ``cval`` argument forwarded to ``affine_transform``.
        order: interpolation order forwarded to ``affine_transform``.

    Returns:
        The transformed image with the same axis layout as ``x``.
    """
    # Bring channels to the front so each 2-D plane can be warped on its own.
    channels_first = np.moveaxis(x, channel_index, 0)
    affine = transform_matrix[:2, :2]
    offset = transform_matrix[:2, 2]
    # Call affine_transform from the public scipy.ndimage namespace; the
    # scipy.ndimage.interpolation sub-module is a deprecated alias for it.
    warped = [
        ndi.affine_transform(channel, affine, offset, order=order, mode=fill_mode, cval=cval)
        for channel in channels_first
    ]
    # moveaxis restores the original layout for negative channel_index as well.
    return np.moveaxis(np.stack(warped, axis=0), 0, channel_index)

def transform_matrix_offset_center(matrix, x, y):
    """Wrap a 3x3 homogeneous transform so it acts about a shifted origin.

    The result is ``T(+c) . matrix . T(-c)`` where ``T`` is a translation and
    ``c = (x / 2 + 0.5, y / 2 + 0.5)``.

    Args:
        matrix: 3x3 homogeneous transform.
        x: extent along the first coordinate.
        y: extent along the second coordinate.

    Returns:
        The 3x3 combined transform.
    """
    centre_x = float(x) / 2 + 0.5
    centre_y = float(y) / 2 + 0.5
    to_centre = np.array([[1, 0, centre_x], [0, 1, centre_y], [0, 0, 1]])
    from_centre = np.array([[1, 0, -centre_x], [0, 1, -centre_y], [0, 0, 1]])
    return to_centre.dot(matrix).dot(from_centre)

def rotation(x, rg=20, is_random=False, row_index=0, col_index=1,
             channel_index=2, fill_mode='nearest', cval=0.):
    """Rotate an image by ``rg`` degrees, or by a random angle in [-rg, rg].

    Args:
        x: image array.
        rg: rotation angle in degrees, or the half-width of the random range.
        is_random: when true, draw the angle uniformly from ``[-rg, rg]``.
        row_index: axis of ``x`` holding rows.
        col_index: axis of ``x`` holding columns.
        channel_index: axis of ``x`` holding channels.
        fill_mode: forwarded to ``apply_transform``.
        cval: forwarded to ``apply_transform``.

    Returns:
        The rotated image as returned by ``apply_transform``.
    """
    degrees = np.random.uniform(-rg, rg) if is_random else rg
    theta = np.pi / 180 * degrees
    cos_t, sin_t = np.cos(theta), np.sin(theta)
    rotation_matrix = np.array([[cos_t, -sin_t, 0],
                                [sin_t, cos_t, 0],
                                [0, 0, 1]])
    height, width = x.shape[row_index], x.shape[col_index]
    # Re-centre the rotation using the image extents before warping.
    centred = transform_matrix_offset_center(rotation_matrix, height, width)
    return apply_transform(x, centred, channel_index, fill_mode, cval)

def crop(x, wrg, hrg, is_random=False, row_index=0, col_index=1, channel_index=2):
    """Cut an ``hrg`` x ``wrg`` window out of an image.

    Args:
        x: image array.
        wrg: width of the crop.
        hrg: height of the crop.
        is_random: when true, pick the window position uniformly at random
            among all valid positions; otherwise take the central window.
        row_index: axis of ``x`` whose length is read as the height.
        col_index: axis of ``x`` whose length is read as the width.
        channel_index: unused; kept for interface compatibility.

    Returns:
        ``x[h0:h0 + hrg, w0:w0 + wrg]``. Note that the slice is always taken
        on axes 0 and 1, regardless of ``row_index`` / ``col_index``.

    Raises:
        AssertionError: if the crop is not strictly smaller than the image.
    """
    h, w = x.shape[row_index], x.shape[col_index]
    assert (h > hrg) and (w > wrg), "The size of cropping should smaller than the original image"
    if is_random:
        # Offsets are drawn from 0..(h - hrg) inclusive. randint's upper bound
        # is exclusive, hence the +1. The earlier ``int(uniform(...) - 1)`` form
        # could never select the last two offsets and could produce -1.
        h_offset = int(np.random.randint(0, h - hrg + 1))
        w_offset = int(np.random.randint(0, w - wrg + 1))
    else:
        h_offset = int(np.floor((h - hrg) / 2.))
        w_offset = int(np.floor((w - wrg) / 2.))
    return x[h_offset: h_offset + hrg, w_offset: w_offset + wrg]

def flip_axis(x, axis, is_random=False):
    """Reverse an array along ``axis``, optionally only with a coin flip.

    Args:
        x: array-like to flip.
        axis: axis to reverse.
        is_random: when true, draw a value from ``uniform(-1, 1)`` and flip
            only if it is positive; otherwise return ``x`` untouched.

    Returns:
        The flipped array, or ``x`` itself when the random draw says not to flip.
    """
    if is_random and not np.random.uniform(-1, 1) > 0:
        return x
    # Move the target axis to the front, reverse it, and move it back.
    swapped = np.asarray(x).swapaxes(axis, 0)
    return swapped[::-1, ...].swapaxes(0, axis)

def imresize(x, size=[100, 100], interp='bilinear', mode=None):
    """Resize a 1- or 3-channel image with ``scipy.misc.imresize``.

    Args:
        x: image array whose last axis has length 1 or 3.
        size: target size forwarded to ``scipy.misc.imresize``.
        interp: interpolation name forwarded to ``scipy.misc.imresize``.
        mode: ``mode`` forwarded to ``scipy.misc.imresize``.

    Returns:
        The resized image; a single-channel input keeps a trailing axis of length 1.

    Raises:
        Exception: if the last axis of ``x`` is neither 1 nor 3.

    NOTE(review): the recorded training output in this notebook shows SciPy
    warning that ``imresize`` is deprecated and will be removed in SciPy 1.3.0.
    """
    channels = x.shape[-1]
    if channels == 1:
        # Resize the single 2-D plane, then restore the channel axis.
        plane = scipy.misc.imresize(x[:, :, 0], size, interp=interp, mode=mode)
        return plane[:, :, np.newaxis]
    if channels == 3:
        return scipy.misc.imresize(x, size, interp=interp, mode=mode)
    raise Exception("Unsupported channel %d" % x.shape[-1])

def prepro_img(x, mode=None):
    """Augment and rescale one image for training; pass it through otherwise.

    When ``mode == 'train'`` the image is randomly flipped along axis 1,
    randomly rotated within +/-16 degrees, resized to 79x79, randomly cropped
    to 64x64, then divided by 127.5 and shifted by -1. Any other ``mode``
    returns ``x`` unchanged.

    Args:
        x: image array.
        mode: ``'train'`` to enable augmentation.

    Returns:
        The processed image, or ``x`` itself when not in training mode.
    """
    if mode != 'train':
        return x
    flipped = flip_axis(x, axis=1, is_random=True)
    rotated = rotation(flipped, rg=16, is_random=True, fill_mode='nearest')
    # Enlarge by 15 pixels per side so the random crop has room to move.
    enlarged = imresize(rotated, size=[64 + 15, 64 + 15], interp='bilinear', mode=None)
    patch = crop(enlarged, wrg=64, hrg=64, is_random=True)
    # presumably imresize yields 0..255 values, making this a map to [-1, 1] — TODO confirm
    return patch / (255. / 2.) - 1.

def combine_and_save_image_sets(image_sets, directory):
    """Write side-by-side comparison strips, one file per image index.

    For every index ``i`` of the first set, the ``i``-th image of each set is
    placed next to the others, each followed by a 5-pixel-wide zero separator,
    and the strip is saved as ``combined_<i>.jpg`` in ``directory``.

    Args:
        image_sets: sequence of image collections, indexed the same way.
        directory: folder that receives the ``combined_<i>.jpg`` files.
    """
    for i in range(len(image_sets[0])):
        pieces = []
        for image_set in image_sets:
            tile = image_set[i]
            # Zero-valued spacer with the tile's height, 5 columns and 3 channels.
            spacer = np.zeros((tile.shape[0], 5, 3))
            pieces.extend([tile, spacer])
        combined_image = np.concatenate(pieces, axis=1)
        target = os.path.join(directory, 'combined_{}.jpg'.format(i))
        scipy.misc.imsave(target, combined_image)

def save(saver, sess, logdir, step):
    """Write a checkpoint named ``model.ckpt`` under ``logdir``.

    Args:
        saver: object whose ``save(sess, path, global_step=...)`` method writes the checkpoint.
        sess: session handed to ``saver.save``.
        logdir: target directory; created if it does not exist.
        step: value passed to ``saver.save`` as ``global_step``.
    """
    # The file name is fixed here; it is not read from the config.
    model_name = 'model.ckpt'
    checkpoint_path = os.path.join(logdir, model_name)
    # Create the directory first so the save below has somewhere to write.
    if not os.path.exists(logdir):
        os.makedirs(logdir)
    saver.save(sess, checkpoint_path, global_step=step)
    print('The checkpoint has been created.')

def load(saver, sess, ckpt_path):
    """Restore variables into ``sess`` from the checkpoint at ``ckpt_path``.

    Args:
        saver: object whose ``restore(sess, path)`` method performs the restore.
        sess: session to restore into.
        ckpt_path: path of the checkpoint to read.
    """
    saver.restore(sess, ckpt_path)
    print("Restored model parameters from {}".format(ckpt_path))

In [8]:
# Directory that receives the per-epoch sample grids.
train_samples_dir = 'train_samples_1215_b'
if os.path.exists(train_samples_dir) == False:
    os.makedirs(train_samples_dir)

# Notebook-level hyper-parameters read by the training loop.
lr = 0.0002
lr_decay = 0.5      # the training loop sets the rate to lr * lr_decay ** (epoch // decay_every)
decay_every = 200  # number of epochs between learning-rate updates
beta1 = 0.5
checkpoint_dir = './checkpoint_1215_b'
z_dim = 512
image_size = 64
c_dim = 3
batch_size = 64
# Side length of the square grid used when saving a batch of samples.
ni = int(np.ceil(np.sqrt(batch_size)))

# Fixed noise and captions, reused every epoch for the sample grid.
sample_size = batch_size
sample_seed = np.random.normal(loc=0.0, scale=1.0, size=(sample_size, z_dim)).astype(np.float32)
# Eight groups of int(sample_size / ni) copies of the same sentence. With
# batch_size = 64 this gives 8 * 8 = 64 captions. The group count is written
# out by hand, so the reshape below only works when the total equals sample_size.
sample_sentence = ["a black bird with oily black feathers and rounded black beak."] * int(sample_size/ni) + \
                  ["a black bird with oily black feathers and rounded black beak."] * int(sample_size/ni) + \
                  ["a black bird with oily black feathers and rounded black beak."] * int(sample_size/ni) + \
                  ["a black bird with oily black feathers and rounded black beak."] * int(sample_size/ni) + \
                  ["a black bird with oily black feathers and rounded black beak."] * int(sample_size/ni) + \
                  ["a black bird with oily black feathers and rounded black beak."] * int(sample_size/ni) + \
                  ["a black bird with oily black feathers and rounded black beak."] * int(sample_size/ni) + \
                  ["a black bird with oily black feathers and rounded black beak."] * int(sample_size/ni)
# Replace each sentence in place with its id sequence.
for i, sent in enumerate(sample_sentence):
    sample_sentence[i] = sent2ID(sent)
sample_sentence = np.asarray(sample_sentence)
# Flatten to one row of 20 ids per sample.
sample_sentence = np.reshape(sample_sentence, (sample_size, 20))
print(sample_sentence.shape)

(64, 20)


In [13]:
class Text2Img:
    """TensorFlow 1.x graph for text-to-image training.

    Constructing an instance adds the following to the default graph:

    * input placeholders (``t_real_image``, ``t_wrong_image``,
      ``t_real_caption``, ``t_wrong_caption``, ``t_z``);
    * a margin ranking loss between image and caption encodings (``rnn_loss``);
    * conditional GAN losses (``d_loss``, ``g_loss``);
    * an inference-mode generator (``net_g``) and caption encoder (``net_rnn``);
    * one training op per loss (``rnn_optim``, ``d_optim``, ``g_optim``), all
      sharing the non-trainable learning-rate variable ``lr_v``.
    """

    def __init__(self):
        # --- Hyper-parameters ---
        self.lr = 0.0002
        self.lr_decay = 0.5
        self.decay_every = 200
        self.beta1 = 0.5
        self.z_dim = 512
        self.image_size = 64
        self.c_dim = 3
        self.batch_size = 64
        self.alpha = 0.2  # margin of the ranking loss below

        # --- Placeholders ---
        # Image shapes come from this instance's own settings, not from
        # notebook-level globals, so the class does not depend on cell state.
        image_shape = [self.batch_size, self.image_size, self.image_size, self.c_dim]
        self.t_real_image = tf.placeholder('float32', image_shape, name='real_image')
        self.t_wrong_image = tf.placeholder('float32', image_shape, name='wrong_image')
        self.t_real_caption = tf.placeholder(dtype=tf.int64, shape=[self.batch_size, None], name='real_caption_input')
        self.t_wrong_caption = tf.placeholder(dtype=tf.int64, shape=[self.batch_size, None], name='wrong_caption_input')
        self.t_z = tf.placeholder(tf.float32, [self.batch_size, self.z_dim], name='z_noise')

        # --- Training phase: CNN / RNN embedding ---
        # The first call of each encoder creates the variables (reuse=False);
        # every later call shares them (reuse=True).
        net_cnn = CNN_ENCODER(self.t_real_image, is_training=True, reuse=False)
        x = net_cnn.outputs
        v = RNN_ENCODER(self.t_real_caption, is_training=True, reuse=False).outputs
        x_w = CNN_ENCODER(self.t_wrong_image, is_training=True, reuse=True).outputs
        v_w = RNN_ENCODER(self.t_wrong_caption, is_training=True, reuse=True).outputs
        # Hinge terms with margin alpha: the matching pair (x, v) should score
        # higher than (x, wrong caption) and than (wrong image, v).
        self.rnn_loss = tf.reduce_mean(tf.maximum(0., self.alpha - cosine_similarity(x, v) + cosine_similarity(x, v_w))) + \
                    tf.reduce_mean(tf.maximum(0., self.alpha - cosine_similarity(x, v) + cosine_similarity(x_w, v)))

        # --- Training phase: GAN ---
        # The caption encoder is rebuilt with is_training=False to condition the GAN.
        self.net_rnn = RNN_ENCODER(self.t_real_caption, is_training=False, reuse=True)
        net_fake_image = GENERATOR(self.t_z, self.net_rnn.outputs, is_training=True, reuse=False)
        net_disc_fake = DISCRIMINATOR(net_fake_image.outputs, self.net_rnn.outputs, is_training=True, reuse=False)
        disc_fake_logits = net_disc_fake.logits
        net_disc_real = DISCRIMINATOR(self.t_real_image, self.net_rnn.outputs, is_training=True, reuse=True)
        disc_real_logits = net_disc_real.logits
        net_disc_mismatch = DISCRIMINATOR(self.t_real_image, RNN_ENCODER(self.t_wrong_caption, is_training=False, reuse=True).outputs,
                                        is_training=True, reuse=True)
        disc_mismatch_logits = net_disc_mismatch.logits
        # Discriminator targets: (real image, right caption) -> 1,
        # (real image, wrong caption) -> 0, (generated image, right caption) -> 0.
        d_loss1 = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=disc_real_logits,     labels=tf.ones_like(disc_real_logits),      name='d1'))
        d_loss2 = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=disc_mismatch_logits, labels=tf.zeros_like(disc_mismatch_logits), name='d2'))
        d_loss3 = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=disc_fake_logits,     labels=tf.zeros_like(disc_fake_logits),     name='d3'))
        # The two "negative" terms are averaged so they weigh as much as the positive one.
        self.d_loss = d_loss1 + (d_loss2 + d_loss3) * 0.5
        # The generator is rewarded when the discriminator labels its output as real.
        self.g_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=disc_fake_logits, labels=tf.ones_like(disc_fake_logits), name='g'))

        # --- Testing phase ---
        # Generator in inference mode, sharing the training variables.
        self.net_g = GENERATOR(self.t_z, RNN_ENCODER(self.t_real_caption, is_training=False, reuse=True).outputs,
                            is_training=False, reuse=True)

        # --- Training ops ---
        # Variables and update ops are partitioned by a substring of their name.
        rnn_vars = [var for var in tf.trainable_variables() if 'rnnencoder' in var.name]
        cnn_vars = [var for var in tf.trainable_variables() if 'cnnencoder' in var.name]
        d_vars = [var for var in tf.trainable_variables() if 'discriminator' in var.name]
        g_vars = [var for var in tf.trainable_variables() if 'generator' in var.name]
        # presumably these are batch-norm statistic updates — confirm in utils.model_1215_a
        update_ops_CNN = [var for var in tf.get_collection(tf.GraphKeys.UPDATE_OPS) if 'cnnencoder' in var.name]
        update_ops_D = [var for var in tf.get_collection(tf.GraphKeys.UPDATE_OPS) if 'discriminator' in var.name]
        update_ops_G = [var for var in tf.get_collection(tf.GraphKeys.UPDATE_OPS) if 'generator' in var.name]
        with tf.variable_scope('learning_rate'):
            # Non-trainable so the training loop can overwrite it with tf.assign.
            self.lr_v = tf.Variable(self.lr, trainable=False)
        with tf.control_dependencies(update_ops_CNN):
            # Encoder gradients are clipped to a global norm of 10 before being applied.
            grads, _ = tf.clip_by_global_norm(tf.gradients(self.rnn_loss, rnn_vars + cnn_vars), 10)
            optimizer = tf.train.AdamOptimizer(self.lr_v, beta1=self.beta1)
            self.rnn_optim = optimizer.apply_gradients(zip(grads, rnn_vars + cnn_vars))
        with tf.control_dependencies(update_ops_D):
            # Only discriminator variables are updated by d_optim ...
            self.d_optim = tf.train.AdamOptimizer(self.lr_v, beta1=self.beta1).minimize(self.d_loss, var_list=d_vars)
        with tf.control_dependencies(update_ops_G):
            # ... and only generator variables by g_optim; the encoders are
            # trained exclusively through rnn_optim.
            self.g_optim = tf.train.AdamOptimizer(self.lr_v, beta1=self.beta1).minimize(self.g_loss, var_list=g_vars)

In [14]:
# Build a fresh graph and model.
tf.reset_default_graph()
model = Text2Img()

# Session that grows GPU memory on demand and allows soft device placement.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.allow_soft_placement = True
sess = tf.Session(config=config)
init = tf.global_variables_initializer()
sess.run(init)
saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=10)

# Restore the latest checkpoint from checkpoint_dir when one exists.
ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
if ckpt and ckpt.model_checkpoint_path:
    loader = tf.train.Saver(var_list=tf.global_variables())
    # NOTE(review): load_step is parsed but never used, so a restored run still
    # starts counting from epoch 0. Decide whether training should resume there.
    load_step = int(os.path.basename(ckpt.model_checkpoint_path).split('-')[1])
    load(loader, sess, ckpt.model_checkpoint_path)
else:
    print('no checkpoints find.')

n_epoch = 1000
n_batch_epoch = int(n_images_train / batch_size)
for epoch in range(n_epoch):
    start_time = time.time()
    # Step-wise learning-rate decay: lr * lr_decay ** (epoch // decay_every).
    if epoch !=0 and (epoch % decay_every == 0):
        new_lr_decay = lr_decay ** (epoch // decay_every)
        sess.run(tf.assign(model.lr_v, lr * new_lr_decay))
        log = " ** new learning rate: %f" % (lr * new_lr_decay)
        print(log)
    elif epoch == 0:
        log = " ** init lr: %f  decay_every_epoch: %d, lr_decay: %f" % (lr, decay_every, lr_decay)
        print(log)
    for step in range(n_batch_epoch):
        step_time = time.time()
        # Matching pairs: random captions plus the image each caption belongs to.
        idexs = get_random_int(min=0, max=n_captions_train-1, number=batch_size)
        b_real_caption = train_captions[idexs]
        # Integer floor division maps a caption index to its image index.
        b_real_images = train_images[np.asarray(idexs) // n_captions_per_image]
        # Mismatching captions and images are drawn independently at random.
        idexs = get_random_int(min=0, max=n_captions_train-1, number=batch_size)
        b_wrong_caption = train_captions[idexs]
        idexs2 = get_random_int(min=0, max=n_images_train-1, number=batch_size)
        b_wrong_images = train_images[idexs2]
        b_z = np.random.normal(loc=0.0, scale=1.0, size=(batch_size, z_dim)).astype(np.float32)
        # Per-image augmentation, one thread per image.
        b_real_images = threading_data(b_real_images, prepro_img, mode='train')
        b_wrong_images = threading_data(b_wrong_images, prepro_img, mode='train')
        # The image/text encoders are trained only during the first 300 epochs.
        if epoch < 300:
            errRNN, _ = sess.run([model.rnn_loss, model.rnn_optim], feed_dict={
                                            model.t_real_image : b_real_images,
                                            model.t_wrong_image : b_wrong_images,
                                            model.t_real_caption : b_real_caption,
                                            model.t_wrong_caption : b_wrong_caption})
        else:
            errRNN = 0
        # One discriminator step followed by one generator step.
        errD, _ = sess.run([model.d_loss, model.d_optim], feed_dict={
                            model.t_real_image : b_real_images,
                            model.t_wrong_caption : b_wrong_caption,
                            model.t_real_caption : b_real_caption,
                            model.t_z : b_z})
        errG, _ = sess.run([model.g_loss, model.g_optim], feed_dict={
                            model.t_real_caption : b_real_caption,
                            model.t_z : b_z})
    # Losses and "time" below are those of the last step of the epoch.
    print("Epoch: [%d/%d] time: %4.4fs, d_loss: %.8f, g_loss: %.8f, rnn_loss: %.8f" \
                        % (epoch, n_epoch, time.time() - step_time, errD, errG, errRNN))
    # Always true with a modulus of 1; kept as a knob for sampling less often.
    if (epoch + 1) % 1 == 0:
        print(" ** Epoch %d took %fs" % (epoch, time.time()-start_time))
        img_gen, rnn_out = sess.run([model.net_g.outputs, model.net_rnn.outputs], feed_dict={
                                        model.t_real_caption : sample_sentence,
                                        model.t_z : sample_seed})
        # Write into the directory created earlier instead of repeating its name.
        save_images(img_gen, [ni, ni], os.path.join(train_samples_dir, 'train_{:02d}.png'.format(epoch)))
    if (epoch != 0) and (epoch % 50) == 0:
        save(saver, sess, checkpoint_dir, epoch)
        print("[*] Save checkpoints SUCCESS!")

# Final checkpoint goes to the location named in the config. That directory is
# not created anywhere else in this notebook, so make sure it exists before the
# save; otherwise the result of the whole run could be lost at the last step.
if not os.path.exists(cfg.CHECKPOINT_DIR):
    os.makedirs(cfg.CHECKPOINT_DIR)
checkpoint_path = os.path.join(cfg.CHECKPOINT_DIR, cfg.CHECKPOINT_NAME)
saver.save(sess, checkpoint_path, global_step=epoch)
print('The checkpoint has been created.')

no checkpoints find.
 ** init lr: 0.000200  decay_every_epoch: 200, lr_decay: 0.500000


`imresize` is deprecated in SciPy 1.0.0, and will be removed in 1.3.0.
Use Pillow instead: ``numpy.array(Image.fromarray(arr).resize())``.


Epoch: [0/1000] time: 0.7290s, d_loss: 1.49649596, g_loss: 0.85392177, rnn_loss: 0.29632312
 ** Epoch 0 took 119.728280s


`imsave` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imwrite`` instead.


Epoch: [1/1000] time: 0.7312s, d_loss: 1.37754560, g_loss: 0.99413884, rnn_loss: 0.29497206
 ** Epoch 1 took 100.732207s
Epoch: [2/1000] time: 0.7283s, d_loss: 1.37133169, g_loss: 0.84102124, rnn_loss: 0.31148279
 ** Epoch 2 took 100.582112s
Epoch: [3/1000] time: 0.7309s, d_loss: 1.34644961, g_loss: 0.97726154, rnn_loss: 0.24170528
 ** Epoch 3 took 100.698752s
Epoch: [4/1000] time: 0.7322s, d_loss: 1.34033656, g_loss: 0.88991940, rnn_loss: 0.26539040
 ** Epoch 4 took 100.877783s
Epoch: [5/1000] time: 0.7242s, d_loss: 1.25726700, g_loss: 1.61848593, rnn_loss: 0.22631121
 ** Epoch 5 took 100.273991s
Epoch: [6/1000] time: 0.7258s, d_loss: 1.28749657, g_loss: 1.62431848, rnn_loss: 0.24634567
 ** Epoch 6 took 100.194320s
Epoch: [7/1000] time: 0.7244s, d_loss: 1.30340540, g_loss: 0.75689375, rnn_loss: 0.29054916
 ** Epoch 7 took 100.353995s
Epoch: [8/1000] time: 0.7192s, d_loss: 1.26029658, g_loss: 0.69842845, rnn_loss: 0.26558471
 ** Epoch 8 took 99.971570s
Epoch: [9/1000] time: 0.7262s, d_

Epoch: [68/1000] time: 0.7224s, d_loss: 0.69536513, g_loss: 2.25021648, rnn_loss: 0.12083762
 ** Epoch 68 took 100.070199s
Epoch: [69/1000] time: 0.7244s, d_loss: 0.70287126, g_loss: 1.62849259, rnn_loss: 0.20237416
 ** Epoch 69 took 100.292163s
Epoch: [70/1000] time: 0.7233s, d_loss: 0.83298361, g_loss: 2.69859219, rnn_loss: 0.16642335
 ** Epoch 70 took 100.236041s
Epoch: [71/1000] time: 0.7331s, d_loss: 0.90104675, g_loss: 0.92706370, rnn_loss: 0.17245217
 ** Epoch 71 took 99.920842s
Epoch: [72/1000] time: 0.7213s, d_loss: 0.86087036, g_loss: 0.90935361, rnn_loss: 0.18205656
 ** Epoch 72 took 100.186673s
Epoch: [73/1000] time: 0.7254s, d_loss: 0.71827585, g_loss: 1.48651004, rnn_loss: 0.10914633
 ** Epoch 73 took 100.102146s
Epoch: [74/1000] time: 0.7207s, d_loss: 1.01200438, g_loss: 0.47195983, rnn_loss: 0.12687314
 ** Epoch 74 took 100.030205s
Epoch: [75/1000] time: 0.7194s, d_loss: 0.61856294, g_loss: 1.96362579, rnn_loss: 0.07783947
 ** Epoch 75 took 100.221638s
Epoch: [76/1000] 

Epoch: [134/1000] time: 0.7289s, d_loss: 0.36012375, g_loss: 3.12904978, rnn_loss: 0.05978405
 ** Epoch 134 took 100.392999s
Epoch: [135/1000] time: 0.7275s, d_loss: 0.58893585, g_loss: 2.93194437, rnn_loss: 0.10933587
 ** Epoch 135 took 100.169287s
Epoch: [136/1000] time: 0.7241s, d_loss: 0.61770403, g_loss: 3.50717759, rnn_loss: 0.09381709
 ** Epoch 136 took 100.427167s
Epoch: [137/1000] time: 0.7255s, d_loss: 0.54991072, g_loss: 3.58596635, rnn_loss: 0.15599479
 ** Epoch 137 took 100.720543s
Epoch: [138/1000] time: 0.7222s, d_loss: 0.62247247, g_loss: 2.80832767, rnn_loss: 0.06736452
 ** Epoch 138 took 100.077939s
Epoch: [139/1000] time: 0.7421s, d_loss: 0.54062134, g_loss: 1.45620489, rnn_loss: 0.12845470
 ** Epoch 139 took 100.614348s
Epoch: [140/1000] time: 0.7261s, d_loss: 1.75169694, g_loss: 0.46919703, rnn_loss: 0.11354178
 ** Epoch 140 took 100.671023s
Epoch: [141/1000] time: 0.7290s, d_loss: 0.44249225, g_loss: 2.46707392, rnn_loss: 0.06774154
 ** Epoch 141 took 100.979275s


 ** new learning rate: 0.000100
Epoch: [200/1000] time: 0.7350s, d_loss: 0.68554288, g_loss: 1.47026610, rnn_loss: 0.08299430
 ** Epoch 200 took 100.723303s
The checkpoint has been created.
[*] Save checkpoints SUCCESS!
Epoch: [201/1000] time: 0.7307s, d_loss: 0.39364752, g_loss: 1.86816347, rnn_loss: 0.12625578
 ** Epoch 201 took 101.385787s
Epoch: [202/1000] time: 0.7348s, d_loss: 0.30989486, g_loss: 1.98233652, rnn_loss: 0.12357607
 ** Epoch 202 took 101.176440s
Epoch: [203/1000] time: 0.7333s, d_loss: 0.36042565, g_loss: 1.52097631, rnn_loss: 0.11553637
 ** Epoch 203 took 101.005152s
Epoch: [204/1000] time: 0.7321s, d_loss: 0.33497396, g_loss: 2.28640747, rnn_loss: 0.08232877
 ** Epoch 204 took 101.170265s
Epoch: [205/1000] time: 0.7317s, d_loss: 0.40763026, g_loss: 1.29966164, rnn_loss: 0.09354288
 ** Epoch 205 took 101.019372s
Epoch: [206/1000] time: 0.7290s, d_loss: 0.40399712, g_loss: 2.23905420, rnn_loss: 0.11000635
 ** Epoch 206 took 100.832890s
Epoch: [207/1000] time: 0.7296

Epoch: [265/1000] time: 0.7280s, d_loss: 0.29639593, g_loss: 1.97412479, rnn_loss: 0.08518781
 ** Epoch 265 took 100.986466s
Epoch: [266/1000] time: 0.7285s, d_loss: 0.28512785, g_loss: 2.35233188, rnn_loss: 0.10793336
 ** Epoch 266 took 100.484382s
Epoch: [267/1000] time: 0.7332s, d_loss: 0.52441961, g_loss: 1.35312891, rnn_loss: 0.07670699
 ** Epoch 267 took 100.426997s
Epoch: [268/1000] time: 0.7294s, d_loss: 0.31828138, g_loss: 2.50327587, rnn_loss: 0.10760441
 ** Epoch 268 took 100.853459s
Epoch: [269/1000] time: 0.7264s, d_loss: 0.26218948, g_loss: 2.77501059, rnn_loss: 0.05476382
 ** Epoch 269 took 100.642251s
Epoch: [270/1000] time: 0.7239s, d_loss: 0.25869757, g_loss: 2.30982447, rnn_loss: 0.07530057
 ** Epoch 270 took 100.441041s
Epoch: [271/1000] time: 0.7374s, d_loss: 0.34136382, g_loss: 1.86899471, rnn_loss: 0.05409866
 ** Epoch 271 took 100.711624s
Epoch: [272/1000] time: 0.7244s, d_loss: 0.48648956, g_loss: 3.37645102, rnn_loss: 0.07512838
 ** Epoch 272 took 100.464275s


Epoch: [331/1000] time: 0.6862s, d_loss: 0.53908539, g_loss: 1.73854375, rnn_loss: 0.00000000
 ** Epoch 331 took 95.554707s
Epoch: [332/1000] time: 0.6895s, d_loss: 0.53178620, g_loss: 1.68420684, rnn_loss: 0.00000000
 ** Epoch 332 took 95.120155s
Epoch: [333/1000] time: 0.6915s, d_loss: 0.33108112, g_loss: 3.20127678, rnn_loss: 0.00000000
 ** Epoch 333 took 94.955430s
Epoch: [334/1000] time: 0.6891s, d_loss: 0.37862426, g_loss: 3.36491752, rnn_loss: 0.00000000
 ** Epoch 334 took 95.896468s
Epoch: [335/1000] time: 0.6879s, d_loss: 0.35916191, g_loss: 1.13773608, rnn_loss: 0.00000000
 ** Epoch 335 took 95.161989s
Epoch: [336/1000] time: 0.6932s, d_loss: 0.36986715, g_loss: 3.73849630, rnn_loss: 0.00000000
 ** Epoch 336 took 95.188981s
Epoch: [337/1000] time: 0.6941s, d_loss: 0.32665598, g_loss: 2.00104594, rnn_loss: 0.00000000
 ** Epoch 337 took 95.673546s
Epoch: [338/1000] time: 0.6939s, d_loss: 0.36614895, g_loss: 3.10471869, rnn_loss: 0.00000000
 ** Epoch 338 took 95.467385s
Epoch: [

Epoch: [397/1000] time: 0.6874s, d_loss: 1.46679902, g_loss: 2.57434082, rnn_loss: 0.00000000
 ** Epoch 397 took 94.937856s
Epoch: [398/1000] time: 0.6954s, d_loss: 0.14283602, g_loss: 2.54510832, rnn_loss: 0.00000000
 ** Epoch 398 took 95.411429s
Epoch: [399/1000] time: 0.7045s, d_loss: 0.20784599, g_loss: 2.93210149, rnn_loss: 0.00000000
 ** Epoch 399 took 96.000744s
 ** new learning rate: 0.000050
Epoch: [400/1000] time: 0.6904s, d_loss: 0.36145061, g_loss: 2.37543273, rnn_loss: 0.00000000
 ** Epoch 400 took 95.635947s
The checkpoint has been created.
[*] Save checkpoints SUCCESS!
Epoch: [401/1000] time: 0.7024s, d_loss: 0.22304128, g_loss: 3.44137502, rnn_loss: 0.00000000
 ** Epoch 401 took 95.879329s
Epoch: [402/1000] time: 0.6892s, d_loss: 0.27717543, g_loss: 2.44635940, rnn_loss: 0.00000000
 ** Epoch 402 took 96.081604s
Epoch: [403/1000] time: 0.6897s, d_loss: 0.33901063, g_loss: 2.36472940, rnn_loss: 0.00000000
 ** Epoch 403 took 95.348230s
Epoch: [404/1000] time: 0.6968s, d_lo

Epoch: [462/1000] time: 0.6955s, d_loss: 0.31525826, g_loss: 2.95553112, rnn_loss: 0.00000000
 ** Epoch 462 took 95.556427s
Epoch: [463/1000] time: 0.6997s, d_loss: 0.38235861, g_loss: 2.14511633, rnn_loss: 0.00000000
 ** Epoch 463 took 95.948259s
Epoch: [464/1000] time: 0.6977s, d_loss: 0.34715939, g_loss: 2.15880227, rnn_loss: 0.00000000
 ** Epoch 464 took 95.950935s
Epoch: [465/1000] time: 0.6897s, d_loss: 0.04915903, g_loss: 2.91892290, rnn_loss: 0.00000000
 ** Epoch 465 took 95.685373s
Epoch: [466/1000] time: 0.6917s, d_loss: 0.22480787, g_loss: 2.23410296, rnn_loss: 0.00000000
 ** Epoch 466 took 95.589964s
Epoch: [467/1000] time: 0.6861s, d_loss: 0.38242653, g_loss: 2.60787368, rnn_loss: 0.00000000
 ** Epoch 467 took 95.726803s
Epoch: [468/1000] time: 0.6829s, d_loss: 0.59770286, g_loss: 1.89008713, rnn_loss: 0.00000000
 ** Epoch 468 took 95.000569s
Epoch: [469/1000] time: 0.6920s, d_loss: 0.11035276, g_loss: 2.60275698, rnn_loss: 0.00000000
 ** Epoch 469 took 95.203493s
Epoch: [

Epoch: [528/1000] time: 0.6848s, d_loss: 0.82303190, g_loss: 2.06422400, rnn_loss: 0.00000000
 ** Epoch 528 took 94.902857s
Epoch: [529/1000] time: 0.6843s, d_loss: 0.12057817, g_loss: 3.13660336, rnn_loss: 0.00000000
 ** Epoch 529 took 95.023198s
Epoch: [530/1000] time: 0.6891s, d_loss: 0.15067495, g_loss: 2.21040916, rnn_loss: 0.00000000
 ** Epoch 530 took 95.104677s
Epoch: [531/1000] time: 0.6909s, d_loss: 0.25549686, g_loss: 2.31875730, rnn_loss: 0.00000000
 ** Epoch 531 took 94.908033s
Epoch: [532/1000] time: 0.6920s, d_loss: 0.05891945, g_loss: 3.06131959, rnn_loss: 0.00000000
 ** Epoch 532 took 94.926098s
Epoch: [533/1000] time: 0.6890s, d_loss: 0.32792461, g_loss: 1.91638589, rnn_loss: 0.00000000
 ** Epoch 533 took 95.124152s
Epoch: [534/1000] time: 0.6869s, d_loss: 0.17631978, g_loss: 4.46703720, rnn_loss: 0.00000000
 ** Epoch 534 took 94.832918s
Epoch: [535/1000] time: 0.6950s, d_loss: 0.28695148, g_loss: 2.58179355, rnn_loss: 0.00000000
 ** Epoch 535 took 94.834571s
Epoch: [

Epoch: [594/1000] time: 0.6894s, d_loss: 0.24764132, g_loss: 2.98362255, rnn_loss: 0.00000000
 ** Epoch 594 took 95.033024s
Epoch: [595/1000] time: 0.6875s, d_loss: 0.15852898, g_loss: 3.23102403, rnn_loss: 0.00000000
 ** Epoch 595 took 95.234772s
Epoch: [596/1000] time: 0.6881s, d_loss: 0.11442304, g_loss: 3.04267430, rnn_loss: 0.00000000
 ** Epoch 596 took 95.114132s
Epoch: [597/1000] time: 0.6847s, d_loss: 0.14859420, g_loss: 2.73307419, rnn_loss: 0.00000000
 ** Epoch 597 took 95.101358s
Epoch: [598/1000] time: 0.6901s, d_loss: 0.46998674, g_loss: 3.11230278, rnn_loss: 0.00000000
 ** Epoch 598 took 95.311799s
Epoch: [599/1000] time: 0.6858s, d_loss: 0.59509313, g_loss: 3.05604315, rnn_loss: 0.00000000
 ** Epoch 599 took 95.168427s
 ** new learning rate: 0.000025
Epoch: [600/1000] time: 0.6948s, d_loss: 0.22222646, g_loss: 1.99030948, rnn_loss: 0.00000000
 ** Epoch 600 took 95.574857s
The checkpoint has been created.
[*] Save checkpoints SUCCESS!
Epoch: [601/1000] time: 0.6894s, d_lo

Epoch: [659/1000] time: 0.6941s, d_loss: 0.13953911, g_loss: 3.90833902, rnn_loss: 0.00000000
 ** Epoch 659 took 95.582210s
Epoch: [660/1000] time: 0.6973s, d_loss: 0.18873431, g_loss: 2.57450771, rnn_loss: 0.00000000
 ** Epoch 660 took 95.368997s
Epoch: [661/1000] time: 0.6888s, d_loss: 0.10105397, g_loss: 2.24803925, rnn_loss: 0.00000000
 ** Epoch 661 took 95.263026s
Epoch: [662/1000] time: 0.6888s, d_loss: 0.19642240, g_loss: 2.92910886, rnn_loss: 0.00000000
 ** Epoch 662 took 95.532489s
Epoch: [663/1000] time: 0.6875s, d_loss: 0.19517994, g_loss: 2.43901944, rnn_loss: 0.00000000
 ** Epoch 663 took 95.384099s
Epoch: [664/1000] time: 0.7041s, d_loss: 0.07068720, g_loss: 3.69943166, rnn_loss: 0.00000000
 ** Epoch 664 took 95.323778s
Epoch: [665/1000] time: 0.7024s, d_loss: 0.19392033, g_loss: 2.55140924, rnn_loss: 0.00000000
 ** Epoch 665 took 95.235702s
Epoch: [666/1000] time: 0.6892s, d_loss: 0.06540371, g_loss: 4.22521400, rnn_loss: 0.00000000
 ** Epoch 666 took 95.648153s
Epoch: [

Epoch: [725/1000] time: 0.6961s, d_loss: 0.16689377, g_loss: 3.06090021, rnn_loss: 0.00000000
 ** Epoch 725 took 95.664838s
Epoch: [726/1000] time: 0.6914s, d_loss: 0.39242399, g_loss: 1.90669203, rnn_loss: 0.00000000
 ** Epoch 726 took 95.536027s
Epoch: [727/1000] time: 0.6980s, d_loss: 0.23276754, g_loss: 2.74709105, rnn_loss: 0.00000000
 ** Epoch 727 took 95.718106s
Epoch: [728/1000] time: 0.6866s, d_loss: 0.08670847, g_loss: 3.26392555, rnn_loss: 0.00000000
 ** Epoch 728 took 95.649178s
Epoch: [729/1000] time: 0.6886s, d_loss: 0.09787811, g_loss: 3.02437210, rnn_loss: 0.00000000
 ** Epoch 729 took 95.847628s
Epoch: [730/1000] time: 0.6913s, d_loss: 0.33168709, g_loss: 1.94058597, rnn_loss: 0.00000000
 ** Epoch 730 took 95.743218s
Epoch: [731/1000] time: 0.6918s, d_loss: 0.10807772, g_loss: 2.82239294, rnn_loss: 0.00000000
 ** Epoch 731 took 95.511526s
Epoch: [732/1000] time: 0.6958s, d_loss: 0.04971494, g_loss: 3.23304462, rnn_loss: 0.00000000
 ** Epoch 732 took 95.557106s
Epoch: [

Epoch: [791/1000] time: 0.6967s, d_loss: 0.63134098, g_loss: 1.88803327, rnn_loss: 0.00000000
 ** Epoch 791 took 96.662262s
Epoch: [792/1000] time: 0.7149s, d_loss: 0.36472416, g_loss: 2.06318283, rnn_loss: 0.00000000
 ** Epoch 792 took 96.854851s
Epoch: [793/1000] time: 0.6992s, d_loss: 0.06926033, g_loss: 3.54841757, rnn_loss: 0.00000000
 ** Epoch 793 took 97.104078s
Epoch: [794/1000] time: 0.7028s, d_loss: 0.11836354, g_loss: 3.14610052, rnn_loss: 0.00000000
 ** Epoch 794 took 96.904320s
Epoch: [795/1000] time: 0.6943s, d_loss: 0.09940353, g_loss: 3.20571733, rnn_loss: 0.00000000
 ** Epoch 795 took 96.550879s
Epoch: [796/1000] time: 0.7004s, d_loss: 0.06352280, g_loss: 3.44907546, rnn_loss: 0.00000000
 ** Epoch 796 took 96.399016s
Epoch: [797/1000] time: 0.6929s, d_loss: 0.15602207, g_loss: 2.46550894, rnn_loss: 0.00000000
 ** Epoch 797 took 96.473928s
Epoch: [798/1000] time: 0.6885s, d_loss: 0.23965807, g_loss: 2.05443382, rnn_loss: 0.00000000
 ** Epoch 798 took 96.403267s
Epoch: [

Epoch: [856/1000] time: 0.7416s, d_loss: 0.07098988, g_loss: 2.28779411, rnn_loss: 0.00000000
 ** Epoch 856 took 96.784974s
Epoch: [857/1000] time: 0.7092s, d_loss: 0.18112411, g_loss: 3.40401816, rnn_loss: 0.00000000
 ** Epoch 857 took 96.563474s
Epoch: [858/1000] time: 0.6927s, d_loss: 0.48312911, g_loss: 2.27302480, rnn_loss: 0.00000000
 ** Epoch 858 took 96.553096s
Epoch: [859/1000] time: 0.6975s, d_loss: 0.12883641, g_loss: 4.02741241, rnn_loss: 0.00000000
 ** Epoch 859 took 96.781672s
Epoch: [860/1000] time: 0.6994s, d_loss: 0.31378710, g_loss: 1.87345243, rnn_loss: 0.00000000
 ** Epoch 860 took 96.970049s
Epoch: [861/1000] time: 0.7161s, d_loss: 0.14540896, g_loss: 2.20583272, rnn_loss: 0.00000000
 ** Epoch 861 took 97.271165s
Epoch: [862/1000] time: 0.7036s, d_loss: 0.25167438, g_loss: 3.27396965, rnn_loss: 0.00000000
 ** Epoch 862 took 96.426781s
Epoch: [863/1000] time: 0.7253s, d_loss: 0.17770202, g_loss: 2.05113745, rnn_loss: 0.00000000
 ** Epoch 863 took 97.138194s
Epoch: [

Epoch: [922/1000] time: 0.7059s, d_loss: 0.18346640, g_loss: 2.01748419, rnn_loss: 0.00000000
 ** Epoch 922 took 98.365837s
Epoch: [923/1000] time: 0.7236s, d_loss: 0.11494214, g_loss: 2.36484289, rnn_loss: 0.00000000
 ** Epoch 923 took 98.802034s
Epoch: [924/1000] time: 0.7833s, d_loss: 0.08828337, g_loss: 2.99044895, rnn_loss: 0.00000000
 ** Epoch 924 took 100.994431s
Epoch: [925/1000] time: 0.7046s, d_loss: 0.41015318, g_loss: 1.30677557, rnn_loss: 0.00000000
 ** Epoch 925 took 99.470583s
Epoch: [926/1000] time: 0.7397s, d_loss: 0.09312907, g_loss: 2.79050827, rnn_loss: 0.00000000
 ** Epoch 926 took 98.483651s
Epoch: [927/1000] time: 0.7225s, d_loss: 0.74359137, g_loss: 1.10862279, rnn_loss: 0.00000000
 ** Epoch 927 took 100.346544s
Epoch: [928/1000] time: 0.7111s, d_loss: 0.05494282, g_loss: 3.30764866, rnn_loss: 0.00000000
 ** Epoch 928 took 98.765232s
Epoch: [929/1000] time: 0.7288s, d_loss: 0.30083454, g_loss: 2.00351334, rnn_loss: 0.00000000
 ** Epoch 929 took 101.049555s
Epoch

Epoch: [988/1000] time: 0.7666s, d_loss: 0.12903501, g_loss: 3.81055117, rnn_loss: 0.00000000
 ** Epoch 988 took 105.175717s
Epoch: [989/1000] time: 0.8286s, d_loss: 0.12241114, g_loss: 2.59005833, rnn_loss: 0.00000000
 ** Epoch 989 took 103.426058s
Epoch: [990/1000] time: 0.7379s, d_loss: 0.10007529, g_loss: 3.16802025, rnn_loss: 0.00000000
 ** Epoch 990 took 102.740494s
Epoch: [991/1000] time: 0.7894s, d_loss: 0.36774862, g_loss: 1.69402111, rnn_loss: 0.00000000
 ** Epoch 991 took 103.393481s
Epoch: [992/1000] time: 0.7267s, d_loss: 0.04324257, g_loss: 3.33473778, rnn_loss: 0.00000000
 ** Epoch 992 took 107.472787s
Epoch: [993/1000] time: 0.7482s, d_loss: 0.10786066, g_loss: 2.97771549, rnn_loss: 0.00000000
 ** Epoch 993 took 105.214008s
Epoch: [994/1000] time: 0.7190s, d_loss: 0.21804029, g_loss: 2.33146906, rnn_loss: 0.00000000
 ** Epoch 994 took 104.281992s
Epoch: [995/1000] time: 0.8388s, d_loss: 0.19595832, g_loss: 3.20138955, rnn_loss: 0.00000000
 ** Epoch 995 took 103.602194s


## 3. Evaluation metric

In [None]:
def generate_r_precision_data():
    """Compute and save the feature arrays consumed by the R-precision evaluation.

    For every full batch of test captions this function:
      1. encodes the true captions with ``rnn_encoder``,
      2. synthesizes images from those captions with ``generator``,
      3. encodes the synthesized images with ``cnn_encoder``,
      4. encodes ``cfg.WRONG_CAPTION`` mismatching captions per sample with
         ``rnn_encoder``.

    The results are written with ``np.savez`` to
    ``cfg.R_PRECISION_DIR / cfg.R_PRECISION_FILE`` under the keys:
      * ``true_cnn``  -- (num_batches, BATCH_SIZE, EMBEDDING_DIM)
      * ``true_rnn``  -- (num_batches, BATCH_SIZE, EMBEDDING_DIM)
      * ``wrong_rnn`` -- (num_batches, WRONG_CAPTION, BATCH_SIZE, EMBEDDING_DIM)

    Captions that do not fill a complete batch are dropped (integer division
    by ``cfg.BATCH_SIZE``).

    Relies on notebook-level objects defined in earlier cells: ``test_dataset``,
    ``rnn_encoder``, ``cnn_encoder``, ``generator`` and the placeholders
    ``t_real_caption``, ``t_real_image``, ``t_z``.
    """
    caption_ids = np.reshape(np.asarray(test_dataset.captions_ids), (-1, cfg.TEXT.WORDS_NUM))
    captions_ids_wrong = np.reshape(test_dataset.random_wrong_captions(), (-1, cfg.WRONG_CAPTION, cfg.TEXT.WORDS_NUM))

    n_caption_test = len(caption_ids)
    num_batches = n_caption_test // cfg.BATCH_SIZE

    true_cnn_features = np.zeros((num_batches, cfg.BATCH_SIZE, cfg.TEXT.EMBEDDING_DIM), dtype=float)
    true_rnn_features = np.zeros((num_batches, cfg.BATCH_SIZE, cfg.TEXT.EMBEDDING_DIM), dtype=float)
    wrong_rnn_features = np.zeros((num_batches, cfg.WRONG_CAPTION, cfg.BATCH_SIZE, cfg.TEXT.EMBEDDING_DIM), dtype=float)

    # The context manager guarantees the session (and the device memory it
    # holds) is released even if restoring or inference raises.
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        sess.run(tf.global_variables_initializer())

        # load the trained checkpoint
        checkpoint_dir = cfg.CHECKPOINT_DIR
        if checkpoint_dir is not None:
            loader = tf.train.Saver(var_list=tf.global_variables())
            # Take the checkpoint name from the config, exactly like
            # generate_inception_score_data(), so both metrics are computed
            # from the same weights.
            ckpt_path = os.path.join(checkpoint_dir, cfg.CHECKPOINT_NAME)
            loader.restore(sess, ckpt_path)
            print("Restored model parameters from {}".format(ckpt_path))
        else:
            # Best effort: continue with the freshly initialized variables.
            print('no checkpoints find.')

        for i in range(num_batches):
            batch = slice(i * cfg.BATCH_SIZE, (i + 1) * cfg.BATCH_SIZE)
            test_cap = caption_ids[batch]

            z = np.random.normal(loc=0.0, scale=1.0, size=(cfg.BATCH_SIZE, cfg.GAN.Z_DIM)).astype(np.float32)

            rnn_features = sess.run(rnn_encoder.outputs, feed_dict={t_real_caption: test_cap})
            gen = sess.run(generator.outputs, feed_dict={t_real_caption: test_cap, t_z: z})
            # The image features come from the *generated* images, not real ones.
            cnn_features = sess.run(cnn_encoder.outputs, feed_dict={t_real_image: gen})

            true_cnn_features[i] = cnn_features
            true_rnn_features[i] = rnn_features

            # The batch of wrong captions does not depend on the inner loop
            # index, so slice it once per batch.
            wrong_caps = captions_ids_wrong[batch]
            for per_wrong_caption in range(cfg.WRONG_CAPTION):
                wrong_rnn_features[i, per_wrong_caption] = sess.run(
                    rnn_encoder.outputs,
                    feed_dict={t_real_caption: wrong_caps[:, per_wrong_caption]})

    output_path = os.path.join(cfg.R_PRECISION_DIR, cfg.R_PRECISION_FILE)
    # if exists, remove the existing file first (a missing file is fine)
    try:
        os.remove(output_path)
    except OSError:
        pass
    np.savez(output_path, true_cnn=true_cnn_features, true_rnn=true_rnn_features,
             wrong_rnn=wrong_rnn_features)

In [None]:
def generate_inception_score_data():
    """Synthesize one image per test caption and save them for the Inception score.

    The test captions are grouped as
    (image, ``cfg.TEXT.CAPTIONS_PER_IMAGE``, ``cfg.TEXT.WORDS_NUM``). For every
    full batch of images and every caption index, ``generator`` is run with a
    fresh standard-normal noise vector and each result is written to
    ``cfg.TEST.GENERATED_TEST_IMAGES/<filename>_<caption index>.png``.

    Images that do not fill a complete batch are dropped (integer division by
    ``cfg.BATCH_SIZE``).

    Relies on notebook-level objects defined in earlier cells: ``test_dataset``,
    ``generator`` and the placeholders ``t_real_caption`` and ``t_z``.
    """
    caption_ids = np.reshape(np.asarray(test_dataset.captions_ids),
                             (-1, cfg.TEXT.CAPTIONS_PER_IMAGE, cfg.TEXT.WORDS_NUM))

    n_caption_test = len(caption_ids)
    num_batches = n_caption_test // cfg.BATCH_SIZE

    # The context manager guarantees the session (and the device memory it
    # holds) is released even if restoring, inference or saving raises.
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        sess.run(tf.global_variables_initializer())

        checkpoint_dir = cfg.CHECKPOINT_DIR
        if checkpoint_dir is not None:
            loader = tf.train.Saver(var_list=tf.global_variables())
            ckpt_path = os.path.join(checkpoint_dir, cfg.CHECKPOINT_NAME)
            loader.restore(sess, ckpt_path)
            print("Restored model parameters from {}".format(ckpt_path))
        else:
            # Best effort: continue with the freshly initialized variables.
            print('no checkpoints find.')

        for i in range(num_batches):
            batch = slice(i * cfg.BATCH_SIZE, (i + 1) * cfg.BATCH_SIZE)
            # The file names are the same for every caption of the batch.
            test_directory = test_dataset.filenames[batch]

            for per_caption in range(cfg.TEXT.CAPTIONS_PER_IMAGE):
                test_cap = caption_ids[batch, per_caption]

                z = np.random.normal(loc=0.0, scale=1.0, size=(cfg.BATCH_SIZE, cfg.GAN.Z_DIM)).astype(np.float32)
                gen = sess.run(generator.outputs, feed_dict={t_real_caption: test_cap, t_z: z})

                for j in range(cfg.BATCH_SIZE):
                    # The first path component of the file name is used as
                    # the sub-folder of the output directory.
                    target_dir = os.path.join(cfg.TEST.GENERATED_TEST_IMAGES, test_directory[j].split('/')[0])
                    if not os.path.isdir(target_dir):
                        # makedirs also creates a missing output root, which
                        # a plain mkdir would fail on.
                        os.makedirs(target_dir)

                    # NOTE(review): scipy.misc.imsave only exists in old SciPy
                    # releases; kept because the notebook targets that
                    # environment -- confirm before upgrading SciPy.
                    scipy.misc.imsave(os.path.join(cfg.TEST.GENERATED_TEST_IMAGES, test_directory[j] + '_{}.png'.format(per_caption)), gen[j])

In [None]:
# Build the image/caption feature arrays and save them to
# cfg.R_PRECISION_DIR/cfg.R_PRECISION_FILE for the R-precision notebook.
generate_r_precision_data()

In [None]:
# Synthesize an image for every test caption and write the PNG files under
# cfg.TEST.GENERATED_TEST_IMAGES for the Inception-score notebook.
generate_inception_score_data()

## 4. Measure the Inception score and R-precision on the given test dataset

After setting the config file to 'eval_birds.yml' and running 'generate_inception_score_data()' and 'generate_r_precision_data()', the images synthesized from the given captions and the set of image and caption features should be saved inside the 'evaluation' folder, specifically in 'evaluation/generated_images/..' and as 'evaluation/r_precision.npz', respectively.

**Then, go to the 'evaluation' folder and run 'inception_score.ipynb' and 'r_precision.ipynb' to measure the Inception score and the R-precision score, respectively.**