In [5]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import json
import os
import pdb
import time
from pickle import TRUE

import numpy as np
import torch
from six.moves import cPickle

import opts
import models
from dataloader import *
from dataloaderraw import *
import eval_utils
import misc.utils as utils

In [6]:
def setup_model(checkpoint_dir,
                save_root='/apdcephfs/private_yuanenzhou/projects/data/self-critical.pytorch/save/'):
    """Build evaluation options for one checkpoint and load its best model.

    The option namespace is produced by parsing an EMPTY argument list, so
    every option takes the default declared below; options that are missing
    here are then copied over from the `infos` pickle stored next to the
    checkpoint.

    Args:
        checkpoint_dir: Name of the run directory under `save_root`. It is
            also used to build the infos file name
            (`infos_<checkpoint_dir>-best.pkl`).
        save_root: Directory that contains the run directories. Defaults to
            the location this notebook was originally written against.

    Returns:
        A `(model, opt)` tuple: the model with its weights loaded, moved to
        CUDA and switched to eval mode, and the merged option namespace.

    Raises:
        AssertionError: if an option declared here disagrees with the value
            saved in the checkpoint's infos (options in `ignore` excepted).
    """
    def _str2bool(text):
        # argparse hands the raw command-line string to `type`; plain `bool`
        # would turn every non-empty string (including "False") into True.
        return str(text).strip().lower() in ('1', 'true', 'yes', 'y')

    run_dir = os.path.join(save_root, checkpoint_dir)

    # Input arguments and options
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', type=str, default=os.path.join(run_dir, 'model-best.pth'),
                    help='path to model to evaluate')
    parser.add_argument('--cnn_model', type=str,  default='resnet101',
                    help='resnet101, resnet152')
    parser.add_argument('--infos_path', type=str, default=os.path.join(run_dir, 'infos_' + checkpoint_dir + '-best.pkl'),
                    help='path to infos to evaluate')
    # Basic options
    parser.add_argument('--batch_size', type=int, default=1,
                    help='if > 0 then overrule, otherwise load from checkpoint.')
    parser.add_argument('--num_images', type=int, default=-1,
                    help='how many images to use when periodically evaluating the loss? (-1 = all)')
    parser.add_argument('--language_eval', type=int, default=1,
                    help='Evaluate language as well (1 = yes, 0 = no)? BLEU/CIDEr/METEOR/ROUGE_L? requires coco-caption code from Github.')
    parser.add_argument('--dump_images', type=int, default=0,
                    help='Dump images into vis/imgs folder for vis? (1=yes,0=no)')
    parser.add_argument('--dump_json', type=int, default=0,
                    help='Dump json with predictions into vis folder? (1=yes,0=no)')
    parser.add_argument('--dump_path', type=int, default=0,
                    help='Write image paths along with predictions into vis json? (1=yes,0=no)')

    # Sampling options
    parser.add_argument('--sample_max', type=int, default=1,
                    help='1 = sample argmax words. 0 = sample from distributions.')
    parser.add_argument('--beam_size', type=int, default=1,
                    help='used when sample_max = 1, indicates number of beams in beam search. Usually 2 or 3 works well. More is not better. Set this to 1 for faster runtime but a bit worse performance.')
    parser.add_argument('--max_length', type=int, default=20,
                    help='Maximum length during sampling')
    parser.add_argument('--length_penalty', type=str, default='',
                    help='wu_X or avg_X, X is the alpha')
    parser.add_argument('--group_size', type=int, default=1,
                    help='used for diverse beam search. if group_size is 1, then it\'s normal beam search')
    parser.add_argument('--diversity_lambda', type=float, default=0.5,
                    help='used for diverse beam search. Usually from 0.2 to 0.8. Higher value of lambda produces a more diverse list')
    parser.add_argument('--temperature', type=float, default=1.0,
                    help='temperature when sampling from distributions (i.e. when sample_max = 0). Lower = "safer" predictions.')
    parser.add_argument('--decoding_constraint', type=int, default=0,
                    help='If 1, not allowing same word in a row')
    parser.add_argument('--block_trigrams', type=int, default=0,
                    help='block repeated trigram.')
    parser.add_argument('--remove_bad_endings', type=int, default=0,
                    help='Remove bad endings')
    # For evaluation on a folder of images:
    parser.add_argument('--image_folder', type=str, default='',
                    help='If this is nonempty then will predict on the images in this folder path')
    parser.add_argument('--image_root', type=str, default='',
                    help='In case the image paths have to be preprended with a root path to an image folder')
    # For evaluation on MSCOCO images from some split:
    parser.add_argument('--input_fc_dir', type=str, default='',
                    help='path to the h5file containing the preprocessed dataset')
    parser.add_argument('--input_att_dir', type=str, default='',
                    help='path to the h5file containing the preprocessed dataset')
    parser.add_argument('--input_box_dir', type=str, default='',
                    help='path to the h5file containing the preprocessed dataset')
    parser.add_argument('--input_label_h5', type=str, default='',
                    help='path to the h5file containing the preprocessed dataset')
    parser.add_argument('--input_json', type=str, default='',
                    help='path to the json file containing additional info and vocab. empty = fetch from model checkpoint.')
    parser.add_argument('--split', type=str, default='test',
                    help='if running on MSCOCO images, which split to use: val|test|train')
    parser.add_argument('--coco_json', type=str, default='',
                    help='if nonempty then use this file in DataLoaderRaw (see docs there). Used only in MSCOCO test evaluation, where we have a specific json file of only test set images.')
    # misc
    # NOTE(review): this default id is fixed regardless of `checkpoint_dir`,
    # so the `len(opt.id) == 0` fallback below never fires — confirm intended.
    parser.add_argument('--id', type=str, default='sat-4-from-nsc-seqkd',
                    help='an id identifying this run/job. used only if language_eval = 1 for appending to intermediate files')
    parser.add_argument('--seq_kd', type=_str2bool, default=False,
                    help='Whether generating new train set by sequence level knowledge.')
    parser.add_argument('--verbose_beam', type=int, default=1,
                    help='if we need to print out all beam search beams.')
    parser.add_argument('--verbose_loss', type=int, default=1,
                    help='if we need to calculate loss.')

    # Parse an empty argv on purpose: inside a notebook sys.argv belongs to
    # the kernel, so only the defaults declared above are wanted.
    opt = parser.parse_args([])

    # Load infos.
    # SECURITY: the infos file is unpickled; only point this at checkpoints
    # you trust, since unpickling can execute arbitrary code.
    with open(opt.infos_path, 'rb') as f:
        infos = utils.pickle_load(f)
    saved_opt = infos['opt']

    # Fill in options left empty above from the values saved with the model.
    if len(opt.input_fc_dir) == 0:
        opt.input_fc_dir = saved_opt.input_fc_dir
        opt.input_att_dir = saved_opt.input_att_dir
        opt.input_box_dir = getattr(saved_opt, 'input_box_dir', '')
        opt.input_label_h5 = saved_opt.input_label_h5
    if len(opt.input_json) == 0:
        opt.input_json = saved_opt.input_json
    if opt.batch_size == 0:
        opt.batch_size = saved_opt.batch_size
    if len(opt.id) == 0:
        opt.id = saved_opt.id

    # Options allowed to differ between this evaluation and the saved run.
    ignore = ["id", "batch_size", "beam_size", "start_from", "language_eval", "block_trigrams"]

    current = vars(opt)  # live __dict__ of `opt`; writes below update `opt`
    for k, saved_value in vars(saved_opt).items():
        if k in ignore:
            continue
        if k in current:
            assert current[k] == saved_value, k + ' option not consistent'
        else:
            current[k] = saved_value  # copy over options from model

    # Setup the model. Weights are deserialized onto the CPU first so the
    # load does not depend on the GPU index the checkpoint was saved from;
    # the whole model is moved to CUDA afterwards.
    model = models.setup(opt)
    model.load_state_dict(torch.load(opt.model, map_location='cpu'))
    model.cuda()
    model.eval()
    return model, opt

In [32]:
# Checkpoints to compare side by side. Other runs that have been used here:
# 'transformer-baseline', 'sat-2-from-nsc-seqkd', 'sat-4-from-nsc-seqkd',
# 'sat-6-from-nsc-seqkd', 'nsc-transformer-baseline'.
checkpoint_dirs = [
    'nsc-sat-2-from-nsc-seqkd',
    'nsc-sat-4-from-nsc-seqkd',
    'nsc-sat-6-from-nsc-seqkd',
]

# Load every checkpoint in order. `opt` is deliberately left bound to the
# options of the LAST checkpoint, because the cells below read it.
model_list = []
for checkpoint_dir in checkpoint_dirs:
    model, opt = setup_model(checkpoint_dir)
    model_list += [model]

In [33]:
# Create the data loader: DataLoaderRaw over `opt.image_folder` when a folder
# is configured, otherwise DataLoader driven by the full option namespace.
if len(opt.image_folder) > 0:
    loader = DataLoaderRaw({
        'folder_path': opt.image_folder,
        'coco_json': opt.coco_json,
        'batch_size': opt.batch_size,
        'cnn_model': opt.cnn_model,
    })
else:
    loader = DataLoader(opt)


DataLoader loading json file:  /apdcephfs/private_yuanenzhou/projects/data/self-critical.pytorch/data/cocotalk.json
vocab size is  9487
DataLoader loading h5 file:  /apdcephfs/private_yuanenzhou/projects/data/self-critical.pytorch/data/mscoco/cocobu_fc /apdcephfs/private_yuanenzhou/projects/data/self-critical.pytorch/data/mscoco/cocobu_att /apdcephfs/private_yuanenzhou/projects/data/self-critical.pytorch/data/mscoco/cocobu_box /apdcephfs/private_yuanenzhou/projects/data/self-critical.pytorch/data/cocotalk_label.h5
max sequence length in data is 16
read 123287 image features
assigned 113287 images to split train
assigned 5000 images to split val
assigned 5000 images to split test


In [34]:
# Evaluation settings are read from the option namespace as a plain dict; the
# second argument of each .get() is only a fallback for keys absent from `opt`.
eval_kwargs = vars(opt)

# What to evaluate on.
split = eval_kwargs.get('split', 'val')
dataset = eval_kwargs.get('dataset', 'coco')
num_images = eval_kwargs.get('num_images', eval_kwargs.get('val_images_use', -1))

# How to decode and what to report.
beam_size = eval_kwargs.get('beam_size', 1)
lang_eval = eval_kwargs.get('language_eval', 0)
seq_kd = eval_kwargs.get('seq_kd', False)
verbose = eval_kwargs.get('verbose', True)
verbose_beam = eval_kwargs.get('verbose_beam', 1)
verbose_loss = eval_kwargs.get('verbose_loss', 1)

# This setting is published through the process environment rather than being
# passed as an argument, so it acts as a global configuration.
remove_bad_endings = eval_kwargs.get('remove_bad_endings', 0)
os.environ["REMOVE_BAD_ENDINGS"] = str(remove_bad_endings)

# Restart iteration over the chosen split.
loader.reset_iterator(split)

# Running counters / accumulators for the evaluation loop.
n = loss = loss_sum = time_sum = 0
loss_evals = 1e-8
predictions = []
if seq_kd:
    train_sents_list_all = list()
    

In [42]:
# One step of the evaluation loop: fetch a single batch from `split` and
# caption it with every loaded model. Re-running this cell advances the
# loader to the next batch (and increments `n` again).
data = loader.get_batch(split)
print('Image_id:{}'.format(data['infos'][0]['id']))
n = n + loader.batch_size

if seq_kd:
    # Collects the beam-search sentences of this batch (filled below,
    # across all models).
    train_sents_list_batch = []

# Only leave one feature row per image, in case of duplicate samples: take the
# rows at multiples of seq_per_img. The index is built once and reused.
first_rows = np.arange(loader.batch_size) * loader.seq_per_img
fc_feats = data['fc_feats'][first_rows].cuda()
att_feats = data['att_feats'][first_rows].cuda()
att_masks = data['att_masks'][first_rows].cuda() if data['att_masks'] is not None else None

vocab = loader.get_vocab()

for model_name, model in zip(checkpoint_dirs, model_list):
    # Forward the model to get generated samples for each image.
    with torch.no_grad():
        seq = model(fc_feats, att_feats, att_masks, opt=eval_kwargs, mode='sample')[0].data

    sents = utils.decode_sequence(vocab, seq)
    print('{}:{}'.format(model_name, sents))

    # Print every finished beam per image. A dedicated index name is used so
    # the per-image loop does not shadow the model loop.
    if beam_size > 1 and verbose_beam:
        for img_idx in range(loader.batch_size):
            sents_list = [utils.decode_sequence(vocab, beam['seq'].unsqueeze(0))[0]
                          for beam in model.done_beams[img_idx]]
            if seq_kd:
                train_sents_list_batch.append(sents_list)
            print('\n'.join(sents_list))
            print('--' * 10)







Image_id:294832
sat-4:['a bathroom bathroom a shower and and shower']
sat-4-from-nsc-seqkd:['a bathroom with a sink and a sink and a toilet']
nsc-sat-4-from-nsc-seqkd:['a bathroom with a toilet and a sink and a shower']
