In [1]:
# Widen the notebook container so the wide image/caption tables rendered
# further down fit on screen.
# `display` and `HTML` come from the public `IPython.display` module;
# importing `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
cd /home/dfried/projects/ImageCaptioning.pytorch

In [3]:
import numpy as np
import torch

In [28]:
import pickle

In [4]:
import argparse
import captioning.utils.opts as opts
import captioning.utils.misc as utils

In [5]:
from captioning.data.dataloader import DataLoader
from captioning.data.dataloaderraw import DataLoaderRaw

In [6]:
# Build an argument parser and let the project register its evaluation
# options on it.
parser = argparse.ArgumentParser()
opts.add_eval_options(parser)

In [7]:
# Parse an empty argument list so every option takes its declared default
# (there is no command line inside a notebook).
opt = parser.parse_args([])

In [8]:
# Show the default option values before the checkpoint's options are merged in.
opt

Namespace(batch_size=0, beam_size=1, block_trigrams=0, coco_json='', decoding_constraint=0, diversity_lambda=0.5, dump_images=1, dump_json=1, dump_path=0, group_size=1, id='', image_folder='', image_root='', input_att_dir='', input_box_dir='', input_fc_dir='', input_json='', input_label_h5='', language_eval=0, length_penalty='', max_length=20, num_images=-1, remove_bad_endings=0, sample_method='greedy', split='test', suppress_UNK=1, temperature=1.0, verbose_beam=1, verbose_loss=0)

In [9]:
# Load the metadata saved alongside the checkpoint and list its top-level keys.
# NOTE(review): presumably `utils.pickle_load` unpickles the file — only load
# checkpoints from a trusted source.
with open('models/updown/infos_tds-best.pkl', 'rb') as f:
    infos = utils.pickle_load(f)
infos.keys()

dict_keys(['vocab', 'opt', 'best_val_score', 'iter', 'iterators', 'epoch', 'split_ix'])

In [10]:
# Merge the options stored with the checkpoint into the evaluation options:
#   * keys in `replace`: keep a truthy value already set on `opt`, otherwise
#     fall back to the checkpoint's value;
#   * keys in `ignore`: never copied;
#   * every other key: copied only when `opt` does not define it yet.
replace = ['input_fc_dir', 'input_att_dir', 'input_box_dir', 'input_label_h5', 'input_json', 'batch_size', 'id']
ignore = ['start_from']

for k, model_value in vars(infos['opt']).items():
    if k in replace:
        setattr(opt, k, getattr(opt, k) or model_value)
        continue
    if k in ignore or k in vars(opt):
        continue
    setattr(opt, k, model_value)  # copy over options from model

In [11]:
# Build the project's data loader from the merged options, with the shuffle
# and wrap overrides both set to False (presumably so one pass over a split
# visits each image once in a fixed order — confirm against DataLoader).
loader = DataLoader(opt, shuffle_override=False, wrap_override=False)

DataLoader loading json file:  data/cocotalk.json
vocab size is  9487
DataLoader loading h5 file:  data/cocobu_fc data/cocobu_att data/cocotalk_box data/cocotalk_label.h5
max sequence length in data is 16
read 123287 image features
assigned 113287 images to split train
assigned 5000 images to split val
assigned 5000 images to split test


In [12]:
# Vocabulary used below to decode label ids into words; it is looked up with
# the id converted to a string (see `vocab[str(...)]` in the extraction loop).
vocab = loader.get_vocab()

In [13]:
import tqdm

In [14]:
# Batch size in effect after merging the checkpoint's options.
opt.batch_size

10

In [15]:
# Number of batches in the train split; this is the iteration count of the
# extraction loop below.
len(loader.loaders['train'])

11329

In [30]:
# Label id treated as padding; id 0 is skipped when captions are decoded.
PAD_ID = 0

In [31]:
# True: re-extract features and captions through the loader and overwrite the
# cache pickle. False: read the existing cache pickle instead.
REGENERATE = False

In [16]:
# Split to export. Bound outside the REGENERATE guard because the cache file
# name is derived from it even when the cached pickle is only read back.
split = 'train'

if REGENERATE:
    # Parallel lists with one entry per image, accumulated in loader order.
    ixs = []
    ids = []
    file_paths = []
    features = []  # one fc-feature tensor per batch, concatenated later
    captions = []  # per image: a list of captions, each a list of word strings

    loader.reset_iterator(split)

    for batch_ix in tqdm.trange(len(loader.loaders[split])):
        data = loader.get_batch(split)
        # Use a distinct name here: `infos` already holds the checkpoint
        # metadata loaded from the infos pickle and must not be overwritten.
        batch_infos = data['infos']
        ixs.extend(info['ix'] for info in batch_infos)
        ids.extend(info['id'] for info in batch_infos)
        file_paths.extend(info['file_path'] for info in batch_infos)
        features.append(data['fc_feats'])

        # data['labels'] is walked as images -> captions -> token ids;
        # padding ids are dropped and the rest are mapped to words.
        for image_labels in data['labels']:
            captions.append([
                [vocab[str(token.item())] for token in caption_labels if token != PAD_ID]
                for caption_labels in image_labels
            ])

100%|██████████| 11329/11329 [03:50<00:00, 49.12it/s]


In [17]:
# Number of images collected.
# NOTE(review): `ixs` is bound above only when REGENERATE is True; otherwise
# it is bound by the cache-loading cell further down, so in a top-to-bottom
# run with REGENERATE = False this cell executes before `ixs` exists.
len(ixs)

113287

In [18]:
# Concatenate the per-batch feature tensors along the first axis into a single
# NumPy array with one row per image.
# Guarded because `features` only exists when the extraction loop ran; with
# REGENERATE = False, `features_array` is read from the cached pickle instead.
if REGENERATE:
    features_array = torch.cat(features, 0).numpy()

In [34]:
# Cache file holding everything exported for the chosen split.
fname = 'data/cocobu_fc/all_{}.pkl'.format(split)
if not REGENERATE:
    # Reuse the previously exported data instead of re-running the loader.
    # NOTE: unpickling executes arbitrary code — only read cache files you
    # produced yourself.
    with open(fname, 'rb') as f:
        d = pickle.load(f)
    ixs, ids, file_paths, features_array, captions = (
        d[field] for field in ('ixs', 'ids', 'file_paths', 'features', 'captions')
    )
else:
    # Persist the freshly extracted data so later sessions can skip extraction.
    with open(fname, 'wb') as f:
        d = dict(
            ixs=ixs,
            ids=ids,
            file_paths=file_paths,
            features=features_array,
            captions=captions,
        )
        pickle.dump(d, f)

In [35]:
import faiss

In [36]:
# Flat L2 index sized from the feature matrix itself rather than a hard-coded
# 2048, so the index dimensionality always matches the vectors added to it.
index = faiss.IndexFlatL2(features_array.shape[1])

In [37]:
# Add every image feature vector to the index. Search results are used below
# as positions into `file_paths` / `captions`, i.e. row i is expected to get id i.
# NOTE(review): faiss presumably expects a float32 array here — confirm the dtype.
index.add(features_array)

In [38]:
# Sanity check: number of vectors stored in the index.
index.ntotal

113287

In [39]:
def wrap_tag(tag, inner):
    """Return `inner` enclosed in an opening and closing `tag` element.

    No escaping is applied to either argument.
    """
    return '<{0}>{1}</{0}>'.format(tag, inner)

def image_html(image_path, width=300, border=False):
    """Return an ``<img>`` tag for `image_path`.

    Args:
        image_path: path or URL placed in the ``src`` attribute. It is
            HTML-escaped so quotes, ``&`` or angle brackets in a path cannot
            break out of the attribute.
        width: value of the ``width`` attribute.
        border: when true, add a 5px cyan border to highlight the image.

    Returns:
        The tag as a string; for paths without special characters the output
        is unchanged from the unescaped form.
    """
    from html import escape  # local import keeps this cell self-contained

    style = ' style="border: 5px solid #0FF" ' if border else ''
    safe_src = escape(str(image_path), quote=True)
    return f'<img width={width} src="{safe_src}" {style}></img>'

def captions_html(captions):
    """Render captions as an ordered HTML list with one ``<li>`` per caption.

    Each caption is an iterable of word strings; its words are joined with
    single spaces.
    """
    list_items = []
    for words in captions:
        list_items.append(wrap_tag('li', ' '.join(words)))
    return wrap_tag('ol', ''.join(list_items))

def images_html(image_paths, width=300, num_per_row=5, target=None, captions=None):
    """Build an HTML table showing the images in rows of `num_per_row`.

    Args:
        image_paths: sequence of image paths, one table cell each.
        width: width passed on to every image tag.
        num_per_row: maximum number of images per table row.
        target: position in `image_paths` of an image to draw with a border,
            or None for no highlight.
        captions: optional sequence aligned with `image_paths`; when given,
            each image row is followed by a row holding the caption lists.

    Returns:
        The ``<table>`` markup as a string.
    """
    total = len(image_paths)
    table_rows = []
    for row_start in range(0, total, num_per_row):
        # Positions of the images in this row; the last row may be shorter.
        row_positions = range(row_start, min(row_start + num_per_row, total))
        image_cells = ''.join(
            wrap_tag('td', image_html(image_paths[pos], width=width, border=(pos == target)))
            for pos in row_positions
        )
        table_rows.append(wrap_tag('tr', image_cells))
        if captions is None:
            continue
        caption_cells = ''.join(
            wrap_tag('td', captions_html(captions[pos])) for pos in row_positions
        )
        table_rows.append(wrap_tag('tr', caption_cells))
    return wrap_tag('table', ''.join(table_rows))

def display_images(image_paths, width=300, num_per_row=5, target=None, captions=None):
    """Render the image (and optional caption) table inline in the notebook.

    Arguments are forwarded unchanged to `images_html`.
    """
    table_markup = images_html(
        image_paths,
        width=width,
        num_per_row=num_per_row,
        target=target,
        captions=captions,
    )
    display(HTML(table_markup))

In [40]:
def display_neighbors(ix, k=5, num_per_row=5):
    """Display the `k` nearest neighbours of image `ix` with their captions.

    Args:
        ix: row of `features_array` to use as the query.
        k: number of neighbours to retrieve from the index.
        num_per_row: images per row in the rendered table.

    The query vector is itself stored in the index, so the query image
    normally appears among the results.
    """
    # Add a leading axis: the index is searched with a 2-D batch of queries.
    query = features_array[ix][None]
    _, neighbor_ids = index.search(query, k)
    neighbor_ids = neighbor_ids.flatten()
    paths_k = [file_paths[n] for n in neighbor_ids]
    captions_k = [captions[n] for n in neighbor_ids]
    display_images(paths_k, captions=captions_k, num_per_row=num_per_row)

In [43]:
# Example: the 8 nearest neighbours of image 1, laid out 4 per row.
display_neighbors(1, k=8, num_per_row=4)

0,1,2,3
,,,
a young boy standing in front of a computer keyboarda little boy wearing headphones and looking at a computer monitorhe is listening intently to the computer at schoola young boy stares up at the computer monitora young kid with head phones on using a computer,a boy wearing headphones using one computer in a long row of computersa little boy with earphones on listening to somethinga group of people sitting at desk using computerschildren sitting at computer stations on a long tablea small child wearing headphones plays on the computer,some small children playing on laptop games excitedtwo children work at a desk on laptop computerstwo young ladies work on laptops on a white counter topa little girl sitting in front of a laptop computertwo girls who are sitting in front of laptops,a man with glasses sitting at a desktop computertwo men at a computer playing game with headphones ontwo men are wearing headphones and playing a computer gametwo men are in the dark by a laptop computerdark haired man playing a video game on computer
,,,
a man with a hoodie and headphones on in front of a computera person sitting in front of a laptop computer wearing glassesa man with headphones sitting at a desk looking at a computera young man in a red sweatshirt is on the computera man is sitting at the computer desk with a laptop on it,people at a work bench table with laptops and other electronic equipmenttwo people on computers amongst a table full of debristhree people are working on two laptopshands are at work at a table repairing laptop computersa group of people sitting around a pair of laptops,a man holding a smart phone while standing next to a credit card readera man looking at something in his handsa young man is at a workstation with a phonean image of man that is looking at his cellphonea young man is using a cell phone near electronics,two men sitting around a laptop looking at the screena man at a laptop with another looking on at his screentwo men stare intently at a computer screen while one works at the keyboardtwo men at a desk working with a laptop computertwo people looking at a laptop on a desk


In [54]:
# Sanity-check the raw labels by decoding every caption of the first image in
# a batch; ids missing from the vocabulary are printed as IX_<id>.
# The batch is fetched explicitly: this cell previously relied on a leftover
# kernel variable `d` holding a loader batch, whereas `d` is otherwise bound
# to the cached pickle payload, which has no 'labels' key.
# NOTE(review): the split of the originally inspected batch is unknown;
# 'train' is assumed here.
loader.reset_iterator('train')
d = loader.get_batch('train')
for sent in d['labels'][0]:
    print(' '.join(loader.dataset.get_vocab().get(str(ix.item()), 'IX_{}'.format(ix.item())) for ix in sent))

IX_0 a long hot dog on a plate on a table IX_0 IX_0 IX_0 IX_0 IX_0 IX_0 IX_0
IX_0 a very long hot dog on a plastic plate IX_0 IX_0 IX_0 IX_0 IX_0 IX_0 IX_0 IX_0
IX_0 a foot long hot dog on top of two buns IX_0 IX_0 IX_0 IX_0 IX_0 IX_0 IX_0
IX_0 long hot dog using two buns on paper plate IX_0 IX_0 IX_0 IX_0 IX_0 IX_0 IX_0 IX_0
IX_0 a foot long hotdog on two regular buns on a styrofoam plate IX_0 IX_0 IX_0 IX_0 IX_0


In [57]:
# Size of the batch's fc feature tensor.
# NOTE(review): assumes `d` is a loader batch dict with an 'fc_feats' key, not
# the cached pickle payload (whose keys are ixs/ids/file_paths/features/captions)
# — confirm `d` is bound accordingly before running.
d['fc_feats'].size()

torch.Size([10, 2048])