This notebook shows how to run a trained TbD-net on the CLEVR test data and write its predicted answers to a file.

In [None]:
import torch

from pathlib import Path
import numpy as np
import h5py

from tbd.module_net import load_tbd_net
from utils.clevr import load_vocab
from utils.generate_programs import load_program_generator, generate_programs

Load the trained model we want to produce test answers for

In [None]:
# Locations of the trained checkpoint and of the vocabulary file handed to the loader.
model_path = Path('models/clevr-reg-hres.pt')
vocab_path = Path('data/vocab.json')

# Read the vocabulary first, then build the network from the checkpoint and vocabulary.
vocab = load_vocab(vocab_path)
tbd_net = load_tbd_net(model_path, vocab)

To evaluate, we first need to generate programs from the questions

In [None]:
# Input questions file and the directory passed as the destination for the
# generated programs (the later cells read their .npy inputs from this directory).
test_questions_h5 = Path('data/test/test_questions.h5')
program_output_dir = Path('data/test/')

program_generator = load_program_generator(Path('models/program_generator.pt'))
generate_programs(test_questions_h5, program_generator,
                  dest_dir=program_output_dir, batch_size=128)

Load the test features that we've extracted, along with the questions, image indices, and programs we just generated.

In [None]:
# Choose which on-disk copy of the image features to read: the NumPy .npy file
# (True) or the HDF5 file (False). The prediction loop branches on this flag too.
use_np_features = True
if use_np_features:
    # mmap_mode='r' memory-maps the array read-only instead of loading it all at once.
    features = np.load(str(Path('data/test/test_features.npy')), mmap_mode='r')
else:
    # Open the HDF5 file explicitly read-only. h5py releases before 3.0 default to
    # append mode, which opens the file read/write and creates an empty file when the
    # path does not exist. str() mirrors the .npy branch and avoids depending on
    # pathlib support in the installed h5py.
    features = h5py.File(str(Path('data/test/test_features.h5')), 'r')['features']

# Questions, per-question image indices, and the programs generated in the previous step.
question_np = np.load(Path('data/test/test_questions.npy'))
image_idx_np = np.load(Path('data/test/test_image_idxs.npy'))
programs_np = np.load(Path('data/test/test_programs.npy'))

Create a mapping from our model output to answers

In [None]:
# Answer tokens grouped by kind. Concatenated in this order, a token's position
# is the prediction index it is looked up by.
answer_groups = [
    ['blue', 'brown', 'cyan', 'gray', 'green', 'purple', 'red', 'yellow'],
    ['cube', 'cylinder', 'sphere'],
    ['large', 'small'],
    ['metal', 'rubber'],
    ['no', 'yes'],
    # Counts are in string sort order, so '10' comes right after '1'.
    ['0', '1', '10', '2', '3', '4', '5', '6', '7', '8', '9'],
]
answers = [token for group in answer_groups for token in group]

# Map each prediction index to its answer token.
pred_idx_to_token = dict(enumerate(answers))

Convenience function for writing predictions to a file

In [None]:
# Output file used by write_preds. It is opened once here and stays open until
# it is closed after the prediction loop has finished.
f = open('predicted_answers.txt', 'w')


def write_preds(preds):
    """Write every string in ``preds`` to the open output file, one per line."""
    f.writelines(pred + '\n' for pred in preds)

Create a handle to the device we want to use

In [None]:
# Use the GPU when PyTorch reports that CUDA is available; otherwise use the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

Run through all of the questions, produce a prediction, and write that predicted answer to the text file we opened above

In [None]:
batch_size = 128
try:
    # Inference only: turning autograd off avoids building a graph for every
    # forward pass, which would otherwise hold on to memory for no benefit.
    with torch.no_grad():
        for start in range(0, len(programs_np), batch_size):
            stop = start + batch_size
            image_idx = image_idx_np[start:stop]
            programs = torch.LongTensor(programs_np[start:stop]).to(device)

            if use_np_features:
                feats = torch.FloatTensor(np.asarray(features[image_idx])).to(device)
            else:
                # Using HDF5 files requires some overhead due to constraints on how
                # those may be accessed. We cannot index into the file using a numpy
                # array. We also cannot access the same element multiple times (e.g.
                # we cannot index into an h5py.File with [1,1,1]) because we are
                # constrained to increasing sequences, so fetch one image at a time.
                feats = [np.asarray(features[idx]) for idx in image_idx]
                feats = torch.FloatTensor(np.asarray(feats)).to(device)

            outputs = tbd_net(feats, programs)
            # max over dim 1 returns (values, indices); only the indices are needed.
            pred_idxs = outputs.max(1)[1]
            preds = [pred_idx_to_token[idx] for idx in pred_idxs.cpu().tolist()]
            write_preds(preds)
finally:
    # Always close the output file, even if a batch fails part-way through, so the
    # handle is not leaked and already-written predictions are flushed to disk.
    f.close()