In [1]:
import os
import torch
import json
import yaml
import tqdm
from dataset.ConcatBaselineDataset import ConcatBaselineDataset
from baseline.models.ConcatBaselineNet import ConcatBaselineNet
from baseline.scripts.train_baseline import get_hidden_layer_list
from torch.utils.data import DataLoader
import transforms.transforms as trfm

In [2]:
# Absolute locations used by this notebook: the VQA data directory and the
# root of the project checkout (config file and trained weights live here).
DATA_DIR = "/auto/homes/bat34/VQA/"
ROOT_DIR = "/auto/homes/bat34/VQA_PartII/"

# Test split of the dataset. Batches are assembled by chaining the project's
# batch transforms into one collate function.
# NOTE(review): the inference loop reads each batch as (inputs, question ids),
# which is presumably what PrepareBaselineTestBatch produces -- confirm.
test_dataset = ConcatBaselineDataset(split='test')
test_collate_fn = trfm.Compose([
    trfm.ConvertBatchListToDict(),
    trfm.CreateBatchItem(),
    trfm.PrepareBaselineTestBatch(),
])

# Read the baseline hyper-parameters. safe_load is used instead of a bare
# yaml.load(f): calling yaml.load without a Loader is deprecated/unsafe in
# PyYAML 5.x and a TypeError in PyYAML >= 6, and a plain config file needs
# nothing beyond the safe subset.
with open(os.path.join(ROOT_DIR, 'baseline', 'scripts', 'baseline.yaml')) as f:
    config = yaml.safe_load(f)['baseline_options']

# Order must stay fixed (shuffle=False) so predictions line up with the
# question ids delivered in each batch.
test_loader = DataLoader(
    test_dataset,
    shuffle=False,
    batch_size=config['batch_size'],
    collate_fn=test_collate_fn,
    num_workers=config['num_workers'],
)

# Network dimensions: input width is the length of one sample's concatenated
# feature vector, output width is the size of the answer vocabulary.
input_dim = test_dataset[0]['concat_vector'].size(0)
out_dim = len(test_dataset.ans_to_aid)
size = config['max_depth']
# Hidden layer widths are derived by the same helper the training script uses,
# so the architecture matches the saved checkpoint.
hidden_list = get_hidden_layer_list(input_dim, out_dim, size)
model = ConcatBaselineNet(
    input_dim,
    out_dim,
    hidden_list,
    dropout=config['dropout'],
)

#Path to model weights with best performance on the validation dataset
weights_path = os.path.join(
    ROOT_DIR, 'baseline', 'trained_models',
    'depth_3_concatbaseline_dropout_0.25_batch_size_1024_lr_0.0001_weight_decay_0_BEST.pth',
)
# Load the tensors onto the CPU first so the checkpoint restores regardless of
# which device it was saved from; the model is moved to the GPU just below.
model.load_state_dict(torch.load(weights_path, map_location='cpu'))
model.cuda()
# Switch to inference mode (disables dropout); as the last expression of the
# cell this also displays the model architecture.
model.eval()

  2% (9698 of 443757) |                  | Elapsed Time: 0:00:00 ETA:   0:00:08

Tokenizing questions for train2014


100% (443757 of 443757) |################| Elapsed Time: 0:00:10 Time:  0:00:10
  3% (8141 of 214354) |                  | Elapsed Time: 0:00:00 ETA:   0:00:04

Tokenizing questions for val2014


100% (214354 of 214354) |################| Elapsed Time: 0:00:05 Time:  0:00:05
  2% (10621 of 447793) |                 | Elapsed Time: 0:00:00 ETA:   0:00:08

Tokenizing questions for test2015


100% (447793 of 447793) |################| Elapsed Time: 0:00:09 Time:  0:00:09


Length of answer_vocabulary: 3000, Original no. of answers: 22531


  5% (22469 of 443757) |                 | Elapsed Time: 0:00:00 ETA:  00:00:00

No. of known words: 2752210, No. of unknown words : 1399, Percentage Loss of words: 0.050806051258548326%
Removing questions if they have infrequent answers


100% (443757 of 443757) |################| Elapsed Time: 0:00:01 Time:  0:00:01


Saving processed datasets...
Finished processing annotations and questions.


  del sys.path[0]


ConcatBaselineNet(
  (hidden): ModuleList(
    (0): LayerNorm((4448,), eps=1e-05, elementwise_affine=True)
    (1): Linear(in_features=4448, out_features=3966, bias=True)
    (2): Dropout(p=0.25, inplace=False)
    (3): LayerNorm((3966,), eps=1e-05, elementwise_affine=True)
    (4): Linear(in_features=3966, out_features=3484, bias=True)
    (5): Dropout(p=0.25, inplace=False)
    (6): LayerNorm((3484,), eps=1e-05, elementwise_affine=True)
    (7): Linear(in_features=3484, out_features=3000, bias=True)
  )
)

In [3]:
# Run the model over the whole test loader and collect one
# {'question_id', 'answer'} record per question.
results = []
with torch.no_grad():  # inference only: no autograd bookkeeping needed
    for batch in tqdm.tqdm(test_loader):
        features = batch[0].cuda()
        question_ids = batch[1]
        logits = model(features)
        # torch.max over dim=1 yields (max values, indices); only the indices
        # of the highest-scoring answer are needed, as plain Python ints.
        best_answer_ids = torch.max(logits, dim=1)[1].tolist()
        results.extend(
            {
                'question_id': int(question_id),
                'answer': test_dataset.aid_to_ans[answer_id]
            }
            for question_id, answer_id in zip(question_ids, best_answer_ids)
        )

100%|██████████| 438/438 [05:35<00:00,  1.30it/s]


In [4]:
# Write the collected predictions to a JSON results file inside the project's
# baseline directory.
results_path = os.path.join(ROOT_DIR, 'baseline', 'baseline_test-dev2015_results.json')
with open(results_path, 'w') as out_file:
    json.dump(results, out_file)