In [1]:
# from comet_ml import Experiment

import os
import json
import torch
import random
import argparse
import itertools
import numpy as np
from tqdm import tqdm
from torch import nn, optim
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import MultiStepLR
from visdial.model import VisdialModel
from visdial.loss import get_criterion
from visdial.data.dataset import VisDialDataset
from visdial.metrics import SparseGTMetrics, NDCG, Monitor
from visdial.utils.checkpointing import CheckpointManager, load_checkpoint
from visdial.utils import move_to_cuda


# =============================================================================
#   INPUT ARGUMENTS AND CONFIG AND PARAMS
# =============================================================================

from configs.transformer_config import get_config

# Build the run configuration through the project's get_config() and echo it as
# indented JSON so the exact settings used are captured in the cell output.
config = get_config()
print(json.dumps(config, indent=4))


HOME_PATH /home/quanguet
DATA_PATH /home/quanguet/datasets/visdial
{
    "seed": 0,
    "callbacks": {
        "validate": true,
        "resume": false,
        "comet_project": "lf-bert-disc",
        "path_pretrained_ckpt": "",
        "path_dir_save_ckpt": "/home/quanguet/checkpoints/visdial/lf_disc/lf_bert_disc"
    },
    "dataset": {
        "overfit": true,
        "img_norm": 1,
        "concat_history": true,
        "batch_size": 2,
        "cpu_workers": 4,
        "max_seq_len": 25,
        "is_return_options": true,
        "is_add_boundaries": true,
        "train": {
            "path_feat_img": "/home/quanguet/datasets/visdial/features_faster_rcnn_x101_val.h5",
            "path_json_dialogs": "/home/quanguet/datasets/visdial/visdial_1.0_val.json",
            "path_feat_history": "/home/quanguet/datasets/visdial/features_bert_train_history.h5",
            "path_feat_answers": "/home/quanguet/datasets/visdial/features_bert_train_answers.h5",
            "path_feat_que

[nltk_data] Downloading package punkt to /home/quanguet/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [2]:
# Reproducibility: drive every random number generator used here from the single
# seed stored in the config.
seed = config['seed']

# Exported for Python processes started after this point; the interpreter that is
# already running fixed its own hash seed at startup.
os.environ['PYTHONHASHSEED'] = str(seed)

# Python stdlib, NumPy, then torch (CPU, current GPU, all GPUs) -- same order as before.
for _seed_fn in (random.seed,
                 np.random.seed,
                 torch.manual_seed,
                 torch.cuda.manual_seed,
                 torch.cuda.manual_seed_all):
    _seed_fn(seed)

# Ask cuDNN for deterministic kernels and switch off its benchmark-based autotuner.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [3]:
# Build the dataset object for the 'train' split from the shared config.
# NOTE(review): the recorded output logs "[val2018]" while tokenizing and the config
# dump points train.path_json_dialogs at visdial_1.0_val.json -- presumably deliberate
# for the overfit run ("overfit": true); confirm before a real training run.
train_dataset = VisDialDataset(config, split='train')

  3%|▎         | 1208/45238 [00:00<00:03, 12078.25it/s]

[val2018] Tokenizing questions...


100%|██████████| 45238/45238 [00:03<00:00, 13065.52it/s]
  8%|▊         | 2624/34822 [00:00<00:02, 13081.15it/s]

[val2018] Tokenizing answers...


100%|██████████| 34822/34822 [00:02<00:00, 13091.94it/s]
100%|██████████| 2064/2064 [00:00<00:00, 11009.58it/s]

[val2018] Tokenizing captions...





In [4]:

# Both loaders use the same number of worker processes, taken from the config.
cpu_workers = config['dataset']['cpu_workers']

# Training batches: size comes from the config, order reshuffled every epoch.
train_dataloader = DataLoader(
    train_dataset,
    batch_size=config['dataset']['batch_size'],
    num_workers=cpu_workers,
    shuffle=True,
)

# Validation split, served in its stored order with a fixed batch size of 2.
val_dataset = VisDialDataset(config, split='val')
val_dataloader = DataLoader(
    val_dataset,
    batch_size=2,
    num_workers=cpu_workers,
)


  3%|▎         | 1225/45238 [00:00<00:03, 12249.14it/s]

[val2018] Tokenizing questions...


100%|██████████| 45238/45238 [00:03<00:00, 12683.11it/s]
  4%|▍         | 1313/34822 [00:00<00:02, 13128.98it/s]

[val2018] Tokenizing answers...


100%|██████████| 34822/34822 [00:02<00:00, 13218.18it/s]
100%|██████████| 2064/2064 [00:00<00:00, 10542.76it/s]

[val2018] Tokenizing captions...





In [5]:
from visdial.model import get_transformer_model

In [6]:
# Instantiate the network from the config via the project's factory function.
model = get_transformer_model(config)

In [7]:
# Bare expression: the notebook renders the model's repr (its nested module tree).
model

VisdialModel(
  (encoder): Encoder(
    (text_encoder): TextEncoder(
      (text_embeddings): TextEmbeddings(
        (tok_embedding): Embedding(11322, 300, padding_idx=0)
        (pos_embedding): PositionalEmbedding()
        (linear): Linear(in_features=300, out_features=512, bias=True)
      )
      (hist_encoder): HistEncoder(
        (encoder): TransformerEncoder(
          (layers): ModuleList(
            (0): EncoderLayer(
              (self_attn): MultiHeadAttention(
                (dropout): Dropout(p=0.2)
                (attn_fn): Attention(
                  (dropout): Dropout(p=0.2)
                )
                (linears): ModuleList(
                  (0): Linear(in_features=512, out_features=512, bias=True)
                  (1): Linear(in_features=512, out_features=512, bias=True)
                  (2): Linear(in_features=512, out_features=512, bias=True)
                  (3): Linear(in_features=512, out_features=512, bias=True)
                )
              )

In [8]:
# Scratch value: a torch.Size describing a 512 x 100 shape.
# NOTE(review): no other cell visible in this notebook reads `x` -- likely leftover.
x = torch.Size([512, 100])

In [9]:
# Total number of scalar weights in the model.
# Tensor.numel() returns a Python int, so the total stays an exact integer and no
# temporary tensor is built per parameter (the previous tensor-based product turned
# into a float for 0-dim parameters, because an empty size converts to a float tensor).
num_params = sum(p.numel() for p in model.parameters())

In [10]:
# Device string handed to model.to() and move_to_cuda() in the following cells.
# 'cuda' means the notebook needs a CUDA-capable GPU to run.
device = 'cuda'

In [11]:
# Move the model's parameters and buffers onto the selected device.
model = model.to(device)

In [12]:
# Multi-class cross-entropy. nn.CrossEntropyLoss applies log-softmax internally, so it
# expects raw (unnormalized) scores plus integer class indices as targets.
criterion = nn.CrossEntropyLoss()

In [13]:
# Adam over every model parameter with a fixed learning rate of 1e-3
# (no LR scheduler is attached in the cells visible here).
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [14]:
# Overfitting sanity check: 100 passes over the training loader, one optimizer step
# per batch, logging the loss of every step.
#
# NOTE(review): in the recorded run the loss is flat at 4.4222 from epoch 1 onwards and
# the rows of `opt_out` are exactly one-hot (0./1.). nn.CrossEntropyLoss expects
# unnormalized scores -- check whether the model already normalizes / saturates
# 'opt_scores' before they reach the criterion.
for epoch in range(100):
    for idx, batch in enumerate(train_dataloader):
        batch = move_to_cuda(batch, device)

        optimizer.zero_grad()

        output = model(batch)

        # Ground-truth option index per question; flattened below to match the scores.
        opt_target = batch['ans_ind']

        # Collapse all leading dimensions so each row holds the scores of one question.
        opt_out = output['opt_scores']
        opt_out = opt_out.view(-1, opt_out.size(-1))

        batch_loss = criterion(opt_out, opt_target.view(-1))

        batch_loss.backward()

        optimizer.step()

        # .item() logs the plain float instead of the tensor repr (device, grad_fn).
        print(epoch, idx, batch_loss.item())

0 0 tensor(4.5976, device='cuda:0', grad_fn=<NllLossBackward>)
1 0 tensor(4.4222, device='cuda:0', grad_fn=<NllLossBackward>)
2 0 tensor(4.4221, device='cuda:0', grad_fn=<NllLossBackward>)
3 0 tensor(4.4222, device='cuda:0', grad_fn=<NllLossBackward>)
4 0 tensor(4.4222, device='cuda:0', grad_fn=<NllLossBackward>)
5 0 tensor(4.4222, device='cuda:0', grad_fn=<NllLossBackward>)
6 0 tensor(4.4222, device='cuda:0', grad_fn=<NllLossBackward>)
7 0 tensor(4.4222, device='cuda:0', grad_fn=<NllLossBackward>)
8 0 tensor(4.4222, device='cuda:0', grad_fn=<NllLossBackward>)
9 0 tensor(4.4222, device='cuda:0', grad_fn=<NllLossBackward>)
10 0 tensor(4.4222, device='cuda:0', grad_fn=<NllLossBackward>)
11 0 tensor(4.4222, device='cuda:0', grad_fn=<NllLossBackward>)
12 0 tensor(4.4222, device='cuda:0', grad_fn=<NllLossBackward>)
13 0 tensor(4.4222, device='cuda:0', grad_fn=<NllLossBackward>)
14 0 tensor(4.4222, device='cuda:0', grad_fn=<NllLossBackward>)
15 0 tensor(4.4222, device='cuda:0', grad_fn=<NllL

In [15]:
# Ground-truth answer indices of the last batch left over from the training loop
# (recorded output: a 2 x 10 integer tensor on the GPU).
batch['ans_ind']

tensor([[ 0, 50, 40, 11, 95, 40, 84, 20, 53, 10],
        [33, 31, 20, 56, 60, 75, 64, 90, 54, 52]], device='cuda:0')

In [17]:
# First row of the flattened score matrix produced by the final training step.
opt_out[0]

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], device='cuda:0',
       grad_fn=<SelectBackward>)

In [18]:
# Shape of the flattened scores (recorded output: 20 rows x 100 columns;
# presumably questions x candidate options -- confirm against the model).
opt_out.shape

torch.Size([20, 100])

In [22]:
# For every row of the flattened scores, print the index of the highest-scoring
# column followed by that score. The row count is read from the tensor instead of
# being hard-coded, so the cell keeps working when the batch size changes.
for i in range(opt_out.size(0)):
    row = opt_out[i]
    best = row.argmax()  # computed once and reused for the lookup
    print(best)
    print(row[best])

tensor(30, device='cuda:0')
tensor(1., device='cuda:0', grad_fn=<SelectBackward>)
tensor(50, device='cuda:0')
tensor(1., device='cuda:0', grad_fn=<SelectBackward>)
tensor(40, device='cuda:0')
tensor(1., device='cuda:0', grad_fn=<SelectBackward>)
tensor(0, device='cuda:0')
tensor(1., device='cuda:0', grad_fn=<SelectBackward>)
tensor(20, device='cuda:0')
tensor(1., device='cuda:0', grad_fn=<SelectBackward>)
tensor(40, device='cuda:0')
tensor(1., device='cuda:0', grad_fn=<SelectBackward>)
tensor(80, device='cuda:0')
tensor(1., device='cuda:0', grad_fn=<SelectBackward>)
tensor(50, device='cuda:0')
tensor(1., device='cuda:0', grad_fn=<SelectBackward>)
tensor(40, device='cuda:0')
tensor(1., device='cuda:0', grad_fn=<SelectBackward>)
tensor(70, device='cuda:0')
tensor(1., device='cuda:0', grad_fn=<SelectBackward>)
tensor(70, device='cuda:0')
tensor(1., device='cuda:0', grad_fn=<SelectBackward>)
tensor(40, device='cuda:0')
tensor(1., device='cuda:0', grad_fn=<SelectBackward>)
tensor(20, device