In [1]:
from src.data_utils import Claim_Verification_Dataset
from allennlp.training.metrics import CategoricalAccuracy
import torch
from torch.utils.data import Dataset, DataLoader
from src.QA_models import T5_QA_From_Oracle_Facts, T5_QA_Only_Query
from src.QA_models import *
from main import *
from tqdm import tqdm

%load_ext autoreload
%autoreload 2
%load_ext line_profiler

In [2]:
claim_verification_dataset = Claim_Verification_Dataset()

In [3]:
train_data = claim_verification_dataset.get_data('rule_taker_3', 'train')
dev_data = claim_verification_dataset.get_data('rule_taker_3', 'dev')

In [4]:
claim_verification_dataset.data['rule_taker_1']['train'][1]

{'question': 'Dave is not smart.',
 'facts': ['Anne is quiet.',
  'Dave is round.',
  'Fiona is quiet.',
  'Dave is rough.',
  'Dave is smart.',
  'Fiona is not round.',
  'Bob is kind.',
  'Dave is not young.',
  'Anne is not young.',
  'Bob is young.',
  'Kind, young things are not smart.'],
 'answer': False}

# Seq2Seq models

In [5]:
args = parser.parse_args("--name test --batch_sz 32 --epochs 5 --warmup_steps 200 --gpu_id 1".split(' '))

In [6]:
model = T5_QA_From_Oracle_Facts(args)

In [7]:
dataloader = DataLoader(train_data, batch_size=args.batch_sz, shuffle=True, collate_fn = model.collate)

In [8]:
batch = next(iter(dataloader))

In [9]:
pl_trainer = Trainer(gpus=args.gpu_id, gradient_clip_val=0.5, amp_level='O1', max_epochs=args.epochs)
model.train()
pl_trainer.fit(model, dataloader)

GPU available: True, used: True
TPU available: None, using: 0 TPU cores

  | Name        | Type                       | Params
-----------------------------------------------------------
0 | transformer | T5ForConditionalGeneration | 222 M 
-----------------------------------------------------------
222 M     Trainable params
0         Non-trainable params
222 M     Total params
891.614   Total estimated model params size (MB)


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…




1

In [9]:
model.sample_to_input_text(train_data[0])

'Context: Julius Caesar had three children. ||| Genghis Khan had sixteen children. ||| Modern geneticists have determined that  out of every 200 men today has DNA that can be traced to Genghis Khan. ||| Query: Are more people today related to Genghis Khan than Julius Caesar?'

In [14]:
model.inference(train_data[:2])

Re-writing: 100%|██████████| 1/1 [00:00<00:00,  3.54it/s]


[{'qid': '2bc9c4f9c19c167187f2',
  'term': 'Genghis Khan',
  'description': 'founder and first Great Khan of the Mongol Empire',
  'question': 'Are more people today related to Genghis Khan than Julius Caesar?',
  'answer': True,
  'facts': ['Julius Caesar had three children.',
   'Genghis Khan had sixteen children.',
   'Modern geneticists have determined that  out of every 200 men today has DNA that can be traced to Genghis Khan.'],
  'decomposition': ['How many kids did Julius Caesar have?',
   'How many kids did Genghis Khan have?',
   'Is #2 greater than #1?'],
  'evidence': [[[['Caesarion-2', 'Julia (daughter of Caesar)-1']],
    [['Alakhai Bekhi-1', 'Tolui-1'], 'no_evidence'],
    ['operation']],
   [[['Julius Caesar-75']], [['Genghis Khan-17']], ['operation']],
   [[['Gaius Julius Caesar-7']],
    [['Genghis Khan-15'], 'no_evidence'],
    ['no_evidence', 'operation']]],
  'all_generations': ['true', 'false', 'True', 'truth'],
  'scores': tensor([0.5528, 0.4312, 0.0110, 0.0050])

# Reasoning in Decoder

In [51]:
model = Reasoning_in_Decoder(args)

In [49]:
enc_ids = tokenizer(['John is blue', 'Mary is yellowish'], return_tensors='pt', padding=True)
enc_ids

{'input_ids': tensor([[1079,   19, 1692,    1,    0],
        [3790,   19, 4459, 1273,    1]]), 'attention_mask': tensor([[1, 1, 1, 1, 0],
        [1, 1, 1, 1, 1]])}

In [50]:
dec_ids = tokenizer(['What colour is John? Jon is <extra_id_0>.'], return_tensors='pt', padding=True)
dec_ids

{'input_ids': tensor([[  363,  3243,    19,  1079,    58,  8178,    19, 32099,     3,     5,
             1]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}

In [51]:
encoder_outputs = transformer.encoder(input_ids=enc_ids['input_ids'], attention_mask=enc_ids['attention_mask'])
encoder_hidden_states = encoder_outputs[0]

In [54]:
encoder_hidden_states.shape

torch.Size([2, 5, 512])

In [95]:
enc_ids['attention_mask'][[True, False]]

tensor([[1, 1, 1, 1, 0]])

In [91]:
enc_ids['attention_mask'].reshape(-1).to(torch.bool)

tensor([ True,  True,  True,  True, False,  True,  True,  True,  True,  True])

In [99]:
a = encoder_hidden_states.reshape(-1,512)[enc_ids['attention_mask'].reshape(-1).to(torch.bool)]
a.shape

torch.Size([9, 512])

In [102]:
torch.cat([a, torch.zeros(3,512)]).shape

torch.Size([12, 512])

In [7]:
batch = model.collate([{'facts':['The capital of <extra_id_0> is Madrid.'], 'decoder_text':'<pad> Madrid, Madrid'}])

In [8]:
batch = model.collate(example_set)

NameError: name 'example_set' is not defined

In [9]:
batch

{'fusion_map': [[0, 1]],
 'encoder_ids': tensor([[   37,  1784,    13, 32099,    19, 12033,     5,     1]]),
 'encoder_att_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1]]),
 'decoder_input_ids': tensor([[    0, 12033,     6, 12033]]),
 'decoder_target_ids': tensor([[12033,     6, 12033,     1]]),
 'decoder_att_mask': tensor([[1, 1, 1, 1]])}

In [59]:
flat_enc_hidden_states = encoder_hidden_states.reshape(1,-1,512)
flat_enc_atts = enc_ids['attention_mask'].reshape(1,-1)

In [65]:
dec_outputs = transformer.decoder(input_ids=dec_ids['input_ids'], 
                    attention_mask=dec_ids['attention_mask'], 
                    encoder_hidden_states=flat_enc_hidden_states, 
                    encoder_attention_mask=flat_enc_atts)
sequence_output = dec_outputs[0]
lm_logits = transformer.lm_head(sequence_output)

In [71]:
example_set = claim_verification_dataset.data['rule_taker_1']['train'][:10]

In [72]:
example_set

[{'question': 'Bob is kind.',
  'facts': ['Anne is quiet.',
   'Dave is round.',
   'Fiona is quiet.',
   'Dave is rough.',
   'Dave is smart.',
   'Fiona is not round.',
   'Bob is kind.',
   'Dave is not young.',
   'Anne is not young.',
   'Bob is young.',
   'Kind, young things are not smart.'],
  'answer': True},
 {'question': 'Dave is not smart.',
  'facts': ['Anne is quiet.',
   'Dave is round.',
   'Fiona is quiet.',
   'Dave is rough.',
   'Dave is smart.',
   'Fiona is not round.',
   'Bob is kind.',
   'Dave is not young.',
   'Anne is not young.',
   'Bob is young.',
   'Kind, young things are not smart.'],
  'answer': False},
 {'question': 'Bob is not smart.',
  'facts': ['Anne is quiet.',
   'Dave is round.',
   'Fiona is quiet.',
   'Dave is rough.',
   'Dave is smart.',
   'Fiona is not round.',
   'Bob is kind.',
   'Dave is not young.',
   'Anne is not young.',
   'Bob is young.',
   'Kind, young things are not smart.'],
  'answer': True},
 {'question': 'Bob is smart.

In [53]:
_=  model.to('cuda:1')

In [12]:
model.tokenizer.decode([ 7781,   603,    10,    19,     3, 17396,     5, 11801,    10,  6136, 1,     0])

'Claim: is bob. Answer: false</s> <pad>'

In [98]:
model.training_step(batch, 0)

RuntimeError: Input, output and indices must be on the current device

In [97]:
model.inference([{'facts':['John is happy.','Mary is sad.'], 'decoder_text':'<pad> happy is'}], num_return_sequences=1)

Re-writing: 100%|██████████| 1/1 [00:00<00:00, 34.31it/s]


[{'facts': ['John is happy.', 'Mary is sad.'],
  'decoder_text': '<pad> happy is',
  'all_generations': ['<pad> happy is John.</s>'],
  'scores': tensor([1.]),
  'top_output': '<pad> happy is John.</s>'}]

In [62]:
%debug

> [0;32m/nfs/Complex_Decomposition_QA/src/QA_models.py[0m(188)[0;36mprepare_inputs_for_generation[0;34m()[0m
[0;32m    186 [0;31m        [0mbatch_size[0m [0;34m=[0m [0minput_ids[0m[0;34m.[0m[0mshape[0m[0;34m[[0m[0;36m0[0m[0;34m][0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    187 [0;31m[0;34m[0m[0m
[0m[0;32m--> 188 [0;31m        [0mnew_decoder_mask[0m [0;34m=[0m [0mtorch[0m[0;34m.[0m[0mcat[0m[0;34m([0m[0;34m[[0m[0mdecoder_attention_mask[0m[0;34m,[0m [0mtorch[0m[0;34m.[0m[0mones[0m[0;34m([0m[0mbatch_size[0m[0;34m,[0m [0mpadding_delta[0m[0;34m,[0m [0mdevice[0m[0;34m=[0m[0mself[0m[0;34m.[0m[0mdevice[0m[0;34m)[0m[0;34m][0m[0;34m,[0m [0mdim[0m[0;34m=[0m[0;36m1[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    189 [0;31m[0;34m[0m[0m
[0m[0;32m    190 [0;31m        [0mprint[0m[0;34m([0m[0;34m'decoder_attention_mask'[0m[0;34m,[0m[0mdecoder_attention_mask[0m[0;34m)[0m[0;34m[0m[0;34m[0m

ipdb>  input_ids


tensor([[  0,   0,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0],
        [  0,  48,  19,   3,   9, 794,  82, 388],
        [  0,  48,  19,   3,   9, 794,  82, 388],
        [  0,  48,  19,   3,   9, 794,  82, 388]], device='cuda:1')


ipdb>  exit


In [67]:
model.tokenizer.decode([   0,    0,    0,    0,    0,    0,    0,    0,  325, 1784,   15,   20,
           50, 1784,   15,   20,   50, 1784,   15,  259,  260,  159,    5])

'<pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> La capitale de la capitale de la capitale est paris.'

In [14]:
from transformers import T5Tokenizer, T5ForConditionalGeneration

t5_tokenizer = T5Tokenizer.from_pretrained('t5-small')
t5_model = T5ForConditionalGeneration.from_pretrained('t5-small')
my_model = model.transformer

In [60]:
t5_input_ids = t5_tokenizer("The capital of <extra_id_0> is paris.", return_tensors="pt").input_ids  # Batch size 1
t5_outputs = t5_model.generate(t5_input_ids, num_beams=3, early_stopping=True, use_cache=False, output_hidden_states=True, return_dict_in_generate=True)
t5_tokenizer.decode(t5_outputs.sequences[0])

'<pad> La capitale de la capitale est paris.</s>'

In [19]:
t5_outputs.decoder_hidden_states[1][-1]

tensor([[[-5.6781e-02,  1.2296e-01,  4.2948e-02,  ..., -6.7451e-02,
           4.0020e-04,  5.7785e-02],
         [-7.0078e-02,  2.4940e-01,  2.6310e-02,  ..., -3.8364e-02,
           3.3131e-05,  1.1047e-01]]])

In [121]:
t5_outputs[0].shape

torch.Size([1, 14])

In [122]:
my_inputs = t5_tokenizer("The capital of <extra_id_0> is Madrid.", return_tensors="pt")  # Batch size 1
my_input_ids = my_inputs.input_ids
my_input_att_mask = my_inputs.attention_mask
my_decoder_inputs = t5_tokenizer("<pad> Madrid,", return_tensors="pt", add_special_tokens=False)  # Batch size 1
my_decoder_ids = my_decoder_inputs.input_ids
my_decoder_att_mask = my_decoder_inputs.attention_mask

# encoder_outputs = my_model.encoder(my_decoder_input_ids)

# t5_outputs = t5_model.generate(t5_input_ids, num_beams=1, early_stopping=True, use_cache=False, output_hidden_states=True, return_dict_in_generate=True)
# t5_tokenizer.decode(t5_outputs[0])

In [123]:
t5_tokenizer.decode([    0, 12033, 6])

'<pad> Madrid,'

In [126]:
t5_model.lm_head(t5_outputs.decoder_hidden_states[2][-1]).argmax(-1)

tensor([[12033,     6, 12033]])

In [127]:
model.forward([[0,1]], my_input_ids, my_input_att_mask, my_decoder_ids, my_decoder_att_mask).argmax(-1)

encoder_fused_states torch.Size([1, 8, 512])
fused_attention_mask torch.Size([1, 8])
decoder_input_ids torch.Size([1, 3])
decoder_attention_mask torch.Size([1, 3])
tensor([[[-5.6781e-02,  1.2296e-01,  4.2948e-02,  ..., -6.7451e-02,
           4.0020e-04,  5.7785e-02],
         [-7.0078e-02,  2.4940e-01,  2.6310e-02,  ..., -3.8364e-02,
           3.3131e-05,  1.1047e-01],
         [-9.8589e-02,  1.6192e-01,  6.9472e-02,  ..., -9.7110e-02,
           4.9539e-04,  1.3432e-01]]], grad_fn=<MulBackward0>)


tensor([[12033,     6, 12033]])

In [33]:
my_model.decoder(my_input_ids)

BaseModelOutputWithPastAndCrossAttentions(last_hidden_state=tensor([[[ 1.7982e-01,  6.6164e-02,  4.1064e-02,  ..., -2.1999e-01,
           1.0907e-01, -2.3763e-02],
         [-3.4724e-01,  1.8860e-01,  9.9379e-02,  ..., -2.6027e-01,
          -5.7355e-02, -1.0252e-01],
         [ 7.4379e-02, -8.0222e-02,  1.1290e-02,  ...,  2.8214e-02,
           5.8411e-02, -5.7466e-02],
         ...,
         [-1.5884e-01, -3.1136e-01, -8.1628e-02,  ...,  8.8100e-02,
           1.1413e-01,  2.0548e-02],
         [-2.7359e-02, -2.4308e-01, -5.4980e-02,  ...,  2.5070e-02,
          -1.6737e-01, -1.0863e-01],
         [ 1.2779e-02, -2.5673e-02, -1.0659e-02,  ..., -7.3198e-02,
           5.0401e-03, -6.9694e-05]]], grad_fn=<MulBackward0>), past_key_values=None, hidden_states=None, attentions=None, cross_attentions=None)

In [186]:
t5_outputs[0]

tensor([    0, 32099,  2447,   704, 32098,     8, 32097,  2447,     5,     1])

In [185]:
t5_input_ids

tensor([[   37, 32099, 10681,    16, 32098,  2447,     5,     1]])