In [1]:
import torch
import numpy as np
import voduct as vo
import tokenizer as tk
import os
import matplotlib.pyplot as plt

Using TensorFlow backend.


In [2]:
# Root directory holding one sub-folder per saved model run.
main_folder = "splitdigs"
# Ask the project helper for the run folder names and prefix each one with
# the root directory so the resulting paths can be used directly.
folders = [
    os.path.join(main_folder, name)
    for name in vo.save_io.get_model_folders(main_folder)
]

In [3]:
# Fail with an explicit message when the listing is empty; a bare
# `folders[-1]` would raise an IndexError that gives no hint about the cause.
if not folders:
    raise IndexError("no model folders found under {!r}".format(main_folder))
# Use the last entry of the listing.
# NOTE(review): which run ends up last depends on the order returned by
# vo.save_io.get_model_folders — confirm this is the run you intend to load.
model_folder = folders[-1]
print(model_folder)

splitdigs/splitdigs_6_lr0.0005


In [4]:
# Build the model object for the selected run folder via the project helper.
# The recorded output shows that the helper prints the folder path.
# NOTE(review): presumably the architecture is rebuilt from hyperparameters
# saved in the folder — confirm in vo.save_io.load_model.
model = vo.save_io.load_model(model_folder)

splitdigs/splitdigs_6_lr0.0005


In [5]:
# Load the checkpoint saved for this run. It is indexed like a dict: besides
# 'state_dict' used here, later cells read 'word2idx', 'idx2word' and 'hyps'.
checkpt = vo.save_io.load_checkpoint(model_folder)
# Copy the stored weights into the model. Kept as the last expression so the
# notebook echoes load_state_dict's key-matching report.
model.load_state_dict(checkpt['state_dict'])

splitdigs/splitdigs_6_lr0.0005


<All keys matched successfully>

In [6]:
# Put the model in evaluation mode before running any predictions.
# eval() returns the module itself, so the notebook echoes its layer summary.
model.eval()

Transformer(
  (embeddings): Embedding(49, 512)
  (encoder): Encoder(
    (pos_encoding): PositionalEncoder()
    (enc_layers): ModuleList(
      (0): EncodingBlock(
        (norm0): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
        (multi_attn): MultiHeadAttention(
          (outs): Linear(in_features=384, out_features=512, bias=True)
        )
        (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
        (fwd_net): Sequential(
          (0): Linear(in_features=512, out_features=512, bias=True)
          (1): ReLU()
          (2): Linear(in_features=512, out_features=512, bias=True)
        )
        (norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
      )
      (1): EncodingBlock(
        (norm0): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
        (multi_attn): MultiHeadAttention(
          (outs): Linear(in_features=384, out_features=512, bias=True)
        )
        (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)


In [7]:
# Vocabulary lookups stored in the checkpoint: token -> index and index -> token.
word2idx, idx2word = checkpt['word2idx'], checkpt['idx2word']

In [8]:
# Sequence lengths stored on the encoder (questions) and decoder (answers).
seq_len_x = model.encoder.seq_len
seq_len_y = model.decoder.seq_len
print("X len:", seq_len_x)
print("Y len:", seq_len_y)

# Tokenizer configured with the checkpoint's vocabulary and the model's
# sequence lengths. Prepending/appending is switched off here and requested
# explicitly in the index_tokens calls further down.
tokenizer = vo.datas.Tokenizer(
    word2idx=word2idx,
    idx2word=idx2word,
    split_digits=(checkpt['hyps']['split_digits'] == True),
    seq_len_x=seq_len_x,
    seq_len_y=seq_len_y,
    prepend=False,
    append=False,
)

X len: 144
Y len: 9


## Evaluation

In [9]:
# NOTE(review): disabled alternative input. It would build X and Y from the
# project's WordProblems dataset instead of the single hand-written
# question/answer pair defined in the following cells.
# wp = vo.datas.WordProblems(difficulty="easy", split_digits=True)
# X = tokenizer.index_tokens(wp.token_qs, seq_len_x, prepend=True,append=True)
# Y = tokenizer.index_tokens(wp.token_ans, seq_len_y, prepend=True,append=True)

In [22]:
# A single hand-written word problem used to probe the model. The long
# question is split across adjacent literals purely for readability; the
# resulting string is unchanged.
questions = [
    "21 red box objects start at the starting point. "
    "you move 5 red box objects from the starting point to the goal. "
    "how many red objects are at the starting point?"
]
# Target text that is fed to the decoder for the question above.
answers = [
    "1111 red objects"
]

In [23]:
# Tokenize the question with the digit-splitting setting stored in the
# checkpoint, then index it to the encoder length with the prepend/append
# options enabled.
toks_x = tk.tokenize(
    questions[0],
    split_digits=(checkpt['hyps']['split_digits'] == True),
)
X = tokenizer.index_tokens(
    [toks_x],
    seq_len_x,
    prepend=True,
    append=True,
)
# Same treatment for the answer, indexed to the decoder length.
toks_y = tk.tokenize(
    answers[0],
    split_digits=(checkpt['hyps']['split_digits'] == True),
)
Y = tokenizer.index_tokens(
    [toks_y],
    seq_len_y,
    prepend=True,
    append=True,
)

0 %    0 %    

In [24]:
# Map the indexed question back to tokens to check the encoding.
print([idx2word[index] for index in X[0].tolist()])

['<START>', '2', '1', 'red', 'box', 'objects', 'start', 'at', 'the', 'starting', 'point', '.', 'you', 'move', '5', 'red', 'box', 'objects', 'from', 'the', 'starting', 'point', 'to', 'the', 'goal', '.', 'how', 'many', 'red', 'objects', 'are', 'at', 'the', 'starting', 'point', '?', '<STOP>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>',

In [25]:
# Map the indexed answer back to tokens to check the encoding.
print([idx2word[index] for index in Y[0].tolist()])

['<START>', '1', '1', '1', '1', 'red', 'objects', '<STOP>', '<MASK>']


In [26]:
# Single forward pass with the full target sequence Y given to the model as
# its second input. X[:3] / Y[:3] take at most the first three rows; only one
# example was built above.
# Gradients are not needed for inspection, so the pass runs under no_grad
# (as the decoding loop in the next cell does) to avoid building an autograd
# graph.
with torch.no_grad():
    logits = model(X[:3], Y[:3])
# Greedy choice: index of the highest score along the last dimension.
preds = torch.argmax(logits, dim=-1)
print([idx2word[a.item()] for a in preds[0]])

['0', '0', 'objects', 'objects', 'objects', 'objects', '<STOP>', '<MASK>', '2']


In [27]:
# Iterative decoding experiment: on every step the model's own argmax output
# is fed back as the target-side input with one more position left unblanked,
# and the first row's prediction (minus position 0) is printed.
with torch.no_grad():
    # Start from the ground-truth target indices (at most the first three rows).
    preds = Y[:3].clone()
    for i in range(Y.shape[-1]-1):
        
        # Restore position 0 of every row from Y ('<START>' in the printout of Y above).
        preds[:,0] = Y[:3,0]
        temp = preds.data.clone()
        # Overwrite every position after i with index 0.
        # NOTE(review): assumes index 0 is the padding/mask token — confirm against word2idx.
        temp[:,i+1:] = 0
        preds = model(X[:3], temp)
        preds = torch.argmax(preds,dim=-1)
        # NOTE(review): predictions are fed back at the same positions they were
        # produced. If the output at position j is the prediction for token j+1,
        # this feedback is shifted by one — verify the alignment before trusting
        # the printed sequences.
        print([idx2word[a.item()] for a in preds[0,1:]])

['objects', 'objects', 'objects', 'objects', 'objects', 'objects', 'objects', 'objects']
['<STOP>', 'objects', 'objects', 'objects', 'objects', 'objects', 'objects', 'objects']
['<MASK>', '<STOP>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>', 'objects']
['objects', '<MASK>', 'objects', '<MASK>', '<MASK>', '<MASK>', '<MASK>', 'objects']
['<STOP>', 'objects', '<STOP>', 'objects', 'objects', 'objects', 'objects', 'objects']
['<MASK>', '<STOP>', '<MASK>', '<STOP>', '<STOP>', '<MASK>', '<MASK>', 'objects']
['objects', '<MASK>', 'objects', '<MASK>', '<MASK>', '<MASK>', '<MASK>', '<MASK>']
['<STOP>', 'objects', '<STOP>', 'objects', 'objects', 'objects', 'objects', 'objects']


In [22]:
# Inspect the shape of the most recent `preds`.
# NOTE(review): the recorded output is 3-D with a trailing size of 49, i.e. raw
# scores rather than argmax indices, and this cell's execution count (22) is
# lower than the preceding cell's (27). The output appears to come from an
# earlier kernel state — re-run the notebook in order to confirm.
preds.shape

torch.Size([3, 8, 49])

In [23]:
# Greedy decoding of the first row: pick the highest-scoring vocabulary index
# along the last dimension, then map the indices back to tokens.
ans = preds[0].argmax(dim=-1)
print([idx2word[index] for index in ans.tolist()])

['7', '3', 'green', 'cylinder', 'objects', '<STOP>', '<MASK>', '8']


In [38]:
# Run every index sequence through the model as both inputs and print the
# greedy (argmax) decoding as a space-separated sentence.
# NOTE(review): `idxs` is not defined in any visible cell (the recorded run
# fails with NameError). The loop expects a list of lists of token indices;
# define it before running this cell.
PAD_TO = 30  # sequences shorter than this are right-padded with index 0
# Use the device the model's parameters live on instead of hard-coding CUDA,
# so the cell also runs on CPU-only machines.
device = next(model.parameters()).device
for seq in idxs:
    if len(seq) < PAD_TO:
        seq = seq + [0] * (PAD_TO - len(seq))
    # [None] adds a leading batch dimension of size 1.
    batch = torch.LongTensor(seq)[None].to(device)
    with torch.no_grad():
        preds = model(batch, batch).data.cpu().squeeze()
    preds = torch.argmax(preds, dim=-1)
    print(" ".join(idx2word[p.item()] for p in preds))
    print()
    

NameError: name 'idxs' is not defined

In [13]:
# Filled by the forward hook below: one detached CPU copy of the collapser's
# output per forward pass of the model.
outputs = []


def _store_collapser_output(module, inp, out):
    """Forward hook: append a detached CPU copy of `out` to `outputs`."""
    outputs.append(out.detach().cpu())


# The callback has its own name so the handle no longer shadows the function.
# The handle stays bound to `hook` because a later cell calls `hook.remove()`.
hook = model.collapser.register_forward_hook(_store_collapser_output)

In [15]:
# Run every index sequence through the model so the registered forward hook
# records the collapser output for each one; nothing is printed here.
# NOTE(review): `idxs` is not defined in any visible cell — it is expected to
# be a list of lists of token indices.
PAD_TO = 30  # sequences shorter than this are right-padded with index 0
# Use the device the model's parameters live on instead of hard-coding CUDA.
device = next(model.parameters()).device
for seq in idxs:
    if len(seq) < PAD_TO:
        seq = seq + [0] * (PAD_TO - len(seq))
    # [None] adds a leading batch dimension of size 1.
    batch = torch.LongTensor(seq)[None].to(device)
    with torch.no_grad():
        preds = model(batch, batch).data.cpu().squeeze()
    

In [27]:
# Reference scale for the differences measured below: the L2 norm of the mean
# embedding vector divided by the embedding width. The width is read from the
# weight matrix instead of hard-coding 512 (the model summary shows
# Embedding(49, 512), so the value is unchanged for this model).
# NOTE(review): this is the norm of the *mean* embedding, not the mean of the
# per-token norms — confirm that this is the intended baseline.
emb_weight = model.embeddings.weight.detach()  # no gradient tracking needed here
avg_mag = (torch.sqrt((emb_weight.mean(0)**2).sum()) / emb_weight.shape[-1]).item()

In [29]:
# Euclidean distance between the first and second captured collapser outputs,
# divided by the length of the first dimension of outputs[0], then expressed
# as a percentage of avg_mag.
raw = (outputs[0] - outputs[1]).pow(2).sum().sqrt().div(len(outputs[0])).item()
print("Raw:", raw)
print("percent:", raw / avg_mag * 100)

Raw: 6.536159844472422e-07
percent: tensor(0.1645, device='cuda:0', grad_fn=<MulBackward0>)


In [31]:
# Euclidean distance between the third and first captured collapser outputs,
# divided by the length of the first dimension of outputs[0], then expressed
# as a percentage of avg_mag.
raw = (outputs[2] - outputs[0]).pow(2).sum().sqrt().div(len(outputs[0])).item()
print("Raw:", raw)
print("percent:", raw / avg_mag * 100)

Raw: 6.158345513540553e-07
percent: tensor(0.1550, device='cuda:0', grad_fn=<MulBackward0>)


In [32]:
# Euclidean distance between the third and second captured collapser outputs,
# divided by the length of the first dimension of outputs[0], then expressed
# as a percentage of avg_mag.
raw = (outputs[2] - outputs[1]).pow(2).sum().sqrt().div(len(outputs[0])).item()
print("Raw:", raw)
print("percent:", raw / avg_mag * 100)

Raw: 5.960464477539062e-07
percent: tensor(0.1500, device='cuda:0', grad_fn=<MulBackward0>)


In [33]:
# Detach the forward hook registered on model.collapser so later forward
# passes stop appending to `outputs`.
hook.remove()