In [1]:
from src.model.nli_models import *
from src.model.novelty_models import *
from src.defaults import *

def load_novelty_model(_id):
    # load model data 
    check_model(_id)
    def load_model_data(_id):
        model_path = os.path.join("./results/", _id, "model.pt")
        model_data = torch.load(model_path)
        return model_data
    field = load_field(_id)
    model_data = load_model_data(_id)
    encoder_id = model_data["options"]["load_nli"]
    check_model(encoder_id)

    def load_encoder(enc_data):
        if enc_data["options"].get("attention_layer_param", 0) == 0:
            enc_data["options"]["use_glove"] = False
            model = bilstm_snli(enc_data["options"])
        elif enc_data["options"].get("r", 0) == 0:
            enc_data["options"]["use_glove"] = False
            model = attn_bilstm_snli(enc_data["options"])
        else:
            enc_data["options"]["use_glove"] = False
            model = struc_attn_snli(enc_data["options"])
        model.load_state_dict(enc_data["model_dict"])
        return model
    
    enc_data = load_encoder_data(encoder_id)
    encoder = load_encoder(enc_data).encoder

    model = HAN(model_data["options"],encoder)
    model.load_state_dict(model_data["model_dict"])
    return model,field


In [2]:
from torchtext.data import Example 

def encode_text(text,field):
    ex = Example.fromlist([text],[("text",field)])
    enc = field.process([ex.text])
    return torch.tensor(enc)

In [3]:
model,field = load_novelty_model('NOV-1014')


dropout option adds dropout after all but last recurrent layer, so non-zero dropout expects num_layers greater than 1, but got dropout=0.3 and num_layers=1



In [44]:
source = "A balanced exercise routine for an athlete must consists of cardio as well as weight training. The way we perform these two exercises is rather important . The researchers believe that one should follow their research which suggests the ideal method for working out. A workout should start with some stretching and small dynamic movements to warm up thebody. This should be followed by the weight training or a High Intensity Interval training. We must also include a couple minutes of reset between this transition. The can be followed by aproper session of cardio exercises like running or swimming. One should adjust the intensityof this cardio exercises as per their training goal. Researchers also say that a training sessionshould end with a cool-down which includes stretching and relaxation"
s_enc = encode_text(source,field)



Example class will be retired soon and moved to torchtext.legacy. Please see the most recent release notes for further information.


To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).



In [45]:
target = "Research published by a team of sports scientists reveals some interesting in-formation about the recipe for in ideal workout session. A balanced exercise routine for an athlete must consists of cardio as well as weight training. The way we perform these twoe xercises is rather important . This should be followed by the weight training or a High In-tensity Interval training. A workout should start with some stretching and small dynamicmovements to warm up the body. The can be followed by a proper session of cardio exerciseslike running or swimming. A couple minutes of rest between these high intensity training isalso considered beneficial. This should be followed by the weight training or a High IntensityInterval training. The fligt from usa to india got delayed.In regex, captures are numbered, but in some implementations, captures can be named. To understand the basics first, let’s see an example in python where capturing is performed in the figure 5.1. In this example, we can see that the input string of ’ac’ is matched."
t_enc = encode_text(target,field)



Example class will be retired soon and moved to torchtext.legacy. Please see the most recent release notes for further information.


To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).



In [46]:
seed_torch()
model.eval()
with torch.no_grad():
    opt,s_att,t_att = model.forward_with_attn(s_enc,t_enc)
    pred = F.softmax(opt)

torch.Size([1, 50, 50])

Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.



In [47]:
s_enc[0][0]



tensor([    2,     4,  3657,  1321,  1345,    40,    22,  1081,  3812,  5533,
           13, 14252,    60,   693,    60,  2919,  1167,     5,     3,     1,
            1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1,     1,     1])

In [48]:
def decode(inp,field):
    return [[field.nesting_field.vocab.itos[i] for i in sent] for sent in inp]


In [49]:
from pprint import pprint
pprint(decode(s_enc[0],field))

[['<sos>',
  'a',
  'balanced',
  'exercise',
  'routine',
  'for',
  'an',
  'athlete',
  'must',
  'consists',
  'of',
  'cardio',
  'as',
  'well',
  'as',
  'weight',
  'training',
  '.',
  '<eos>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>'],
 ['<sos>',
  'the',
  'way',
  'we',
  'perform',
  'these',
  'two',
  'exercises',
  'is',
  'rather',
  'important',
  '.',
  '<eos>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>',
  '<pad>'

In [81]:
def decode(inp,field):
    return [[field.nesting_field.vocab.itos[i] for i in sent] for sent in inp]

def attention_combined(inp,field,s_att,w_att=None):
    tok_str = decode(inp,field)
    print(len(tok_str))
    assert len(tok_str) == s_att.shape[0]
    assert len(tok_str) == w_att.shape[0]

    assert len(tok_str[0]) == w_att.shape[1]

    opt = []
    for sent in range(len(tok_str)):
        sent_with_att = []
        for word in range(len(tok_str[0])):
            word_str = tok_str[sent][word]
            if word_str != "<pad>":
                sent_with_att.append((word_str,w_att[sent][word].item()))
        if sent_with_att!=[]:
            opt.append((sent_with_att,s_att[sent].item()))
    return opt
        



In [88]:
x = attention_combined(s_enc[0],field,s_att[0][0,:,0],s_att[1][0])

50


In [123]:
import pandas as pd
import numpy as np
import html
import random
from IPython.core.display import display, HTML
from IPython.display import IFrame
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

def html_string(word,color,new_line = False):
    template = '<span class="barcode"; style="color: black; background-color: {}">{}</span>'
    colored_string = template.format(color, '&nbsp' + word + '&nbsp') + ("<br>" if new_line else "")
    return colored_string


def colorize(attention_list):
    cmap_sent = matplotlib.cm.Blues
    cmap_word = matplotlib.cm.Reds

    template = '<span class="barcode"; style="color: black; background-color: {}">{}</span>'
    colored_string = ''

    for sent, sent_att in attention_list:
        sent_color = matplotlib.colors.rgb2hex(cmap_sent(sent_att*2)[:3])
        colored_string  += html_string('\t---\t ',sent_color)
        for word,word_att in sent:
            word_color = matplotlib.colors.rgb2hex(cmap_word(word_att)[:3])
            colored_string += html_string(word,word_color)
        colored_string += "<br>"
    colored_string += "<br><br><br><br>"
    return colored_string

    



In [124]:
s = colorize(x)
with open('colorize.html', 'w') as f:
    f.write(s)


In [125]:
IFrame('./colorize.html',width=1200,height=400)
