In [1]:
from src.defaults import *
from src.datasets.novelty import *
import sys
old_stdout = sys.stdout

In [170]:
load_nli = 'NLI-92'

download_models_from_neptune(load_nli)
field = load_field(load_nli)
# field = None


dataset_conf = {'dataset': 'dlnd', 'max_num_sent': 60,"sent_tokenizer":"spacy","batch_size":4,"device":"cuda"}
# dataset_conf = {'dataset': 'dlnd', 'max_num_sent': 50,"sent_tokenizer":"spacy", "tokenizer":'spacy',"max_len":50,"batch_size":32,"device":"cuda"}
model_conf = {'results_dir': 'results', 'device': 'cuda', 'dropout': 0.2, 'dataset': 'dlnd', 'hidden_size': 150, 'use_glove': False,"num_filters":95,"filter_sizes":[3,5,9],"max_num_sent":60,"prune_p":50,"prune_q":10,"attention_layer_param":150,"attention_hops":5,"num_layers":1}

In [133]:
data = dlnd(dataset_conf,sentence_field = field)

In [134]:
for i in data.train_iter:
    print(i)
    break


[torchtext.data.batch.Batch of size 4]
	[.source]:[torch.cuda.LongTensor of size 4x60x50 (GPU 0)]
	[.target]:[torch.cuda.LongTensor of size 4x60x50 (GPU 0)]
	[.label]:[torch.cuda.LongTensor of size 4 (GPU 0)]


In [171]:
from src.model.nli_models import *
from src.model.novelty_models import *

def load_encoder(enc_data):
    if enc_data['options'].get("attention_layer_param",0)==0:
        model = bilstm_snli(enc_data["options"])
    elif enc_data['options'].get("r",0)==0:
        model = attn_bilstm_snli(enc_data["options"])
    else:
        model = struc_attn_snli(enc_data["options"])
    return model

nli_model_data = load_encoder_data(load_nli)
nli_model_data['options']["use_glove"] = False
encoder = load_encoder(nli_model_data).encoder
model_conf["encoder_dim"] = nli_model_data["options"]["hidden_size"]


In [201]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class concat_attention(nn.Module):
    def __init__(self, hidden_size):
        super().__init__()
        self.Wc1 = nn.Linear(2 * hidden_size, hidden_size, bias=False)
        self.Wc2 = nn.Linear(2 * hidden_size, hidden_size, bias=False)
        self.vc = nn.Linear(hidden_size, 1, bias=False)

    def forward(self, x, y):
        _s1 = self.Wc1(x).unsqueeze(1)
        _s2 = self.Wc2(y).unsqueeze(2)
        sjt = self.vc(torch.tanh(_s1 + _s2)).squeeze()
        ait = F.softmax(sjt, 2)
        qtc = ait.bmm(x)
        return qtc


class bilinear_attention(nn.Module):
    def __init__(self, hidden_size):
        super().__init__()
        self.Wb = nn.Linear(2 * hidden_size, 2 * hidden_size, bias=False)

    def forward(self, x, y):
        _s1 = self.Wb(x).transpose(2, 1)
        sjt = y.bmm(_s1)
        ait = F.softmax(sjt, 2)
        qtb = ait.bmm(x)
        return qtb


class dot_attention(nn.Module):
    def __init__(self, hidden_size):
        super().__init__()
        self.Wd = nn.Linear(2 * hidden_size, hidden_size, bias=False)
        self.vd = nn.Linear(hidden_size, 1, bias=False)

    def forward(self, x, y):
        _s1 = x.unsqueeze(1)
        _s2 = y.unsqueeze(2)
        sjt = self.vd(torch.tanh(self.Wd(_s1 * _s2))).squeeze()
        ait = F.softmax(sjt, 2)
        qtd = ait.bmm(x)
        return qtd


class minus_attention(nn.Module):
    def __init__(self, hidden_size):
        super().__init__()
        self.Wm = nn.Linear(2 * hidden_size, hidden_size, bias=False)
        self.vm = nn.Linear(hidden_size, 1, bias=False)

        self.Ws = nn.Linear(2 * hidden_size, hidden_size, bias=False)
        self.vs = nn.Linear(hidden_size, 1, bias=False)

    def forward(self, x, y):
        _s1 = x.unsqueeze(1)
        _s2 = y.unsqueeze(2)
        sjt = self.vm(torch.tanh(self.Wm(_s1 - _s2))).squeeze()
        ait = F.softmax(sjt, 2)
        qtm = ait.bmm(x)
        return qtm



class Attention(nn.Module):
    def __init__(self, conf):
        super(Attention, self).__init__()
        self.Ws = nn.Linear(
            2 * conf["hidden_size"],
            conf["attention_layer_param"],
            bias=False,
        )
        self.Wa = nn.Linear(conf["attention_layer_param"], 1, bias=False)

    def forward(self, hid):
        opt = self.Ws(hid)
        opt = torch.tanh(opt)
        opt = self.Wa(opt)
        opt = F.softmax(opt, dim=1)
        return opt



class SelfAttention(nn.Module):
    def __init__(self, conf):
        super(SelfAttention, self).__init__()
        self.ut_dense = nn.Linear(2 * conf["hidden_size"],conf["attention_layer_param"],bias = False)
        self.et_dense = nn.Linear(conf["attention_layer_param"],conf["attention_hops"],bias = False)


    def forward(self, x):
        # x shape: [batch_size, num_sent, embedding_width]
        # ut shape: [batch_size, num_sent, att_unit]
        ut = self.ut_dense(x)
        ut = torch.tanh(ut)
        # et shape: [batch_size, num_sent, att_hops]
        et = self.et_dense(ut)

        # att shape: [batch_size,  att_hops, seq_len]
        att = F.softmax(et)
        # output shape [batch_size, att_hops, embedding_width]
        output = torch.bmm(att.permute(0, 2, 1), x).squeeze(1)
        return output, att


class HAN_DOC(nn.Module):
    def __init__(self, conf, encoder):
        super(HAN_DOC, self).__init__()
        self.conf = conf
        self.encoder = encoder

        self.translate = nn.Linear(
            2 * self.conf["encoder_dim"], self.conf["hidden_size"]
        )
        self.act = nn.ReLU()
        self.dropout = nn.Dropout(conf["dropout"])
        self.template = nn.Parameter(torch.zeros((1)), requires_grad=True)
        self.lstm_layer = nn.LSTM(
            input_size=self.conf["hidden_size"],
            hidden_size=self.conf["hidden_size"],
            num_layers=self.conf["num_layers"],
            bidirectional=True,
        )
        self.attention = SelfAttention(conf)

        self.prune_p = nn.Linear(2*self.conf["hidden_size"],self.conf["prune_p"])
        self.prune_q = nn.Linear(self.conf["attention_hops"],self.conf["prune_q"])

    def forward(self, inp):
        batch_size, num_sent, max_len = inp.shape
        x = inp.view(-1, max_len)

        x_padded_idx = x.sum(dim=1) != 0
        x_enc = []
        for sub_batch in x[x_padded_idx].split(64):
            x_enc.append(self.encoder(sub_batch, None))
        x_enc = torch.cat(x_enc, dim=0)

        x_enc_t = torch.zeros((batch_size * num_sent, x_enc.size(1))).to(
            self.template.device
        )

        x_enc_t[x_padded_idx] = x_enc
        x_enc_t = x_enc_t.view(batch_size, num_sent, -1)

        embedded = self.dropout(self.translate(x_enc_t))
        embedded = self.act(embedded)

        all_, (_, _) = self.lstm_layer(embedded)

        # opt: [batch, att_hops, hidden_size]
        opt,attn = self.attention(all_)
        

        # p_section: [batch, att_hops, prune_p]
        p_section = self.prune_p(opt)
        # q_section: [batch, hidden_size, prune_q]

        q_section = self.prune_q(opt.permute(0,2,1))
        

        encoded = torch.cat([p_section.view(batch_size,-1),q_section.view(batch_size,-1)],dim=1)
        
        return encoded


class tester(nn.Module):
    def __init__(self, conf, encoder, doc_enc=None):
        super(tester, self).__init__()
        self.conf = conf
        if doc_enc == None:
            self.encoder = HAN_DOC(conf, encoder)
        elif encoder == None:
            self.encoder = doc_enc
        self.act = nn.ReLU()
        self.dropout = nn.Dropout(conf["dropout"])

        fc_in_dim = self.conf["attention_hops"]*self.conf["prune_p"] + 2*self.conf["hidden_size"]*self.conf["prune_q"]

        self.fc = nn.Linear(4*fc_in_dim, 2)

    def forward(self, inputs):
        x0,x1 = inputs
        x0_enc = self.encoder(x0)
        x1_enc = self.encoder(x1)
        # print(x0_enc.shape)
        # print(x1_enc.shape)

        cont = torch.cat(
            [
                x0_enc,
                x1_enc,
                torch.abs(x0_enc - x1_enc),
                x0_enc * x1_enc,
            ],
            dim=1,
        )

        cont = self.dropout(self.act(cont))
        cont = self.fc(cont)
        return cont




In [202]:
model = tester(model_conf,encoder)
model.cpu()


tester(
  (encoder): HAN_DOC(
    (encoder): BiLSTM_encoder(
      (embedding): Embedding(33934, 300, padding_idx=1)
      (projection): Linear(in_features=300, out_features=400, bias=True)
      (lstm): LSTM(400, 400, batch_first=True, dropout=0.3, bidirectional=True)
      (relu): ReLU()
      (dropout): Dropout(p=0.3, inplace=False)
    )
    (translate): Linear(in_features=800, out_features=150, bias=True)
    (act): ReLU()
    (dropout): Dropout(p=0.2, inplace=False)
    (lstm_layer): LSTM(150, 150, bidirectional=True)
    (attention): SelfAttention(
      (ut_dense): Linear(in_features=300, out_features=150, bias=False)
      (et_dense): Linear(in_features=150, out_features=5, bias=False)
    )
    (prune_p): Linear(in_features=300, out_features=50, bias=True)
    (prune_q): Linear(in_features=5, out_features=10, bias=True)
  )
  (act): ReLU()
  (dropout): Dropout(p=0.2, inplace=False)
  (fc): Linear(in_features=6500, out_features=2, bias=True)
)

In [203]:
trainloader,valloader,testloader = data.get_dataloaders()
import ballpark
print("Model Parameters:",ballpark.business(sum(p.numel() for p in model.parameters() if p.requires_grad)))

Model Parameters: 13.4M


In [204]:
i,l = next(iter(trainloader))
opt = model(i)
print(opt)
trainloader,valloader,testloader = data.get_dataloaders()

tensor([[-0.0707, -0.2080],
        [ 0.0649, -0.3061],
        [ 0.0080, -0.3542],
        [-0.0220, -0.2619]], grad_fn=<AddmmBackward>)


In [131]:

from hyperdash import Experiment

sys.stdout = old_stdout

exp = Experiment("test",api_key_getter = get_hyperdash_api)
optimizer = optim.AdamW(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()
device = torch.device("cuda")

model.to(device)


def train(model,dl,optimizer,criterion):
    model.train()
    n_correct, n_total,running_loss = 0, 0,0
    for i, data in enumerate(dl, 0):
        inputs, labels = data
        inputs, labels = [x.to(device) for x in inputs], labels.to(device)
        batch_size = labels.shape[0]
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        n_correct += ((torch.max(F.softmax(outputs, dim=1), 1)[1].view(labels.size())== labels).sum().item())
        n_total += batch_size

        exp.metric('train loss',loss.item(),log=False)
        print(f"loss = {loss.item()}",end = '\r')
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    acc = 100.0 * n_correct / n_total
    print("Train Acc",acc)
    exp.metric('train acc',acc,log=False)
    exp.metric('train running loss',running_loss,log=False)
    print('Loss: {}'.format(running_loss))
    print("-------------")


def validate(model,dl,criterion):
    
    n_correct, n_total,running_loss = 0, 0,0
    model.eval()
    with torch.no_grad():
        for i, data in enumerate(dl, 0):
            inputs, labels = data
            inputs, labels = [x.to(device) for x in inputs], labels.to(device)
            batch_size = labels.shape[0]
            # forward 
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            
            n_correct += ((torch.max(F.softmax(outputs, dim=1), 1)[1].view(labels.size())== labels).sum().item())
            n_total += batch_size

            exp.metric('val loss',loss.item(),log=False)
            running_loss += loss.item()


        acc = 100.0 * n_correct / n_total
        print("Val Acc",acc)
        exp.metric('val acc',acc, log=False)
        exp.metric('val running loss',running_loss, log=False)
        print('Val Loss: {}'.format(running_loss))
        print("-------------")

try:

    # loop over the dataset multiple times
    for epoch in range(7):
        train(model,trainloader,optimizer,criterion)
        validate(model,valloader,criterion)
    validate(model,testloader,criterion)
    exp.end()
    print('Finished Training')
except KeyboardInterrupt:
    validate(model,testloader,criterion)
    exp.end()


Train Acc 53.44545666900257
Loss: 755.272955738008
-------------
Val Acc 55.03731343283582
Val Loss: 92.16557067632675
-------------
Train Acc 64.12053258584443
Loss: 697.0707288980484
-------------
Val Acc 68.47014925373135
Val Loss: 82.14385944604874
-------------
Train Acc 73.67437514599392
Loss: 585.704736797139
-------------
Val Acc 76.30597014925372
Val Loss: 66.0655355155468
-------------
Train Acc 80.892314879701
Loss: 467.5735091175884
-------------
Val Acc 79.2910447761194
Val Loss: 63.57440869510174
-------------
Train Acc 84.79327259985985
Loss: 390.52157745929435
-------------
Val Acc 79.4776119402985
Val Loss: 65.49550592154264
-------------
Train Acc 87.47956085026863
Loss: 339.86118906363845
-------------
Val Acc 80.78358208955224
Val Loss: 59.70144883915782
-------------
Train Acc 89.20812894183602
Loss: 285.7252041818356
-------------
Val Acc 79.1044776119403
Val Loss: 58.688639322295785
-------------
Val Acc 83.3644859813084
Val Loss: 54.29779403586872
-------------


In [116]:
del model,trainloader,valloader,testloader
del optimizer

In [117]:
torch.cuda.empty_cache()

In [118]:
import sys
sys.stdout = old_stdout

In [78]:
opt.shape

torch.Size([4, 60, 3200])

In [180]:
a= [[0.0,0.5,0.5],
    [0.5,0.0,0.5],
    [0.5,0.5,0.0]]
b= [[0.5,0.0,0.5],
    [0.0,0.5,0.5],
    [0.5,0.5,0.0]]

In [181]:
a= torch.tensor(a)
b= torch.tensor(b)

In [182]:
a.flatten()

tensor([0.0000, 0.5000, 0.5000, 0.5000, 0.0000, 0.5000, 0.5000, 0.5000, 0.0000])

In [65]:
a_norm

tensor([[0.0000, 0.7071, 0.7071],
        [0.7071, 0.0000, 0.7071],
        [0.7071, 0.7071, 0.0000]])

In [69]:
b_norm

tensor([[0.7071, 0.0000, 0.7071],
        [0.0000, 0.7071, 0.7071],
        [0.7071, 0.7071, 0.0000]])

In [71]:
cos_sim = torch.mm(a_norm, b_norm.transpose(0, 1))

In [None]:
from torch_lr_finder import LRFinder

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
# optimizer = optim.Adadelta(model.parameters(),lr = 0.01,)
lr_finder = LRFinder(model, optimizer, criterion, device="cuda")
lr_finder.range_test(train_dl, val_loader=val_dl, end_lr=0.01, num_iter=100, step_mode="exp")
lr_finder.plot(log_lr=False)
lr_finder.reset()

tensor([[0.5000, 1.0000, 0.5000],
        [1.0000, 0.5000, 0.5000],
        [0.5000, 0.5000, 1.0000]])

In [73]:
i[0]

tensor([[[   2, 7341,    6,  ...,    1,    1,    1],
         [   2,  136,  953,  ...,    1,    1,    1],
         [   2,  631,   20,  ...,    1,    1,    1],
         ...,
         [   2,    6, 7020,  ...,    1,    1,    1],
         [   2,  179, 1679,  ...,    1,    1,    1],
         [   2,  136, 5916,  ...,    1,    1,    1]],

        [[   2,    0,   18,  ...,   55,   11,    3],
         [   2, 1602,    4,  ...,    1,    1,    1],
         [   2,   31,    0,  ...,    1,    1,    1],
         ...,
         [   1,    1,    1,  ...,    1,    1,    1],
         [   1,    1,    1,  ...,    1,    1,    1],
         [   1,    1,    1,  ...,    1,    1,    1]],

        [[   2,   20, 1532,  ...,    1,    1,    1],
         [   2, 3071, 1848,  ...,    1,    1,    1],
         [   2,    6,  176,  ...,    1,    1,    1],
         ...,
         [   1,    1,    1,  ...,    1,    1,    1],
         [   1,    1,    1,  ...,    1,    1,    1],
         [   1,    1,    1,  ...,    1,    1,    1]],

In [102]:
c=0
bat = []
for i in trainloader:
    print(i)
    bat.append(i)
    c+=1
    if c==10:
        break

In [112]:
bat[2]

[[tensor([[[    2,  1500,    63,  ...,     1,     1,     1],
           [    2,    52,  5336,  ...,     1,     1,     1],
           [    2,  6245,     0,  ...,  3033,     8,     3],
           ...,
           [    1,     1,     1,  ...,     1,     1,     1],
           [    1,     1,     1,  ...,     1,     1,     1],
           [    1,     1,     1,  ...,     1,     1,     1]],
  
          [[    2, 33131, 12758,  ...,     3,     1,     1],
           [    2,     0,    18,  ...,     1,     1,     1],
           [    2,     6,     0,  ...,     1,     1,     1],
           ...,
           [    1,     1,     1,  ...,     1,     1,     1],
           [    1,     1,     1,  ...,     1,     1,     1],
           [    1,     1,     1,  ...,     1,     1,     1]],
  
          [[    2,     0,   939,  ...,     1,     1,     1],
           [    2,     6, 13524,  ...,     1,     1,     1],
           [    2,     6,  9296,  ...,     1,     1,     1],
           ...,
           [    2, 20408,  19

In [108]:
for i in data.train_iter:
    print(i)
    break

In [110]:
i.source

tensor([[[    2,     6,     0,  ...,     1,     1,     1],
         [    2,  2401,     0,  ...,     1,     1,     1],
         [    2,   254, 10884,  ...,     1,     1,     1],
         ...,
         [    1,     1,     1,  ...,     1,     1,     1],
         [    1,     1,     1,  ...,     1,     1,     1],
         [    1,     1,     1,  ...,     1,     1,     1]],

        [[    2,     4,   596,  ...,     1,     1,     1],
         [    2,   223,     8,  ...,     1,     1,     1],
         [    2,    31,     6,  ...,     1,     1,     1],
         ...,
         [    1,     1,     1,  ...,     1,     1,     1],
         [    1,     1,     1,  ...,     1,     1,     1],
         [    1,     1,     1,  ...,     1,     1,     1]],

        [[    2,   286,  7310,  ...,     1,     1,     1],
         [    2,   179, 13320,  ...,     6,  1675,     3],
         [    2,     4,   152,  ...,     1,     1,     1],
         ...,
         [    1,     1,     1,  ...,     1,     1,     1],
         [

In [113]:

class DLND_Dataset(Dataset):
    def __init__(self, data):
        self.data = data
        self.fields = self.data.fields

    def __len__(self):
        return len(self.data.examples)

    def __getitem__(self, idx):
        source = (
            self.fields["source"].process([self.data.examples[idx].source]).squeeze()
        )

        target = (
            self.fields["target"].process([self.data.examples[idx].target]).squeeze()
        )
        label = self.fields["label"].process([self.data.examples[idx].label]).squeeze()

        return [source, target], label

In [114]:
new_data = DLND_Dataset(data.train)

In [117]:
new_data[4]

([tensor([[   2,    0,   18,  ...,    1,    1,    1],
          [   2,  136,  939,  ...,    1,    1,    1],
          [   2, 5873,  256,  ...,    1,    1,    1],
          ...,
          [   2,   10, 2178,  ...,    1,    1,    1],
          [   2,  223,  240,  ...,    1,    1,    1],
          [   1,    1,    1,  ...,    1,    1,    1]]),
  tensor([[    2,  8261,     0,  ...,     3,     1,     1],
          [    2,   302, 13160,  ...,     1,     1,     1],
          [    2,  5900,     0,  ...,  5935,  9162,     3],
          ...,
          [    1,     1,     1,  ...,     1,     1,     1],
          [    1,     1,     1,  ...,     1,     1,     1],
          [    1,     1,     1,  ...,     1,     1,     1]])],
 tensor(0))

In [121]:
data.train.fields["label"].process([data.train.examples[1].label])

tensor([0])

In [125]:
for i in range(5000):
    if data.train.examples[i].label!='Non-Novel':
        print(i)

IndexError: list index out of range

In [129]:
c=0
for i in data.train_iter:
    print(i.label)
    c+=1
    if c==10:
        break


In [130]:
i.label

tensor([1, 0, 0, 1], device='cuda:0')

In [131]:
a = [i.label for i in data.train.examples]

In [133]:
a.count('Non-Novel')

2243

In [135]:
a.count('Novel')

2038