In [3]:
from src.defaults import *
from src.datasets.novelty import *

In [4]:
# download_models_from_neptune("NLI-81")
field = load_field('NLI-87')
# field = None


dataset_conf = {'dataset': 'dlnd', 'max_num_sent': 60,"sent_tokenizer":"spacy","batch_size":4,"device":"cuda"}
# dataset_conf = {'dataset': 'dlnd', 'max_num_sent': 50,"sent_tokenizer":"spacy", "tokenizer":'spacy',"max_len":50,"batch_size":32,"device":"cuda"}
model_conf = {'results_dir': 'results', 'device': 'cuda', 'dropout': 0.3, 'dataset': 'dlnd', 'hidden_size': 400, 'use_glove': False, "attention_layer_param":200,"num_layers":1}

In [5]:
data = dlnd(dataset_conf,sentence_field = field)


In [6]:
for i in data.train_iter:
    print(i)
    break


[torchtext.data.batch.Batch of size 4]
	[.source]:[torch.cuda.LongTensor of size 4x60x50 (GPU 0)]
	[.target]:[torch.cuda.LongTensor of size 4x60x50 (GPU 0)]
	[.label]:[torch.cuda.LongTensor of size 4 (GPU 0)]


In [7]:
from src.model.nli_models import *
from src.model.novelty_models import *



def load_encoder1(enc_data):
    if enc_data['options'].get("attention_layer_param",0)==0:
        model = bilstm_snli(enc_data["options"])
    elif enc_data['options'].get("r",0)==0:
        model = attn_bilstm_snli(enc_data["options"])
    else:
        model = struc_attn_snli(enc_data["options"])
    return model

def load_encoder(_id):
    model_path = os.path.join('./results/',_id,"model.pt")
    model_data = torch.load(model_path)
    return model_data

model_data = load_encoder('NLI-87')

model_conf["encoder_dim"] = model_data["options"]["hidden_size"]
model_data['options']["use_glove"] = False

model = attn_bilstm_snli(model_data['options'])

# dan = DAN(model_conf,model.encoder)

# dan = dan.cuda()

In [8]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [19]:


class Attention(nn.Module):
    def __init__(self, conf):
        super(Attention, self).__init__()
        self.Ws = nn.Linear(
            2 * conf["hidden_size"],
            conf["attention_layer_param"],
            bias=False,
        )
        self.Wa = nn.Linear(conf["attention_layer_param"], 1, bias=False)

    def forward(self, hid):
        opt = self.Ws(hid)
        opt = F.tanh(opt)
        opt = self.Wa(opt)
        opt = F.softmax(opt)
        return opt


class HAN_DOC(nn.Module):
    def __init__(self, conf, encoder):
        super(HAN_DOC, self).__init__()
        self.conf = conf
        self.encoder = encoder

        self.translate = nn.Linear(2*self.conf["encoder_dim"], self.conf["hidden_size"])
        self.act = nn.ReLU()
        self.dropout = nn.Dropout(conf["dropout"])
        self.template = nn.Parameter(torch.zeros((1)), requires_grad=True)
        self.lstm_layer = nn.LSTM(
            input_size=self.conf["hidden_size"],
            hidden_size=self.conf["hidden_size"],
            num_layers=self.conf["num_layers"],
            bidirectional=True,
        )
        self.attention = Attention(conf)

    def forward(self, inp):
        batch_size, num_sent, max_len = inp.shape
        x = inp.view(-1, max_len)

        x_padded_idx = x.sum(dim=1) != 0
        x_enc = []
        for sub_batch in x[x_padded_idx].split(64):
            x_enc.append(self.encoder(sub_batch, None))
        x_enc = torch.cat(x_enc, dim=0)

        x_enc_t = torch.zeros((batch_size * num_sent, x_enc.size(1))).to(
            self.template.device
        )

        x_enc_t[x_padded_idx] = x_enc
        x_enc_t = x_enc_t.view(batch_size, num_sent, -1)

        print(x_enc_t.shape)

        embedded = self.dropout(self.translate(x_enc_t))
        embedded = self.act(embedded)

        all_, (_, _) = self.lstm_layer(embedded)
        attn = self.attention(all_)

        cont = torch.bmm(attn.permute(0, 2, 1), all_)
        cont = cont.squeeze(1)
        return cont


class HAN(nn.Module):
    def __init__(self, conf, sent_enc, doc_enc=None):
        super(HAN,self).__init__()
        self.conf = conf
        if doc_enc == None:
            self.encoder = HAN_DOC(conf, sent_enc)
        elif sent_enc == None:
            self.encoder = doc_enc
        self.act = nn.ReLU()
        self.dropout = nn.Dropout(conf["dropout"])
        self.fc = nn.Linear(4*conf["hidden_size"], 2)

    def forward(self, x0, x1):
        x0_enc = self.encoder(x0)
        x1_enc = self.encoder(x1)
        cont = torch.cat(
            [
                x0_enc,
                x1_enc,
                torch.abs(x0_enc - x1_enc),
                x0_enc * x1_enc,
            ],
            dim=1,
        )
        cont = self.dropout(self.act(cont))
        cont = self.fc(cont)
        return cont


In [20]:
han = HAN(model_conf,model.encoder)

In [21]:
han(i.source.cpu(),i.target.cpu())

torch.Size([4, 60, 800])
torch.Size([4, 60, 800])


RuntimeError: mat1 and mat2 shapes cannot be multiplied (4x3200 and 400x2)

In [35]:
ha = torch.rand([4,60,400])
hb = torch.rand([4,60,400])
x = ha * hb

In [36]:
x.shape

torch.Size([4, 60, 400])

In [18]:
model.conf

{'attention_layer_param': 200,
 'char_embedding_dim': 100,
 'dataset': 'snli',
 'device': 'cuda',
 'dropout': 0.3,
 'embedding_dim': 300,
 'fcs': 1,
 'hidden_size': 400,
 'max_word_len': 10,
 'num_layers': 1,
 'padding_idx': 1,
 'results_dir': 'results',
 'use_char_emb': False,
 'use_glove': False,
 'vocab_size': 33934}