In [2]:
from src.defaults import *
from src.datasets.novelty import *

In [18]:
# Identifier handed to the project loaders in this notebook (presumably a
# Neptune run id, judging by the commented-out download call -- confirm).
load_nli = "NLI-87"

# download_models_from_neptune("NLI-81")
field = load_field(load_nli)
# field = None

# Options passed to the `dlnd` dataset constructor.
dataset_conf = {
    "dataset": "dlnd",
    "max_num_sent": 60,
    "sent_tokenizer": "spacy",
    "batch_size": 4,
    "device": "cuda",
}
# Alternative dataset configuration kept for reference:
# dataset_conf = {'dataset': 'dlnd', 'max_num_sent': 50,"sent_tokenizer":"spacy", "tokenizer":'spacy',"max_len":50,"batch_size":32,"device":"cuda"}

# Options passed to the RDV_CNN model; "encoder_dim" is filled in by a later cell.
model_conf = {
    "results_dir": "results",
    "device": "cuda",
    "dropout": 0.3,
    "dataset": "dlnd",
    "hidden_size": 400,
    "use_glove": False,
    "num_filters": 95,
    "filter_sizes": [3, 5, 7],
}

In [19]:
# Build the dataset object by calling `dlnd` with the options above, passing
# the field loaded from the NLI run as `sentence_field`.
data = dlnd(dataset_conf,sentence_field = field)


In [20]:
# Print only the first batch of the training iterator as a sanity check.
# NOTE: the loop variable `i` stays bound after the `break`; a later cell
# feeds `i.source` / `i.target` to the model, so the name must not change here.
for i in data.train_iter:
    print(i)
    break


[torchtext.data.batch.Batch of size 4]
	[.source]:[torch.cuda.LongTensor of size 4x60x50 (GPU 0)]
	[.target]:[torch.cuda.LongTensor of size 4x60x50 (GPU 0)]
	[.label]:[torch.cuda.LongTensor of size 4 (GPU 0)]


In [21]:
from src.model.nli_models import *
from src.model.novelty_models import *



def load_encoder(enc_data):
    """Build the NLI model that matches the saved run's options.

    Args:
        enc_data: mapping with an ``"options"`` dict describing the model.

    Returns:
        The object returned by one of three constructors, chosen as follows:

        * ``bilstm_snli`` when ``attention_layer_param`` is missing or ``0``;
        * ``attn_bilstm_snli`` when attention is configured but ``r`` is
          missing or ``0``;
        * ``struc_attn_snli`` otherwise.

        Each constructor receives the ``"options"`` dict unchanged.
    """
    options = enc_data["options"]

    # No attention layer configured -> plain BiLSTM model.
    if options.get("attention_layer_param", 0) == 0:
        return bilstm_snli(options)

    # Attention configured but no `r` -> attention BiLSTM model.
    if options.get("r", 0) == 0:
        return attn_bilstm_snli(options)

    return struc_attn_snli(options)

# Load the saved data for the run named by `load_nli`; its "options" dict
# decides which NLI model class `load_encoder` builds.
nli_model_data = load_encoder_data(load_nli)
# Switch `use_glove` off in the options. This must happen before
# `load_encoder` is called, because the options dict is passed to the model
# constructor.
nli_model_data['options']["use_glove"] = False

# Build the NLI model and keep only its `.encoder` attribute.
# NOTE(review): no visible line loads trained weights into this model --
# confirm that `load_encoder_data` / the model constructors take care of it.
encoder = load_encoder(nli_model_data).encoder

# RDV_CNN sizes its convolution kernels from conf["encoder_dim"]; take it from
# the NLI model's hidden size.
model_conf["encoder_dim"] = nli_model_data["options"]["hidden_size"]


# encoder = load_encoder(nli_model_data['options']).encoder

# dan = DAN(model_conf,model.encoder)

# dan = dan.cuda()

In [22]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class Accumulator(nn.Module):
    """Pair every source sentence with its most similar target sentence.

    Both documents are encoded sentence by sentence with ``encoder``. For each
    source sentence the target sentence with the highest cosine similarity is
    selected, and the two encodings are combined into
    ``[x, y, |x - y|, x * y]`` along the feature axis.

    A sentence counts as padding when its token ids sum to zero; such
    sentences are never sent through the encoder and are represented by an
    all-zero vector.
    NOTE(review): this only detects padding if the pad token id is 0 --
    confirm against the vocabulary in use.

    Args:
        conf: model configuration; stored on the instance, not read here.
        encoder: callable invoked as ``encoder(token_ids, None)`` with a
            ``(n, max_len)`` id tensor; it must return an ``(n, enc_dim)``
            tensor.
    """

    # Maximum number of sentences handed to the encoder per call.
    SUB_BATCH_SIZE = 64
    # Lower bound on the L2 norm when normalising for cosine similarity.
    COS_EPS = 1e-8

    def __init__(self, conf, encoder):
        super(Accumulator, self).__init__()
        self.conf = conf
        self.encoder = encoder
        # Not used in the computation any more; retained so the module's
        # parameter list and state_dict keys stay unchanged.
        self.template = nn.Parameter(torch.zeros((1)), requires_grad=True)

    def _encode_document(self, doc):
        """Encode the non-padding sentences of ``doc``.

        Args:
            doc: ``(batch, num_sent, max_len)`` tensor of token ids.

        Returns:
            A tuple ``(encodings, is_real)``. ``encodings`` has shape
            ``(batch, num_sent, enc_dim)`` with zero rows for padded
            sentences. ``is_real`` is a ``(batch, num_sent)`` boolean mask
            that is ``True`` for sentences that were encoded.

        Raises:
            ValueError: if ``doc`` is not 3-dimensional.
        """
        if doc.dim() != 3:
            raise ValueError(
                "expected a (batch, num_sent, max_len) tensor, got shape {}".format(
                    tuple(doc.shape)
                )
            )
        batch_size, num_sent, max_len = doc.shape

        flat = doc.reshape(-1, max_len)
        is_real = flat.sum(dim=1) != 0

        # Encode in slices so one huge document batch does not have to pass
        # through the encoder in a single call.
        encoded = torch.cat(
            [
                self.encoder(chunk, None)
                for chunk in flat[is_real].split(self.SUB_BATCH_SIZE)
            ],
            dim=0,
        )

        # Scatter the encodings back to their original positions; the buffer
        # takes dtype and device from the encoder output.
        dense = encoded.new_zeros((batch_size * num_sent, encoded.size(1)))
        dense[is_real] = encoded

        return (
            dense.view(batch_size, num_sent, -1),
            is_real.view(batch_size, num_sent),
        )

    def forward(self, src, trg):
        """Build the combined source/target representation.

        Args:
            src: ``(batch, num_src_sent, max_len)`` tensor of token ids.
            trg: ``(batch, num_trg_sent, max_len)`` tensor of token ids. Each
                document is flattened with its own shape, so the sentence
                counts may differ.

        Returns:
            Tensor of shape ``(batch, num_src_sent, 4 * enc_dim)``.

        Raises:
            ValueError: if either input is not 3-dimensional.
        """
        x_enc_t, _ = self._encode_document(src)
        y_enc_t, y_is_real = self._encode_document(trg)

        # Unit-normalise each sentence vector (norm clamped at COS_EPS) so the
        # batched matrix product yields cosine similarities.
        a_norm = F.normalize(x_enc_t, p=2, dim=2, eps=self.COS_EPS)
        b_norm = F.normalize(y_enc_t, p=2, dim=2, eps=self.COS_EPS)
        cos_sim = torch.bmm(a_norm, b_norm.transpose(1, 2))

        # Padded target sentences are zero vectors, so their similarity is
        # exactly 0 and would beat any real sentence with negative similarity.
        # Exclude them from the arg-max.
        cos_sim = cos_sim.masked_fill(~y_is_real.unsqueeze(1), float("-inf"))

        y_sim = torch.argmax(cos_sim, dim=2)
        gather_idx = y_sim.unsqueeze(2).expand(-1, -1, y_enc_t.size(2))
        matched_y = torch.gather(y_enc_t, 1, gather_idx)

        # NOTE: padded source sentences still get a matched target here, so
        # their output rows are not necessarily all-zero (as before).
        rdv = torch.cat(
            [
                x_enc_t,
                matched_y,
                torch.abs(x_enc_t - matched_y),
                x_enc_t * matched_y,
            ],
            dim=2,
        )
        return rdv


class RDV_CNN(nn.Module):
    """Two-class CNN classifier over the Accumulator's sentence-pair features.

    The accumulator output is treated as a one-channel image of shape
    ``(num_sent, feature_dim)``. One convolution per filter size spans the
    full feature width, each feature map is max-pooled over the sentence
    axis, and the pooled features go through dropout and a linear layer with
    two outputs.

    Args:
        conf: mapping providing ``"num_filters"``, ``"filter_sizes"``,
            ``"encoder_dim"`` and ``"dropout"``.
        encoder: sentence encoder passed on to ``Accumulator``.
    """

    def __init__(self, conf, encoder):
        super(RDV_CNN, self).__init__()
        self.accumulator = Accumulator(conf, encoder)
        self.linear = nn.Linear(conf["num_filters"] * len(conf["filter_sizes"]), 2)
        # Kernel width must equal the accumulator's feature size. The factor 4
        # matches the four concatenated parts [x, y, |x-y|, x*y]; the factor 2
        # presumably reflects a bidirectional encoder whose output is twice
        # "encoder_dim" -- confirm against the encoder in use.
        self.convs1 = nn.ModuleList(
            [
                nn.Conv2d(
                    1,
                    conf["num_filters"],
                    (K, conf["encoder_dim"] * 2 * 4),
                )
                for K in conf["filter_sizes"]
            ]
        )
        self.act = nn.ReLU()
        self.dropout = nn.Dropout(conf["dropout"])

    def forward(self, x, y):
        """Classify a (source, target) document pair.

        Args:
            x: source document tensor, passed to the accumulator.
            y: target document tensor, passed to the accumulator.

        Returns:
            Tensor of shape ``(batch, 2)`` with unnormalised class scores.
        """
        # Add the channel axis once instead of once per convolution.
        rdv = self.accumulator(x, y).unsqueeze(1)

        pooled = []
        for conv in self.convs1:
            # (batch, filters, positions, 1) -> (batch, filters, positions)
            fmap = self.act(conv(rdv)).squeeze(3)
            # Max over all positions -> (batch, filters)
            pooled.append(F.max_pool1d(fmap, fmap.size(2)).squeeze(2))

        feats = torch.cat(pooled, 1)
        # The pooled features are already non-negative (ReLU then max), so the
        # former second ReLU here was a no-op while the configured dropout was
        # never applied. Dropout is the identity in eval mode, so inference
        # results are unchanged.
        feats = self.dropout(feats)
        return self.linear(feats)



In [23]:
# Instantiate the classifier around the NLI encoder.
# NOTE: this cell has no .cuda()/.to(...) call, although both config dicts
# name "cuda" as the device.
model = RDV_CNN(model_conf,encoder)

In [24]:
# Smoke-test a forward pass on the batch still bound to `i` from the
# first-batch loop cell; the tensors are copied to the CPU first.
# NOTE(review): the recorded output of this cell is
# "ValueError: too many values to unpack (expected 3)", yet the batch printed
# earlier is 3-D. The failure may stem from stale kernel state -- re-run from
# a fresh kernel to confirm.
model(i.source.cpu(),i.target.cpu())

ValueError: too many values to unpack (expected 3)

Attn_Encoder(
  (embedding): Embedding(33934, 300, padding_idx=1)
  (translate): Linear(in_features=300, out_features=400, bias=True)
  (relu): ReLU()
  (dropout): Dropout(p=0.3, inplace=False)
  (lstm_layer): LSTM(400, 400, batch_first=True, dropout=0.3, bidirectional=True)
  (attention): Attention(
    (Ws): Linear(in_features=800, out_features=200, bias=False)
    (Wa): Linear(in_features=200, out_features=1, bias=False)
  )
)

In [36]:
# NOTE(review): `x` is not assigned in any visible cell, so this inspection
# depends on leftover kernel state and will not survive Restart & Run All.
x.shape

torch.Size([4, 60, 400])

In [18]:
# NOTE(review): RDV_CNN never sets a `conf` attribute, and this cell's
# execution count (18) precedes the cell that builds the RDV_CNN `model`, so
# `model` presumably referred to a different object when this ran -- confirm.
model.conf

{'attention_layer_param': 200,
 'char_embedding_dim': 100,
 'dataset': 'snli',
 'device': 'cuda',
 'dropout': 0.3,
 'embedding_dim': 300,
 'fcs': 1,
 'hidden_size': 400,
 'max_word_len': 10,
 'num_layers': 1,
 'padding_idx': 1,
 'results_dir': 'results',
 'use_char_emb': False,
 'use_glove': False,
 'vocab_size': 33934}