In [2]:
from src.datasets.nli import *
from src.model.nli_models import *
from src.utils.nli_utils import *

In [4]:
snli_conf = {"batch_size":128,"max_len":40,"device":'cuda',"tokenizer":'spacy'}
dataset = snli_module(snli_conf)

In [5]:
dataset.prepare_data()

downloading snli_1.0.zip
snli_1.0.zip: 100%|██████████| 94.6M/94.6M [00:07<00:00, 12.6MB/s]
extracting
.vector_cache/glove.840B.300d.zip: 2.18GB [17:08, 2.12MB/s]                            
100%|█████████▉| 2195697/2196017 [05:11<00:00, 7666.82it/s]

In [49]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np

class Attention(nn.Module):
    def __init__(self, conf):
        super(Attention, self).__init__()
        self.Ws = nn.Linear(
            2 * conf["hidden_size"],
            conf["attention_layer_param"],
            bias=False,
        )
        self.Wa = nn.Linear(conf["attention_layer_param"], 1, bias=False)

    def forward(self, hid):
        opt = self.Ws(hid)
        opt = F.tanh(opt)
        opt = self.Wa(opt)
        opt = F.softmax(opt)
        return opt


class Attn_Encoder(nn.Module):
    def __init__(self, conf):
        super(Attn_Encoder, self).__init__()
        self.embedding = nn.Embedding(
            num_embeddings=conf["vocab_size"],
            embedding_dim=conf["embedding_dim"],
            padding_idx=conf["padding_idx"],
        )
        self.translate = nn.Linear(conf["embedding_dim"], conf["hidden_size"])
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=conf["dropout"])

        if conf["use_glove"]:
            self.embedding = nn.Embedding.from_pretrained(
                torch.load(".vector_cache/{}_vectors.pt".format(conf["dataset"]))
            )
        self.lstm_layer = nn.LSTM(
            input_size=conf["hidden_size"],
            hidden_size=conf["hidden_size"],
            num_layers=conf["num_layers"],
            dropout=conf["dropout"],
            bidirectional=True,
            batch_first=True
        )
        self.attention = Attention(conf)

    def forward(self, inp):
        batch_size = inp.shape[0]
        embedded = self.embedding(inp)
        embedded = self.relu(self.translate(embedded))
        all_, (_, _) = self.lstm_layer(embedded)
        attn = self.attention(all_)
        cont = torch.bmm(attn.permute(0, 2, 1),all_)
        cont = cont.squeeze(1)
        return cont


class Attn_encoder_snli(nn.Module):
    def __init__(self, conf):
        super(Attn_encoder_snli, self).__init__()
        self.conf = conf
        self.encoder = Attn_Encoder(conf)
        self.fc_in = nn.Linear(
            2 * 4 * self.conf["hidden_size"],
            self.conf["hidden_size"],
        )
        self.fcs = nn.ModuleList(
            [
                nn.Linear(self.conf["hidden_size"], self.conf["hidden_size"])
                for i in range(self.conf["fcs"])
            ]
        )
        self.fc_out = nn.Linear(self.conf["hidden_size"], 3)
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=2)
        self.dropout = nn.Dropout(p=self.conf["dropout"])

    def forward(self, x0, x1):
        x0_enc = self.encoder(x0)
        x0_enc = self.dropout(x0_enc)
        x1_enc = self.encoder(x1)
        x1_enc = self.dropout(x1_enc)
        cont = torch.cat(
            [x0_enc, x1_enc, torch.abs(x0_enc - x1_enc), x0_enc * x1_enc], dim=1
        )
        opt = self.fc_in(cont)
        opt = self.dropout(opt)
        for fc in self.fcs:
            opt = fc(opt)
            opt = self.dropout(opt)
            opt = self.relu(opt)
        opt = self.fc_out(opt)
        return opt



In [50]:
model_conf = {
    "hidden_size":300,
    "embedding_dim":300,
    "dropout":0.3,
    "use_glove":True,
    "num_layers":1,
    "dataset":"snli",
    "fcs":1,
    "vocab_size":dataset.vocab_size(),
    "tokenizer":"spacy",
    "padding_idx":dataset.padding_idx(),
    "attention_layer_param":200
}

hparams = {
    "optimizer_base": {
        "optim": "adamw",
        "lr": 0.0010039910781394373,
        "scheduler": "const",
    },
    "optimizer_tune": {
        "optim": "adam",
        "lr": 0.0010039910781394373,
        "weight_decay": 0.1,
        "scheduler": "lambda",
    },
    "switch_epoch": 5,
}

model = Attn_encoder_snli(model_conf)
# model = SNLI_model(attn_bilstm_snli,model_conf,hparams=hparams)

In [34]:
for i in dataset.train_dataloader():
    print(i)
    break



[torchtext.data.batch.Batch of size 128 from SNLI]
	[.premise]:[torch.cuda.LongTensor of size 128x40 (GPU 0)]
	[.hypothesis]:[torch.cuda.LongTensor of size 128x40 (GPU 0)]
	[.label]:[torch.cuda.LongTensor of size 128 (GPU 0)]


In [51]:
opt = model(i.premise.cpu(),i.hypothesis.cpu())

torch.Size([128, 40, 600])
torch.Size([128, 40, 1])
torch.Size([128, 600])
torch.Size([128, 40, 600])
torch.Size([128, 40, 1])
torch.Size([128, 600])
torch.Size([128, 600])


IndexError: Dimension out of range (expected to be in range of [-2, 1], but got 2)

In [6]:
EPOCHS = 20

tensorboard_logger = TensorBoardLogger("lightning_logs")
lr_logger = LearningRateLogger(logging_interval="step")

trainer = pl.Trainer(
    gpus=1,
    max_epochs=EPOCHS, 
    progress_bar_refresh_rate=10,
    profiler=False,
    auto_lr_find=False,
    callbacks=[lr_logger, SwitchOptim()],
    logger=[tensorboard_logger],
    row_log_interval=2,
) 
trainer.fit(model, dataset)
trainer.test(model, datamodule=dataset)


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type        | Params
--------------------------------------
0 | model | BiLSTM_snli | 14 M  


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validation sanity check', layout=Layout…



HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…

In [10]:
for i in dataset.train_dataloader():
    print(i)
    break


[torchtext.data.batch.Batch of size 128 from SNLI]
	[.premise]:[torch.cuda.LongTensor of size 128x40 (GPU 0)]
	[.hypothesis]:[torch.cuda.LongTensor of size 128x40 (GPU 0)]
	[.label]:[torch.cuda.LongTensor of size 128 (GPU 0)]


In [9]:
i.label

tensor([0, 1, 2, 0, 0, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 2, 1, 1, 0, 1,
        0, 1, 2, 0, 2, 2, 1, 1, 1, 1, 2, 2, 0, 2, 0, 0, 2, 2, 2, 0, 0, 1, 1, 1,
        0, 2, 2, 0, 1, 1, 0, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 1, 2, 1, 2, 0, 0, 2,
        0, 2, 0, 1, 1, 2, 1, 1, 0, 2, 1, 0, 2, 2, 0, 0, 1, 0, 0, 0, 1, 2, 0, 2,
        2, 0, 0, 1, 2, 0, 2, 0, 1, 1, 1, 1, 0, 2, 1, 2, 0, 1, 1, 2, 0, 0, 2, 1,
        0, 2, 1, 0, 1, 0, 0, 1], device='cuda:0', dtype=torch.int32)

In [11]:
opt = model(i.premise,i.hypothesis)

In [15]:
opt = opt.squeeze(0)

In [24]:
torch.rand([2, 128, 300])[-2:].transpose(0,1).contiguous().view(128,-1).shape

torch.Size([128, 600])