In [1]:
from src.datasets.nli import *
from src.model.nli_models import *
from src.utils.nli_utils import *

In [2]:
snli_conf = {"batch_size":128,"max_len":100,"device":'cuda',"tokenizer":'spacy',"use_char_emb":False,"max_word_len":10}
dataset = snli_module(snli_conf)

In [3]:
dataset.prepare_data()


Field class will be retired soon and moved to torchtext.legacy. Please see the most recent release notes for further information.


LabelField class will be retired soon and moved to torchtext.legacy. Please see the most recent release notes for further information.

downloading snli_1.0.zip
snli_1.0.zip: 100%|██████████| 94.6M/94.6M [00:04<00:00, 19.9MB/s]
extracting

Example class will be retired soon and moved to torchtext.legacy. Please see the most recent release notes for further information.

.vector_cache/glove.840B.300d.zip: 2.18GB [06:52, 5.27MB/s]                            

BucketIterator class will be retired soon and moved to torchtext.legacy. Please see the most recent release notes for further information.



In [22]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
        


class ESIM(nn.Module):
    def __init__(self,conf):
        super(ESIM, self).__init__()
        self.conf = conf
        self.embedding = nn.Embedding(
            num_embeddings=conf["vocab_size"],
            embedding_dim=conf["embedding_dim"],
            padding_idx=conf["padding_idx"],
        )
        self.translate = nn.Linear(conf["embedding_dim"],conf["hidden_size"])
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=conf["dropout"])

        if conf["use_glove"]:
            self.embedding = nn.Embedding.from_pretrained(
                torch.load(".vector_cache/{}_vectors.pt".format(conf["dataset"]))
            )
        self.lstm_layer = nn.LSTM(
            input_size=conf["hidden_size"],
            hidden_size=conf["hidden_size"],
            num_layers=conf["num_layers"],
            dropout=conf["dropout"],
            bidirectional=True,
            batch_first=True,
        )
        
        self.projection = nn.Sequential(nn.Linear(4*2*conf["hidden_size"],conf["hidden_size"]),nn.ReLU())
        self.composition = nn.LSTM(
            input_size=conf["hidden_size"],
            hidden_size=conf["hidden_size"],
            num_layers=conf["num_layers"],
            dropout=conf["dropout"],
            bidirectional=True,
            batch_first=True
            )
        self.classification = nn.Sequential(nn.Dropout(p=conf["dropout"]),
                                             nn.Linear(2*4*conf["hidden_size"],
                                                       conf["hidden_size"]),
                                             nn.Tanh(),
                                             nn.Dropout(p=conf["dropout"]),
                                             nn.Linear(conf["hidden_size"],
                                                       2))

    def forward(self,x0,x1):
        x0_enc = self.encode(x0)
        x1_enc = self.encode(x1)

        x0_att,x1_att = self.softmax_attention(x0_enc,x1_enc)

        enh_x0 = torch.cat([x0_enc,x0_att,x0_enc - x0_att,x0_enc * x0_att],dim=-1)
        enh_x1 = torch.cat([x1_enc,x1_att,x1_enc - x1_att,x1_enc * x1_att],dim=-1)

        proj_x0 = self.dropout(self.projection(enh_x0))
        proj_x1 = self.dropout(self.projection(enh_x1))

        comp_x0,(_,_) = self.composition(proj_x0)
        comp_x1,(_,_) = self.composition(proj_x1)


        avg_x0 = torch.mean(comp_x0,dim=1)
        avg_x1 = torch.mean(comp_x1,dim=1)

        max_x0 = torch.max(comp_x0,dim=1).values
        max_x1 = torch.max(comp_x1,dim=1).values

        v = torch.cat([avg_x0, avg_x1, max_x0, max_x1], dim=1)
        return self.classification(v)


    def softmax_attention(self,x,y):
        similarity_matrix = x.bmm(y.transpose(2, 1).contiguous())
        x_att = F.softmax(similarity_matrix,dim=1)
        y_att = F.softmax(similarity_matrix.transpose(1, 2).contiguous(),dim=1)
        x_att_emb = x_att.bmm(y)
        y_att_emb = y_att.bmm(x)
        return x_att_emb,y_att_emb
        

    def encode(self,x):
        embedded = self.embedding(x)
        embedded = self.relu(self.translate(embedded))
        all_, (_, _) = self.lstm_layer(embedded)
        return all_







        


In [23]:

model_conf = {"dropout":0.2,"hidden_size":300,"num_layers":1,"use_glove":False}
model_conf["vocab_size"] = dataset.vocab_size()
model_conf["padding_idx"] = dataset.padding_idx()
model_conf["embedding_dim"] = 300

hparams = {
    "optimizer_base": {
        "optim": "adamw",
        "lr": 0.0010039910781394373,
        "scheduler": "const",
    },
    "optimizer_tune": {
        "optim": "adam",
        "lr": 0.0010039910781394373,
        "weight_decay": 0.1,
        "scheduler": "lambda",
    },
    "switch_epoch": 5,
}

model = ESIM(model_conf)
# model = SNLI_model(attn_bilstm_snli,model_conf,hparams=hparams)


dropout option adds dropout after all but last recurrent layer, so non-zero dropout expects num_layers greater than 1, but got dropout=0.2 and num_layers=1



In [24]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)
count_parameters(model)

14601302

In [25]:
train_dl,val_dl,test_dl = dataset.data.get_dataloaders()

In [26]:
for i in train_dl:
    print(i)
    break

[[tensor([[   2,    4,   46,  ...,    1,    1,    1],
        [   2,    4,   46,  ...,    1,    1,    1],
        [   2,    4,   46,  ...,    1,    1,    1],
        ...,
        [   2, 1518, 1134,  ...,    1,    1,    1],
        [   2,    6, 2169,  ...,    1,    1,    1],
        [   2,    6, 2169,  ...,    1,    1,    1]]), tensor([[ 2,  4, 46,  ...,  1,  1,  1],
        [ 2,  4, 46,  ...,  1,  1,  1],
        [ 2,  4, 46,  ...,  1,  1,  1],
        ...,
        [ 2,  4, 33,  ...,  1,  1,  1],
        [ 2,  4, 33,  ...,  1,  1,  1],
        [ 2,  4, 33,  ...,  1,  1,  1]])], tensor([2, 1, 0, 2, 0, 1, 1, 0, 2, 2, 1, 2, 2, 1, 0, 2, 1, 0, 0, 1, 2, 2, 1, 0,
        1, 0, 2, 2, 1, 0, 2, 0, 1, 1, 2, 0, 1, 0, 2, 2, 0, 1, 2, 0, 0, 0, 2, 1,
        1, 0, 2, 1, 0, 2, 1, 2, 0, 2, 1, 0, 0, 1, 2, 0, 2, 1, 1, 0, 2, 1, 0, 2,
        2, 1, 0, 2, 1, 0, 1, 0, 2, 2, 1, 0, 0, 1, 2, 1, 0, 2, 1, 0, 1, 2, 1, 2,
        0, 2, 2, 0, 2, 1, 1, 0, 0, 2, 0, 1, 1, 0, 2, 2, 0, 1, 1, 2, 0, 0, 2, 1,
        1, 2, 0

In [27]:
model(i[0][0],i[0][1])

torch.Size([128, 100, 600])
torch.Size([128, 2400])


tensor([[ 0.0282, -0.0408],
        [ 0.0202, -0.0525],
        [ 0.0224, -0.0503],
        [ 0.0366, -0.0530],
        [ 0.0353, -0.0414],
        [ 0.0398, -0.0516],
        [ 0.0286, -0.0404],
        [ 0.0290, -0.0517],
        [ 0.0377, -0.0596],
        [ 0.0204, -0.0438],
        [ 0.0305, -0.0502],
        [ 0.0277, -0.0437],
        [ 0.0196, -0.0325],
        [ 0.0272, -0.0532],
        [ 0.0174, -0.0504],
        [ 0.0371, -0.0332],
        [ 0.0281, -0.0441],
        [ 0.0380, -0.0396],
        [ 0.0398, -0.0366],
        [ 0.0326, -0.0413],
        [ 0.0293, -0.0479],
        [ 0.0269, -0.0540],
        [ 0.0330, -0.0464],
        [ 0.0171, -0.0616],
        [ 0.0361, -0.0551],
        [ 0.0270, -0.0508],
        [ 0.0408, -0.0427],
        [ 0.0364, -0.0420],
        [ 0.0377, -0.0497],
        [ 0.0309, -0.0478],
        [ 0.0342, -0.0489],
        [ 0.0241, -0.0464],
        [ 0.0353, -0.0432],
        [ 0.0265, -0.0490],
        [ 0.0292, -0.0527],
        [ 0.0409, -0

In [23]:
from torch_lr_finder import LRFinder

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.00001)
lr_finder = LRFinder(model, optimizer, criterion, device="cuda")
lr_finder.range_test(train_dl, val_loader=val_dl, end_lr=0.01, num_iter=100, step_mode="exp")
lr_finder.plot(log_lr=False)
lr_finder.reset()

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

TypeError: forward() missing 1 required positional argument: 'x1'

In [62]:
model.cpu()
model(i[0][0].cpu(),i[0][1].cpu())

RuntimeError: mat1 and mat2 shapes cannot be multiplied (128x400 and 2400x300)

In [46]:
i[0][0]

tensor([[   2,    4,   46,  ...,    1,    1,    1],
        [   2,    4,   46,  ...,    1,    1,    1],
        [   2,    4,   46,  ...,    1,    1,    1],
        ...,
        [   2, 1518, 1134,  ...,    1,    1,    1],
        [   2,    6, 2169,  ...,    1,    1,    1],
        [   2,    6, 2169,  ...,    1,    1,    1]])

In [17]:
weighted_sum = x_att.bmm(y)

In [26]:
dataset.__dir__()

['_train_transforms',
 '_val_transforms',
 '_test_transforms',
 'dims',
 '_has_prepared_data',
 '_has_setup_fit',
 '_has_setup_test',
 'conf',
 'batch_size',
 'data',
 '__module__',
 '__init__',
 'prepare_data',
 'train_dataloader',
 'val_dataloader',
 'test_dataloader',
 'vocab_size',
 'char_vocab_size',
 'padding_idx',
 'charpadding_idx',
 'out_dim',
 'labels',
 '__doc__',
 'setup',
 '__annotations__',
 'name',
 'train_transforms',
 'val_transforms',
 'test_transforms',
 'size',
 'has_prepared_data',
 'has_setup_fit',
 'has_setup_test',
 'transfer_batch_to_device',
 'add_argparse_args',
 'from_argparse_args',
 'get_init_arguments_and_types',
 '__dict__',
 '__weakref__',
 '__repr__',
 '__hash__',
 '__str__',
 '__getattribute__',
 '__setattr__',
 '__delattr__',
 '__lt__',
 '__le__',
 '__eq__',
 '__ne__',
 '__gt__',
 '__ge__',
 '__new__',
 '__reduce_ex__',
 '__reduce__',
 '__subclasshook__',
 '__init_subclass__',
 '__format__',
 '__sizeof__',
 '__dir__',
 '__class__']

In [27]:
dataset.embedding_dim

AttributeError: 'SNLIDataModule' object has no attribute 'embedding_dim'