In [2]:
import sys

sys.path.append(".")
import joblib
import pickle
import argparse
from lang import *
from snli.bilstm.bilstm import *
from snli.attn_enc.attn_enc import *
from joblib import Memory
import shutil
import pytorch_lightning as pl
from pytorch_lightning.callbacks import LearningRateLogger
from pytorch_lightning.profiler import AdvancedProfiler
from pytorch_lightning.loggers import NeptuneLogger, TensorBoardLogger
from pytorch_lightning.metrics import Accuracy
from utils.load_models import load_bilstm_encoder, load_attn_encoder
from utils.save_models import save_model, save_model_neptune
from novelty.train_utils import *
from datamodule import *
import os
from utils.keys import NEPTUNE_API

In [3]:
model_id = "SNLI-12"
encoder, Lang = load_attn_encoder(model_id)

In [4]:
data_module = dlnd_data_module(Lang, use_nltk=True)

In [10]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim


class Transformer_conf:
    num_sent = 100
    sent_len = 100
    encoder_dim = 400
    hidden_size = 768
    activation = "relu"
    dropout = 0.3
    transformer_max_len = num_sent * 2 + 1
    n_heads = 6
    sub_enc_layer = 1

    def __init__(self, num_sent, encoder, **kwargs):
        self.num_sent = num_sent
        self.encoder = encoder
        for k, v in kwargs.items():
            setattr(self, k, v)


class Transformer_novelty(nn.Module):
    def __init__(self, conf):
        super(Transformer_novelty, self).__init__()
        self.conf = conf
        self.sent_len = conf.sent_len
        self.num_sent = conf.num_sent
        self.encoder = conf.encoder
        del self.conf.encoder
        self.translate = nn.Linear(2 * self.conf.encoder_dim, self.conf.hidden_size)
        self.template = nn.Parameter(torch.zeros((1)), requires_grad=True)
        if self.conf.activation.lower() == "relu".lower():
            self.act = nn.ReLU()
        elif self.conf.activation.lower() == "tanh".lower():
            self.act = nn.Tanh()
        elif self.conf.activation.lower() == "leakyrelu".lower():
            self.act = nn.LeakyReLU()
        self.dropout = nn.Dropout(conf.dropout)

        self.pos_embedding = nn.Embedding(
            num_embeddings=self.conf.transformer_max_len,
            embedding_dim=self.conf.hidden_size,
        )
        self.register_buffer(
            "position_ids", torch.arange(self.conf.transformer_max_len).expand((1, -1))
        )

        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(
                d_model=self.conf.hidden_size, nhead=self.conf.n_heads
            ),
            self.conf.sub_enc_layer,
        )

        self.LayerNorm = nn.LayerNorm(self.conf.hidden_size)
        self.pooler = nn.Linear(self.conf.hidden_size, self.conf.hidden_size)

        self.translate_trans = nn.Linear(self.conf.hidden_size, self.conf.hidden_size)
        self.template = nn.Parameter(torch.zeros((1)), requires_grad=True)
        self.dropout = nn.Dropout(p=0.3)
        self.cls = nn.Linear(self.conf.hidden_size, 2)

    def encode_sent(self, inp):
        batch_size, _, _ = inp.shape
        x = inp.view(-1, self.sent_len)

        x_padded_idx = x.sum(dim=1) != 0
        x_enc = []
        for sub_batch in x[x_padded_idx].split(64):
            x_enc.append(self.encoder(sub_batch)[0])
        x_enc = torch.cat(x_enc, dim=0)
        encoder_dim = x_enc.shape[0]

        x_enc = x_enc.view(batch_size,-1,encoder_dim)
        print(x_enc.shape)


    def forward(self, x0, x1):
        batch_size, _, _ = x0.shape
        x0_enc = self.encode_sent(x0).permute(1, 0, 2)
        sep_token = torch.zeros((batch_size, 1, self.conf.hidden_size)).to(
            self.template.device
        )
        x1_enc = self.encode_sent(x1).permute(1, 0, 2)
        emb = torch.cat([x0_enc, sep_token, x1_enc], dim=1)
        emb = emb.permute(1, 0, 2)
        # print(emb.shape)

        position_ids = self.position_ids.expand(batch_size, -1).transpose(0, 1)
        # print(position_ids.shape)

        pos_embedding = self.pos_embedding(position_ids)
        # print(pos_embedding.shape)
        emb = emb + pos_embedding

        emb = self.LayerNorm(emb)
        emb = self.dropout(emb)

        opt = self.transformer(emb)[:1, :, :]
        opt = self.pooler(opt)
        opt = self.dropout(F.tanh(opt))
        opt = self.translate_trans(opt)
        opt = self.cls(opt)
        opt = opt.permute(1, 0, 2)
        return opt


In [11]:
params = {
        "encoder_dim": encoder.conf.hidden_size,
        "dropout": 0.3,
        "activation": "tanh",
        "optim": "adamw",
        "weight_decay": 0.1,
        "lr": 0.00010869262115700171,
        "scheduler": "lambda",
    }

model_conf = Transformer_conf(100, encoder, **params)
model = Transformer_novelty(model_conf)
# model = Novelty_model(Transformer_novelty, model_conf, params)


In [12]:
dl = data_module.train_dataloader()
for i in dl:
    a,b,c,d = i
    break
model.cuda()
a=a.cuda()
b=b.cuda()
model(a,b).shape

torch.Size([32, 25, 406])


AttributeError: 'NoneType' object has no attribute 'permute'

In [7]:

EPOCHS = 10

tensorboard_logger = TensorBoardLogger("lightning_logs")

loggers = [tensorboard_logger]

lr_logger = LearningRateLogger(logging_interval="step")
trainer = pl.Trainer(
    gpus=1,
    max_epochs=EPOCHS,
    progress_bar_refresh_rate=10,
    profiler=False,
    auto_lr_find=False,
    callbacks=[lr_logger],
    logger=loggers,
    row_log_interval=2,
)
trainer.fit(model, data_module)
trainer.test(model, datamodule=data_module)


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type                | Params
----------------------------------------------
0 | model | Transformer_novelty | 32 M  


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validation sanity check', layout=Layout…



HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Saving latest checkpoint..



HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Testing', layout=Layout(flex='2'), max=…

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_acc': tensor(0.8111),
 'test_f1': tensor(0.8040),
 'test_loss': tensor(0.5277, device='cuda:0'),
 'test_prec': tensor(0.8127),
 'test_recall': tensor(0.8134)}
--------------------------------------------------------------------------------



[{'test_acc': 0.8110613822937012,
  'test_f1': 0.8040344715118408,
  'test_loss': 0.5276749730110168,
  'test_prec': 0.8127212524414062,
  'test_recall': 0.8134458661079407}]

In [19]:
p = torch.zeros((32,1,400))
q = torch.zeros((32,100,400))
r = torch.zeros((32,100,400))


In [21]:
torch.cat([q,p,r],dim = 1).shape

torch.Size([32, 201, 400])