In [1]:
import numpy as np
import re
import random
import json
import collections
import numpy as np
from tqdm import tqdm
import nltk
from nltk.corpus import wordnet as wn 
import os
import pickle
import multiprocessing
from nltk.tag import StanfordNERTagger
from nltk.tag import StanfordPOSTagger

In [2]:
# Padding symbol; it is the first entry of the tag list below, so it gets id 0.
PADDING = "<PAD>"

# Part-of-speech tag strings. A tag's position in this list is its integer id.
POS_Tagging = [
    PADDING,
    'WP$', 'RBS', 'SYM', 'WRB', 'IN', 'VB', 'POS', 'TO', ':', '-RRB-',
    '$', 'MD', 'JJ', '#', 'CD', '``', 'JJR', 'NNP', "''", 'LS',
    'VBP', 'VBD', 'FW', 'RBR', 'JJS', 'DT', 'VBG', 'RP', 'NNS', 'RB',
    'PDT', 'PRP$', '.', 'XX', 'NNPS', 'UH', 'EX', 'NN', 'WDT', 'VBN',
    'VBZ', 'CC', ',', '-LRB-', 'PRP', 'WP',
]

# Reverse lookup: tag string -> integer id.
POS_dict = dict(zip(POS_Tagging, range(len(POS_Tagging))))

# Shared NLP helpers used by the token-matching functions in this notebook.
stemmer = nltk.SnowballStemmer('english')

tt = nltk.tokenize.treebank.TreebankWordTokenizer()

# Stanford NER tagger: (model file, jar file), both given relative to the working directory.
nst = StanfordNERTagger(
    'stanford-ner-2020-11-17/classifiers/english.muc.7class.distsim.crf.ser.gz',
    'stanford-ner-2020-11-17/stanford-ner-4.2.0.jar',
    encoding='utf-8',
)

# Stanford POS tagger: (model file, jar file), both given relative to the working directory.
pst = StanfordPOSTagger(
    'stanford-postagger-full-2020-11-17/models/english-bidirectional-distsim.tagger',
    'stanford-postagger-full-2020-11-17/stanford-postagger.jar',
)




The StanfordTokenizer will be deprecated in version 3.2.5.
Please use nltk.tag.corenlp.CoreNLPPOSTagger or nltk.tag.corenlp.CoreNLPNERTagger instead.
  super(StanfordNERTagger, self).__init__(*args, **kwargs)
The StanfordTokenizer will be deprecated in version 3.2.5.
Please use nltk.tag.corenlp.CoreNLPPOSTagger or nltk.tag.corenlp.CoreNLPNERTagger instead.
  super(StanfordPOSTagger, self).__init__(*args, **kwargs)


In [3]:

def is_exact_match(token1, token2):
    """Return True when the two tokens count as the same word.

    Comparison is case-insensitive. Two tokens match when any of these holds:

    * they are identical after lower-casing;
    * one is ``"n't"`` and the other is ``"not"``;
    * their stems (module-level ``stemmer``) are equal;
    * the stem of ``token1`` equals the stem of some WordNet lemma name
      listed under any synset of ``token2``.

    Args:
        token1: first token (string).
        token2: second token (string); its WordNet synsets are consulted.

    Returns:
        bool: True on a match, False otherwise.
    """
    token1 = token1.lower()
    token2 = token2.lower()

    # Cheap string checks first, so the WordNet traversal below only runs
    # when they cannot decide. The result is the same in either order.
    if token1 == token2:
        return True
    if {token1, token2} == {"n't", "not"}:
        return True

    token1_stem = stemmer.stem(token1)
    if token1_stem == stemmer.stem(token2):
        return True

    # Slow path: compare against every lemma name of every synset of token2.
    for synset in wn.synsets(token2):
        for lemma in synset.lemma_names():
            if token1_stem == stemmer.stem(lemma):
                return True
    return False

In [4]:
def is_antonyms(token1, token2):
    """Return True when ``token1`` stems to an antonym reachable from ``token2``.

    Antonyms are gathered through WordNet in two hops: every synset of
    ``token2`` -> each of its lemma names -> every synset of that lemma name
    -> each lemma's antonyms. ``token1`` matches when its stem equals the stem
    of any collected antonym name. Both tokens are lower-cased first.
    """
    token1 = token1.lower()
    token2 = token2.lower()
    wanted_stem = stemmer.stem(token1)

    # A set, so each distinct antonym name is stemmed only once.
    antonym_names = {
        antonym.name()
        for synset in wn.synsets(token2)
        for lemma_name in synset.lemma_names()
        for related_synset in wn.synsets(lemma_name)
        for related_lemma in related_synset.lemmas()
        for antonym in related_lemma.antonyms()
    }
    return any(wanted_stem == stemmer.stem(name) for name in antonym_names)

In [3]:
from lang import *
from datamodule import *
from snli.train_utils import *

import pytorch_lightning as pl
import torch.nn.functional as F
from datamodule import *
from pytorch_lightning.callbacks import LearningRateLogger
from snli.attn_enc.attn_enc import *
from pytorch_lightning.loggers import NeptuneLogger, TensorBoardLogger
from pytorch_lightning.profiler import AdvancedProfiler
from pytorch_lightning.metrics import Accuracy
import pickle
import os
import joblib
import shutil

In [4]:
# Build the data module used for the rest of the notebook, with char_emb=True.
# NOTE(review): snli_bert_data_module is not defined in this notebook; it presumably
# comes from one of the star imports above (e.g. `from datamodule import *`) — confirm.
datamodule = snli_bert_data_module(char_emb=True)

In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np


class Attn_Encoder_conf:
    """Hyper-parameter container for the attention encoder.

    The class attributes are defaults. ``__init__`` then copies vocabulary and
    character-embedding settings from ``lang`` and finally applies every
    keyword argument as an instance attribute, so keyword arguments override
    both the class defaults and the values derived from ``lang``.
    """

    # Class-level defaults (overridable through **kwargs).
    embedding_dim = 300
    hidden_size = 300
    fcs = 1
    num_layers = 2
    dropout = 0.1
    opt_labels = 3
    bidirectional = True
    attn_type = "dot"
    attention_layer_param = 100
    activation = "tanh"
    freeze_embedding = False
    char_embedding_size = 50

    def __init__(self, lang, embedding_matrix=None, **kwargs):
        """Populate the config from ``lang``.

        Args:
            lang: object exposing ``char_emb``, ``char_vocab_size``,
                ``char_emb_max_len`` and ``tokenizer_``; plus ``vocab_size`` and
                ``bert_tokenizer.vocab`` when ``tokenizer_ == "BERT"``, or
                ``vocab_size_final()``, ``word2idx`` and ``config.pad`` otherwise.
            embedding_matrix: stored only on the non-BERT path; ignored for BERT.
            **kwargs: arbitrary attribute overrides, applied last.
        """
        self.embedding_matrix = None
        self.char_emb = lang.char_emb
        self.char_vocab_size = lang.char_vocab_size
        self.char_word_len = lang.char_emb_max_len

        uses_bert = lang.tokenizer_ == "BERT"
        if not uses_bert:
            self.embedding_matrix = embedding_matrix
            self.vocab_size = lang.vocab_size_final()
            self.padding_idx = lang.word2idx[lang.config.pad]
        else:
            self.vocab_size = lang.vocab_size
            self.padding_idx = lang.bert_tokenizer.vocab["[PAD]"]

        for attr_name, attr_value in kwargs.items():
            setattr(self, attr_name, attr_value)


class Attention(nn.Module):
    """Scores each time step of a sequence and normalises the scores over time.

    The score of a hidden state ``h`` is ``Wa(tanh(Ws(h)))``, with both linear
    maps bias-free. ``Ws`` expects ``hidden_size`` input features, doubled when
    ``conf.bidirectional`` is true.
    """

    def __init__(self, conf):
        """Build the two projections from ``conf.hidden_size``,
        ``conf.bidirectional`` and ``conf.attention_layer_param``."""
        super(Attention, self).__init__()
        directions = 2 if conf.bidirectional else 1
        self.Ws = nn.Linear(
            directions * conf.hidden_size,
            conf.attention_layer_param,
            bias=False,
        )
        self.Wa = nn.Linear(conf.attention_layer_param, 1, bias=False)

    def forward(self, hid):
        """Return attention weights for ``hid``.

        Args:
            hid: time-major hidden states, ``(seq_len, batch, features)``.

        Returns:
            Tensor of shape ``(seq_len, batch, 1)`` whose entries sum to 1
            along dim 0 (the sequence axis).
        """
        scores = self.Wa(torch.tanh(self.Ws(hid)))
        # Explicit dim: the implicit choice is deprecated and warns. For a 3-D
        # input it resolved to dim 0, which is kept here.
        return F.softmax(scores, dim=0)


class Attn_Encoder(nn.Module):
    """Sentence encoder: embeddings -> linear projection -> LSTM -> attention pooling.

    ``conf`` must expose the attributes read below: ``vocab_size``,
    ``embedding_dim``, ``padding_idx``, ``embedding_matrix``,
    ``freeze_embedding``, ``char_emb``, ``char_vocab_size``,
    ``char_embedding_size``, ``char_word_len``, ``hidden_size``,
    ``activation``, ``num_layers`` and ``bidirectional`` (plus whatever
    ``Attention`` reads from it).
    """

    def __init__(self, conf):
        """Build all sub-layers.

        Raises:
            ValueError: if ``conf.activation`` is not one of ``relu``, ``tanh``
                or ``leakyrelu`` (case-insensitive). Previously this left
                ``self.act`` undefined and only failed later inside ``forward``.
        """
        super(Attn_Encoder, self).__init__()
        self.conf = conf
        if isinstance(self.conf.embedding_matrix, np.ndarray):
            # from_pretrained is a classmethod that RETURNS a new layer, so the
            # result has to be assigned. Calling it on the existing instance and
            # dropping the return value leaves the random weights in place.
            self.embedding = nn.Embedding.from_pretrained(
                torch.tensor(
                    self.conf.embedding_matrix, dtype=torch.get_default_dtype()
                ),
                freeze=self.conf.freeze_embedding,
                padding_idx=self.conf.padding_idx,
            )
        else:
            self.embedding = nn.Embedding(
                num_embeddings=self.conf.vocab_size,
                embedding_dim=self.conf.embedding_dim,
                padding_idx=self.conf.padding_idx,
            )
        if self.conf.char_emb:
            self.char_embedding = nn.Embedding(
                num_embeddings=self.conf.char_vocab_size,
                embedding_dim=self.conf.char_embedding_size,
                padding_idx=0,
            )
            # Character positions within a word act as input channels; the
            # width-6 kernel slides along the character-embedding axis.
            self.char_cnn = nn.Conv2d(
                self.conf.char_word_len,
                self.conf.char_embedding_size,
                (1, 6),
                stride=(1, 1),
                padding=0,
                bias=True,
            )
        char_width = self.conf.char_embedding_size if self.conf.char_emb else 0
        self.translate = nn.Linear(
            self.conf.embedding_dim + char_width, self.conf.hidden_size
        )

        activations = {"relu": nn.ReLU, "tanh": nn.Tanh, "leakyrelu": nn.LeakyReLU}
        activation_name = self.conf.activation.lower()
        if activation_name not in activations:
            raise ValueError(
                "Unsupported activation %r; expected one of %s"
                % (self.conf.activation, sorted(activations))
            )
        self.act = activations[activation_name]()

        self.lstm_layer = nn.LSTM(
            input_size=self.conf.hidden_size,
            hidden_size=self.conf.hidden_size,
            num_layers=self.conf.num_layers,
            bidirectional=self.conf.bidirectional,
        )
        self.attention = Attention(conf)

    def char_embedding_forward(self, x):
        """Turn character ids into one feature vector per word.

        Args:
            x: integer tensor ``(batch, seq_len, word_len)`` of character ids,
                where ``word_len`` must equal ``conf.char_word_len`` (it is the
                Conv2d input-channel count).

        Returns:
            Tensor ``(batch, seq_len, conf.char_embedding_size)``. The features
            of a word depend only on that word's own characters.
        """
        chars = self.char_embedding(x)  # (batch, seq_len, word_len, emb)
        # Move word_len to the channel axis with a real transpose. The previous
        # view() reinterpreted memory and mixed characters of different words.
        chars = chars.permute(0, 2, 1, 3)  # (batch, word_len, seq_len, emb)
        feats = self.char_cnn(chars)  # (batch, out_ch, seq_len, emb - 5)
        feats = torch.max(F.relu(feats), 3)[0]  # (batch, out_ch, seq_len)
        # Transpose, not view, to get channels last.
        return feats.permute(0, 2, 1).contiguous()  # (batch, seq_len, out_ch)

    def forward(self, inp, char_vec=None):
        """Encode a batch of token-id sequences.

        Args:
            inp: integer tensor ``(batch, seq_len)`` of token ids.
            char_vec: optional ``(batch, seq_len, word_len)`` character ids.
                Must be supplied when ``conf.char_emb`` is true, because the
                projection layer is sized for word + character features.

        Returns:
            Tensor ``(1, batch, directions * hidden_size)``: the attention-
            weighted sum of the LSTM outputs over the sequence.
        """
        embedded = self.embedding(inp)
        if char_vec is not None:
            char_emb = self.char_embedding_forward(char_vec)
            embedded = torch.cat([embedded, char_emb], dim=2)

        embedded = self.translate(embedded)
        embedded = self.act(embedded)
        embedded = embedded.permute(1, 0, 2)  # time-major for the LSTM
        all_, (hid, cell) = self.lstm_layer(embedded)

        attn = self.attention(all_)  # (seq_len, batch, 1)

        # (batch, feat, seq) x (batch, seq, 1) -> (batch, feat, 1) -> (1, batch, feat)
        cont = torch.bmm(all_.permute(1, 2, 0), attn.permute(1, 0, 2)).permute(2, 0, 1)
        return cont


class Attn_encoder_snli(nn.Module):
    """Two-input classifier built on a single shared ``Attn_Encoder``.

    Both inputs go through the same encoder. The two encodings, their absolute
    difference and their element-wise product are concatenated on the last
    axis and fed through ``fc_in`` -> ``conf.fcs`` hidden layers -> ``fc_out``.
    """

    def __init__(self, conf):
        super(Attn_encoder_snli, self).__init__()
        self.conf = conf
        self.encoder = Attn_Encoder(conf)

        hidden = self.conf.hidden_size
        directions = 2 if conf.bidirectional else 1
        # Four concatenated feature groups, each `directions * hidden` wide.
        self.fc_in = nn.Linear(directions * 4 * hidden, hidden)

        self.fcs = nn.ModuleList()
        for _ in range(self.conf.fcs):
            self.fcs.append(nn.Linear(hidden, hidden))

        self.fc_out = nn.Linear(hidden, self.conf.opt_labels)

        # An unrecognised activation name leaves `self.act` unset.
        act_factory = {
            "relu": nn.ReLU,
            "tanh": nn.Tanh,
            "leakyrelu": nn.LeakyReLU,
        }.get(self.conf.activation.lower())
        if act_factory is not None:
            self.act = act_factory()

        self.softmax = nn.Softmax(dim=2)  # registered, but not applied in forward()
        self.dropout = nn.Dropout(p=self.conf.dropout)

    def forward(self, x0, x1, x0_char_vec = None, x1_char_vec = None):
        """Return the ``fc_out`` outputs for the pair ``(x0, x1)``; no softmax is applied.

        ``x0`` / ``x1`` are cast to long before encoding; the optional
        ``*_char_vec`` arguments are passed to the encoder as ``char_vec``.
        """
        enc0 = self.dropout(self.encoder(x0.long(), char_vec=x0_char_vec))
        enc1 = self.dropout(self.encoder(x1.long(), char_vec=x1_char_vec))

        pair_features = torch.cat(
            (enc0, enc1, torch.abs(enc0 - enc1), enc0 * enc1), dim=2
        )

        hidden_state = self.dropout(self.fc_in(pair_features))
        for layer in self.fcs:
            # Order matters: linear -> dropout -> activation.
            hidden_state = self.act(self.dropout(layer(hidden_state)))
        return self.fc_out(hidden_state)


In [6]:
# Language helper exposed by the data module. It is passed to Attn_Encoder_conf below,
# which reads its tokenizer, vocabulary and character-embedding attributes.
lang = datamodule.Lang

In [7]:
# Overrides applied on top of the Attn_Encoder_conf class defaults; every entry
# becomes an attribute of the config object.
conf_kwargs = dict(
    num_layers=2,
    dropout=0.10018262692246818,
    embedding_dim=300,
    hidden_size=400,
    attention_layer_param=250,
    bidirectional=True,
    freeze_embedding=False,
    activation="tanh",
    fcs=1,
    glove=False,
    batch_size=128,
    max_len=110,
)

# Optimizer settings: a "base" and a "tune" optimizer description plus the
# epoch number stored under "switch_epoch".
hparams = dict(
    optimizer_base=dict(
        optim="adamw",
        lr=0.0010039910781394373,
        scheduler="const",
    ),
    optimizer_tune=dict(
        optim="adam",
        lr=0.0010039910781394373,
        weight_decay=0.1,
        scheduler="lambda",
    ),
    switch_epoch=5,
)



In [90]:
# NOTE(review): stale cell (execution count In [90]). It uses `model` and `i`, which are
# only assigned in cells further down, so it fails under Restart & Run All. Move it below
# those cells or delete it.
# Shape probe: elements 0 and 2 of batch `i` are passed positionally and elements 1 and 3
# as the character vectors.
model(i[0],i[2],x0_char_vec = i[1],x1_char_vec = i[3]).shape

torch.Size([1, 128, 3])

In [8]:
# Build the encoder config from `lang`, with no embedding matrix and the overrides in conf_kwargs.
model_conf = Attn_Encoder_conf(lang,None, **conf_kwargs)
# NOTE(review): SNLI_char_emb is not defined in this notebook; presumably a training
# wrapper pulled in by the star imports — confirm. It receives the model class (not an
# instance), the config and the optimizer hparams.
model = SNLI_char_emb(Attn_encoder_snli, model_conf, hparams)

None


In [10]:
# Take only the first training batch: its five elements are unpacked into a..e and the
# whole batch stays bound to `i`. If the loader yields nothing, none of these names are set.
for i in datamodule.train_dataloader():
    a,b,c,d,e = i
    break

In [14]:
# Shape probe on the GPU using the batch unpacked above. Arguments are positional: if
# `model` forwards them in Attn_encoder_snli.forward's order (x0, x1, x0_char_vec,
# x1_char_vec), then a/c are the token inputs and b/d the character vectors — confirm.
# NOTE(review): nothing in this cell moves `model` to the GPU; it presumably relies on an
# earlier-executed cell having done so — confirm.
model(a.cuda(),c.cuda(),b.cuda(),d.cuda()).shape

torch.Size([1, 128, 3])

In [9]:


# TensorBoard logger writing under "lightning_logs", plus a callback created with
# logging_interval="step".
tensorboard_logger = TensorBoardLogger("lightning_logs")
lr_logger = LearningRateLogger(logging_interval="step")

# One GPU, one epoch; profiler and automatic LR finder are switched off.
# NOTE(review): SwitchOptim is not defined in this notebook; presumably it comes from the
# star imports and is related to hparams["switch_epoch"] — confirm.
# NOTE(review): LearningRateLogger, row_log_interval and progress_bar_refresh_rate look
# like names from an older PyTorch Lightning release; pin the version this was run with.
trainer = pl.Trainer(
    gpus=1,
    max_epochs=1,
    progress_bar_refresh_rate=10,
    profiler=False,
    auto_lr_find=False,
    callbacks=[lr_logger, SwitchOptim()],
    logger=[tensorboard_logger],
    row_log_interval=2,
)
# Train `model` on the data provided by `datamodule`.
trainer.fit(model, datamodule)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type              | Params
--------------------------------------------
0 | model | Attn_encoder_snli | 17 M  


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validation sanity check', layout=Layout…



HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…

Saving latest checkpoint..



1