In [15]:
from torchtext.data import Iterator, BucketIterator
from torchtext import data
import torch
from torchtext.vocab import Vectors
import torch.nn as nn
import torch.nn.functional as F
import tqdm
from tqdm.notebook import tqdm

batch_size = 36
hidden_size = 600  # every LSTM's(forward and backward) hidden size is half of HIDDEN_SIZE
epochs = 1
dropout = 0.2
num_layers = 2
num_classes=4
learning_rate = 4e-4
embedding_size = 200

data_path='./data/'
vectors = Vectors('glove.twitter.27B.200d.txt', 'C:/Users/YYH/Desktop/nlp-beginner/Task 2/embedding/')
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
def load_iter(batch_size=32, device="cuda", data_path='./data/', vectors=None):
    TEXT = data.Field(batch_first=True, include_lengths=True, lower=True)
    LABEL = data.LabelField(batch_first=True)
    fields = {'sentence1': ('premise', TEXT),
              'sentence2': ('hypothesis', TEXT),
              'gold_label': ('label', LABEL)}
    train_data, dev_data, test_data = data.TabularDataset.splits(
        path=data_path,
        train='snli_1.0_train.jsonl',
        validation='snli_1.0_dev.jsonl',
        test='snli_1.0_test.jsonl',
        format='json',
        fields=fields,
        filter_pred=lambda ex: ex.label != '-'
    )
    TEXT.build_vocab(train_data, vectors=vectors, unk_init=torch.Tensor.normal_)
    LABEL.build_vocab(test_data)
    train_iter, dev_iter = BucketIterator.splits(
        (train_data, dev_data),
        batch_sizes=(batch_size, batch_size),
        device=device,
        sort_key=lambda x: len(x.premise) + len(x.hypothesis),
        sort_within_batch=True,
        repeat=False,
        shuffle=True
    )

    test_iter = Iterator(
         test_data,
         batch_size=batch_size,
         device=device,
         sort=False,
         sort_within_batch=False,
         repeat=False,
         shuffle=False
    )

    return train_iter, dev_iter, test_iter, TEXT, LABEL

train_iter, dev_iter, test_iter, TEXT, LABEL = load_iter(batch_size, device, data_path, vectors)

In [16]:
class Input_Encoding(nn.Module):
    def __init__(self, embedding_size, hidden_size, num_layers, bidirectional, dropout):
        super(Input_Encoding,self).__init__()
        self.embedding = nn.Embedding.from_pretrained(TEXT.vocab.vectors, freeze=False)
        self.lstm = nn.LSTM(embedding_size, 
                           hidden_size, 
                           num_layers=num_layers, 
                           bidirectional=bidirectional, 
                           dropout=dropout,
                           batch_first=True)
        self.dropout = nn.Dropout(dropout)
    
    def forward(self,text,text_lengths):
        embedded = self.embedding(text)
        embedded = self.dropout(embedded)
        packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, 
                                                            text_lengths,
                                                            batch_first=True,
                                                            enforce_sorted=False)
        packed_output, (hidden, cell) = self.lstm(packed_embedded)
        output, _ = nn.utils.rnn.pad_packed_sequence(packed_output, batch_first=True)
        return output

class Local_Inference(nn.Module):
    def __init__(self):
        super(Local_Inference,self).__init__()
        
    def forward(self,a,b):
        e=torch.matmul(a,b.transpose(1,2))
        a1=nn.Softmax(dim=2)(e).bmm(b)
        b1=nn.Softmax(dim=1)(e).transpose(1,2).bmm(a)
        m_a=torch.cat([a,a1,a-a1,a*a1],dim=-1)
        m_b=torch.cat([b,b1,b-b1,b*b1],dim=-1)
        return m_a,m_b

class Inference_Composition(nn.Module):
    def __init__(self, hidden_size, num_layers, bidirectional, dropout):
        super(Inference_Composition, self).__init__()
        self.fc=nn.Linear(8*hidden_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, 
                           hidden_size, 
                           num_layers=num_layers, 
                           bidirectional=bidirectional, 
                           dropout=dropout,
                           batch_first=True)
        self.dropout = nn.Dropout(dropout)
    def forward(self,text,text_lengths):
        x = self.fc(text)
        x = self.dropout(x)
        packed_embedded = nn.utils.rnn.pack_padded_sequence(x, 
                                                            text_lengths,
                                                            batch_first=True,
                                                            enforce_sorted=False)
        packed_output, (hidden, cell) = self.lstm(packed_embedded)
        output, _ = nn.utils.rnn.pad_packed_sequence(packed_output, batch_first=True)
        return output

class Prediction_Layer(nn.Module):
    def __init__(self, hidden_size, num_classes, dropout):
        super(Prediction_Layer, self).__init__()
        self.fc=nn.Sequential(
            nn.Linear(8*hidden_size,hidden_size),
            nn.Tanh(),
            nn.Linear(hidden_size,num_classes)
            )
    def forward(self,a,b):
        a1 = F.avg_pool1d(a.transpose(1, 2), a.size(1)).squeeze(-1)
        a2 = F.max_pool1d(a.transpose(1, 2), a.size(1)).squeeze(-1)
        b1 = F.avg_pool1d(b.transpose(1, 2), b.size(1)).squeeze(-1)
        b2 = F.max_pool1d(b.transpose(1, 2), b.size(1)).squeeze(-1)
        output = torch.cat((a1,a2,b1,b2), dim=-1)
        output= self.fc(output)
        return output
        
class ESIM(nn.Module):
    def __init__(self, embedding_size, hidden_size, num_classes=4, num_layers=2, 
                 dropout=0.2, bidirectional=True, batch_first=True, freeze=False):
        super(ESIM, self).__init__()
        self.name='ESIM'
        self.Input_Encoding=Input_Encoding(embedding_size, hidden_size, num_layers, bidirectional, dropout)
        self.Local_Inference=Local_Inference()
        self.Inference_Composition=Inference_Composition(hidden_size, num_layers, bidirectional, dropout)
        self.Prediction_Layer=Prediction_Layer(hidden_size, num_classes, dropout)
        
    def forward(self, a, a_length, b, b_length):
        a0=self.Input_Encoding(a,a_length)
        b0=self.Input_Encoding(b,b_length)
        a1,b1=self.Local_Inference(a0,b0)
        a2=self.Inference_Composition(a1,a_length)
        b2=self.Inference_Composition(b1,b_length)
        output=self.Prediction_Layer(a2,b2)
        return output

model=ESIM(embedding_size, hidden_size, num_classes, num_layers, dropout)
model

ESIM(
  (Input_Encoding): Input_Encoding(
    (embedding): Embedding(56220, 200)
    (lstm): LSTM(200, 600, num_layers=2, batch_first=True, dropout=0.2, bidirectional=True)
    (dropout): Dropout(p=0.2, inplace=False)
  )
  (Local_Inference): Local_Inference()
  (Inference_Composition): Inference_Composition(
    (fc): Linear(in_features=4800, out_features=600, bias=True)
    (lstm): LSTM(600, 600, num_layers=2, batch_first=True, dropout=0.2, bidirectional=True)
    (dropout): Dropout(p=0.2, inplace=False)
  )
  (Prediction_Layer): Prediction_Layer(
    (fc): Sequential(
      (0): Linear(in_features=4800, out_features=600, bias=True)
      (1): Tanh()
      (2): Linear(in_features=600, out_features=4, bias=True)
    )
  )
)

In [17]:
def train(model,loss_fn,optimizer,train_generator, dev_generator, epochs):
    model.to(device)
    for epoch in range(epochs):
        for step, batch in enumerate(tqdm(train_generator)):
            model.train()
            a, a_length=batch.premise
            b, b_length=batch.hypothesis
            labels=batch.label
            
            optimizer.zero_grad()
            outputs=model(a,a_length,b,b_length)
            loss=loss_fn(outputs,labels)
            loss.backward()
            optimizer.step()
            if step % 10 == 0:
                loss_history.append(loss.item())
        model.eval()
        with torch.no_grad():
            corr_num = 0
            err_num = 0
            for batch in dev_iter:
                a, a_length=batch.premise
                b, b_length=batch.hypothesis
                labels=batch.label
                outputs=model(a,a_length,b,b_length)
                corr_num += (outputs.argmax(1) == labels).sum().item()
                err_num += (outputs.argmax(1) != labels).sum().item()
            tqdm.write('Epoch {}, Accuracy {}'.format(epoch, corr_num / (corr_num + err_num))) 
        torch.save(model, './model/model_'+model.name+'_epoch_{}.pkl'.format(epoch))
    return model

In [18]:
loss_history=[]
optimizer = torch.optim.Adam(model.parameters())
loss_fn = nn.CrossEntropyLoss()
train(model,loss_fn,optimizer,train_iter,dev_iter,epochs)

  0%|          | 0/15261 [00:00<?, ?it/s]

Epoch 0, Accuracy 0.7444625076204023


FileNotFoundError: [Errno 2] No such file or directory: './model/model_ESIM_epoch_0.pkl'