In [3]:
import torch
from torchtext.legacy import data
from torchtext.legacy import datasets
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np
import torch.optim as optim
import random
import time
import json
import copy

from load_data import load_dataset

# Seed every random number generator this notebook relies on (PyTorch, NumPy
# and the standard library) with the same value.
SEED = 1234
torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)
# Request deterministic cuDNN kernels.
torch.backends.cudnn.deterministic = True
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

### Common functions for model training

In [4]:
def count_parameters(model):
    """Return the total number of elements in the trainable parameters of `model`.

    Only parameters whose `requires_grad` flag is set are counted.
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total
    
def categorical_accuracy(preds, y):
    """
    Fraction of rows in `preds` whose arg-max along dim 1 equals the label in `y`.

    The result is a ratio (8 correct out of 10 gives 0.8, not 8), returned as a
    tensor. The denominator is `y.shape[0]`.
    """
    # Keep the class dimension so the labels can be reshaped to match it.
    predicted = preds.argmax(dim = 1, keepdim = True)
    targets = y.view_as(predicted)
    n_hits = predicted.eq(targets).sum()
    return n_hits.float() / y.shape[0]

def train(model, iterator, optimizer, criterion):
    """Run one optimisation pass over `iterator` and report the averaged metrics.

    Each batch must expose `text` as a `(tokens, lengths)` pair and a `label`
    attribute. For every batch the gradients are reset, the model is called
    with the tokens and lengths, the loss is back-propagated and the optimizer
    takes one step.

    Returns:
        `(mean_loss, mean_accuracy)`; both are the sum of the per-batch values
        divided by the number of batches, `len(iterator)`.
    """
    model.train()
    running_loss, running_acc = 0, 0

    for batch in iterator:
        optimizer.zero_grad()

        tokens, lengths = batch.text
        logits = model(tokens, lengths).squeeze(1)

        # The criterion receives the labels cast to long.
        batch_loss = criterion(logits, batch.label.long())
        batch_acc = categorical_accuracy(logits, batch.label)

        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()
        running_acc += batch_acc.item()

    n_batches = len(iterator)
    return running_loss / n_batches, running_acc / n_batches


def evaluate(model, iterator, criterion):
    """Score `model` on every batch of `iterator` without updating its weights.

    The model is switched to eval mode and all forward passes run under
    `torch.no_grad()`. Each batch must expose `text` as a `(tokens, lengths)`
    pair and a `label` attribute.

    Returns:
        `(mean_loss, mean_accuracy)`; both are the sum of the per-batch values
        divided by the number of batches, `len(iterator)`.
    """
    model.eval()
    running_loss, running_acc = 0, 0

    with torch.no_grad():
        for batch in iterator:
            tokens, lengths = batch.text
            logits = model(tokens, lengths).squeeze(1)

            # The criterion receives the labels cast to long.
            batch_loss = criterion(logits, batch.label.long())
            batch_acc = categorical_accuracy(logits, batch.label)

            running_loss += batch_loss.item()
            running_acc += batch_acc.item()

    n_batches = len(iterator)
    return running_loss / n_batches, running_acc / n_batches

def multi_models(model, train_iterator, valid_iterator,  test_iterator, num = 5, N_EPOCHS = 20, early_stopping = 5, checkpoint_path = 'bilstm-model.pt'):
    """Train `num` fresh copies of `model` and collect their metrics.

    Every round starts from a deep copy of the model as it was passed in, so
    the caller's `model` object itself is never trained. Within a round the
    weights with the lowest validation loss are written to `checkpoint_path`
    and restored before the test set is evaluated.

    Args:
        model: template model; it is deep-copied once up front and once per round.
        train_iterator, valid_iterator, test_iterator: batch iterators passed to
            `train` / `evaluate`.
        num: number of independent training rounds.
        N_EPOCHS: maximum number of epochs per round.
        early_stopping: stop a round after this many consecutive epochs without
            an improvement of the validation loss.
        checkpoint_path: file used to store the best weights of the current
            round. Defaults to the historical 'bilstm-model.pt'.

    Returns:
        dict with "test_loss" and "test_acc" (one entry per round) plus one
        "model_<i>" entry per round holding per-epoch lists for "time",
        "train_loss", "val_loss", "train_acc" and "val_acc".
    """
    init_model = copy.deepcopy(model)
    res = {}
    res["test_loss"] = []
    res["test_acc"] = []
    ### begin training process
    for i in range(num):
        print('Begin training model %s'%i)
        res["model_%s"%i] = {}
        res["model_%s"%i]["time"] = []
        res["model_%s"%i]["train_loss"] = []
        res["model_%s"%i]["val_loss"] = []
        res["model_%s"%i]["train_acc"] = []
        res["model_%s"%i]["val_acc"] = []
        best_valid_loss = float('inf')
        num_steps = 0
        # Tracks whether THIS round wrote a checkpoint, so that a stale file
        # left over from an earlier round/run is never loaded by mistake.
        saved_checkpoint = False
        ### reset model for each round
        model = copy.deepcopy(init_model)
        model = model.to(device)
        optimizer = optim.Adam(model.parameters())
        criterion = nn.CrossEntropyLoss()
        criterion = criterion.to(device)

        for epoch in range(N_EPOCHS):
            print('Begin epoch %s'%epoch)
            start_time = time.time()
            train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
            valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)
            end_time = time.time()
            if valid_loss < best_valid_loss:
                best_valid_loss = valid_loss
                torch.save(model.state_dict(), checkpoint_path)
                saved_checkpoint = True
                num_steps = 0
            else:
                num_steps += 1
            print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
            print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')
            res["model_%s"%i]["time"].append(end_time - start_time)
            res["model_%s"%i]["train_loss"].append(train_loss)
            res["model_%s"%i]["val_loss"].append(valid_loss)
            res["model_%s"%i]["train_acc"].append(train_acc)
            res["model_%s"%i]["val_acc"].append(valid_acc)
            if num_steps >= early_stopping:
                break

        if saved_checkpoint:
            # map_location keeps the restored tensors on the device in use.
            model.load_state_dict(torch.load(checkpoint_path, map_location=device))
        else:
            # No epoch improved on the initial best loss (e.g. N_EPOCHS == 0 or a
            # NaN validation loss): keep the current weights instead of loading
            # whatever file happens to exist at checkpoint_path.
            print('No checkpoint was saved for model %s; testing the current weights'%i)
        test_loss, test_acc = evaluate(model, test_iterator, criterion)
        print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')
        res["test_loss"].append(test_loss)
        res["test_acc"].append(test_acc)
    return res

### BiLSTM model

In [5]:
import torch.nn as nn

class biLSTM(nn.Module):
    """LSTM classifier over a frozen embedding layer.

    The final hidden state of the last LSTM layer (forward and backward halves
    concatenated when `bidirectional` is true) is passed through dropout and a
    linear layer that produces `output_dim` scores per example.

    Args:
        vocab_size: number of rows in the embedding table.
        embedding_dim: size of each embedding vector.
        hidden_dim: LSTM hidden size per direction.
        output_dim: number of output scores.
        n_layers: number of stacked LSTM layers.
        bidirectional: whether the LSTM runs in both directions.
        dropout: dropout probability for the embedding output and the final
            hidden state, and between LSTM layers when `n_layers > 1`.
        pad_idx: index of the padding token in the embedding table.
    """
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, n_layers, bidirectional, dropout, pad_idx):
        super().__init__()
        self.bidirectional = bidirectional
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx = pad_idx)
        # The embedding weights are frozen; they are not updated by the optimizer.
        self.embedding.weight.requires_grad = False
        # nn.LSTM applies its dropout only between stacked layers and warns when
        # a non-zero value is given for a single layer, so pass 0 in that case.
        self.rnn = nn.LSTM(embedding_dim,
                           hidden_dim,
                           num_layers=n_layers,
                           bidirectional=bidirectional,
                           dropout=dropout if n_layers > 1 else 0)
        # A bidirectional LSTM yields two final hidden states per example.
        self.fc = nn.Linear(hidden_dim * (2 if bidirectional else 1), output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, text, text_lengths):
        """Return the `fc` scores for a padded batch.

        Args:
            text: token indices, [sent len, batch size].
            text_lengths: length of each sequence in the batch.
        """
        #text = [sent len, batch size]
        embedded = self.dropout(self.embedding(text))
        #embedded = [sent len, batch size, emb dim]
        # Pack the padded batch; the lengths tensor needs to be on the CPU.
        packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, text_lengths.to('cpu'), enforce_sorted=False)

        # Only the final hidden state is used; the per-step outputs are discarded.
        _, (hidden, _) = self.rnn(packed_embedded)
        if self.bidirectional:
            # The last two entries are the top layer's forward and backward states.
            final_hidden = torch.cat((hidden[-2,:,:], hidden[-1,:,:]), dim = 1)
        else:
            final_hidden = hidden[-1,:,:]
        return self.fc(self.dropout(final_hidden))

### Train on the SST dataset

In [4]:
# Train and evaluate the BiLSTM on the SST dataset.
data_name = "SST"
train_iterator, valid_iterator, test_iterator, TEXT, LABEL = load_dataset(
    data_name,
    device,
    include_lengths = True,
    batch_first = False,
)

# Model hyper-parameters; the vocabulary sizes come from the fields returned above.
INPUT_DIM = len(TEXT.vocab)
OUTPUT_DIM = len(LABEL.vocab)
EMBEDDING_DIM = 300
HIDDEN_DIM = 512
N_LAYERS = 1
BIDIRECTIONAL = True
DROPOUT = 0.5
PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]
UNK_IDX = TEXT.vocab.stoi[TEXT.unk_token]

model = biLSTM(
    vocab_size = INPUT_DIM,
    embedding_dim = EMBEDDING_DIM,
    hidden_dim = HIDDEN_DIM,
    output_dim = OUTPUT_DIM,
    n_layers = N_LAYERS,
    bidirectional = BIDIRECTIONAL,
    dropout = DROPOUT,
    pad_idx = PAD_IDX,
)

# Copy the vocabulary's pre-trained vectors into the embedding table, then
# zero the rows of the unknown and padding tokens.
pretrained_embeddings = TEXT.vocab.vectors
model.embedding.weight.data.copy_(pretrained_embeddings)
model.embedding.weight.data[UNK_IDX] = torch.zeros(EMBEDDING_DIM)
model.embedding.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_DIM)
# NOTE: fine-tuning the embeddings is probably worth trying as well.

# Run the repeated training rounds and store all metrics as JSON.
res = multi_models(model, train_iterator, valid_iterator, test_iterator)
res["num_param"] = count_parameters(model)
with open("bilstm_%s.json"%data_name, "w") as outfile:
    json.dump(res, outfile, indent=4)

  "num_layers={}".format(dropout, num_layers))


Begin training model 0
Begin epoch 0
	Train Loss: 0.344 | Train Acc: 84.93%
	 Val. Loss: 0.390 |  Val. Acc: 82.19%
Begin epoch 1
	Train Loss: 0.274 | Train Acc: 88.20%
	 Val. Loss: 0.395 |  Val. Acc: 83.62%
Begin epoch 2
	Train Loss: 0.235 | Train Acc: 90.31%
	 Val. Loss: 0.414 |  Val. Acc: 82.66%
Begin epoch 3
	Train Loss: 0.204 | Train Acc: 91.72%
	 Val. Loss: 0.406 |  Val. Acc: 84.00%
Begin epoch 4
	Train Loss: 0.178 | Train Acc: 92.92%
	 Val. Loss: 0.363 |  Val. Acc: 86.27%
Begin epoch 5
	Train Loss: 0.158 | Train Acc: 93.80%
	 Val. Loss: 0.390 |  Val. Acc: 86.65%
Begin epoch 6
	Train Loss: 0.142 | Train Acc: 94.48%
	 Val. Loss: 0.431 |  Val. Acc: 85.94%
Begin epoch 7
	Train Loss: 0.128 | Train Acc: 95.08%
	 Val. Loss: 0.423 |  Val. Acc: 86.54%
Begin epoch 8
	Train Loss: 0.120 | Train Acc: 95.37%
	 Val. Loss: 0.464 |  Val. Acc: 86.96%
Begin epoch 9
	Train Loss: 0.110 | Train Acc: 95.78%
	 Val. Loss: 0.431 |  Val. Acc: 86.00%
Test Loss: 0.328 | Test Acc: 86.49%
Begin training model 

In [5]:
# Train and evaluate the BiLSTM on the TREC dataset.
data_name = "TREC"
train_iterator, valid_iterator, test_iterator, TEXT, LABEL = load_dataset(
    data_name,
    device,
    include_lengths = True,
    batch_first = False,
)

# Model hyper-parameters; the vocabulary sizes come from the fields returned above.
INPUT_DIM = len(TEXT.vocab)
OUTPUT_DIM = len(LABEL.vocab)
EMBEDDING_DIM = 300
HIDDEN_DIM = 512
N_LAYERS = 1
BIDIRECTIONAL = True
DROPOUT = 0.5
PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]
UNK_IDX = TEXT.vocab.stoi[TEXT.unk_token]

model = biLSTM(
    vocab_size = INPUT_DIM,
    embedding_dim = EMBEDDING_DIM,
    hidden_dim = HIDDEN_DIM,
    output_dim = OUTPUT_DIM,
    n_layers = N_LAYERS,
    bidirectional = BIDIRECTIONAL,
    dropout = DROPOUT,
    pad_idx = PAD_IDX,
)

# Copy the vocabulary's pre-trained vectors into the embedding table, then
# zero the rows of the unknown and padding tokens.
pretrained_embeddings = TEXT.vocab.vectors
model.embedding.weight.data.copy_(pretrained_embeddings)
model.embedding.weight.data[UNK_IDX] = torch.zeros(EMBEDDING_DIM)
model.embedding.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_DIM)
# NOTE: fine-tuning the embeddings is probably worth trying as well.

# Run the repeated training rounds and store all metrics as JSON.
res = multi_models(model, train_iterator, valid_iterator, test_iterator)
res["num_param"] = count_parameters(model)
with open("bilstm_%s.json"%data_name, "w") as outfile:
    json.dump(res, outfile, indent=4)

Begin training model 0
Begin epoch 0
	Train Loss: 1.087 | Train Acc: 59.57%
	 Val. Loss: 0.680 |  Val. Acc: 75.99%
Begin epoch 1
	Train Loss: 0.626 | Train Acc: 77.93%
	 Val. Loss: 0.522 |  Val. Acc: 80.98%
Begin epoch 2
	Train Loss: 0.514 | Train Acc: 81.24%
	 Val. Loss: 0.410 |  Val. Acc: 85.63%
Begin epoch 3
	Train Loss: 0.446 | Train Acc: 84.11%
	 Val. Loss: 0.400 |  Val. Acc: 83.89%
Begin epoch 4
	Train Loss: 0.374 | Train Acc: 86.91%
	 Val. Loss: 0.349 |  Val. Acc: 87.36%
Begin epoch 5
	Train Loss: 0.325 | Train Acc: 88.09%
	 Val. Loss: 0.352 |  Val. Acc: 87.72%
Begin epoch 6
	Train Loss: 0.290 | Train Acc: 89.10%
	 Val. Loss: 0.380 |  Val. Acc: 85.64%
Begin epoch 7
	Train Loss: 0.258 | Train Acc: 90.06%
	 Val. Loss: 0.320 |  Val. Acc: 89.08%
Begin epoch 8
	Train Loss: 0.205 | Train Acc: 92.50%
	 Val. Loss: 0.324 |  Val. Acc: 88.58%
Begin epoch 9
	Train Loss: 0.187 | Train Acc: 93.38%
	 Val. Loss: 0.384 |  Val. Acc: 87.53%
Begin epoch 10
	Train Loss: 0.149 | Train Acc: 94.43%
	 V

In [6]:
# Train and evaluate the BiLSTM on the IMDB dataset.
data_name = "IMDB"
train_iterator, valid_iterator, test_iterator, TEXT, LABEL = load_dataset(
    data_name,
    device,
    include_lengths = True,
    batch_first = False,
)

# Model hyper-parameters; the vocabulary sizes come from the fields returned above.
INPUT_DIM = len(TEXT.vocab)
OUTPUT_DIM = len(LABEL.vocab)
EMBEDDING_DIM = 300
HIDDEN_DIM = 512
N_LAYERS = 1
BIDIRECTIONAL = True
DROPOUT = 0.5
PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]
UNK_IDX = TEXT.vocab.stoi[TEXT.unk_token]

model = biLSTM(
    vocab_size = INPUT_DIM,
    embedding_dim = EMBEDDING_DIM,
    hidden_dim = HIDDEN_DIM,
    output_dim = OUTPUT_DIM,
    n_layers = N_LAYERS,
    bidirectional = BIDIRECTIONAL,
    dropout = DROPOUT,
    pad_idx = PAD_IDX,
)

# Copy the vocabulary's pre-trained vectors into the embedding table, then
# zero the rows of the unknown and padding tokens.
pretrained_embeddings = TEXT.vocab.vectors
model.embedding.weight.data.copy_(pretrained_embeddings)
model.embedding.weight.data[UNK_IDX] = torch.zeros(EMBEDDING_DIM)
model.embedding.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_DIM)
# NOTE: fine-tuning the embeddings is probably worth trying as well.

# Run the repeated training rounds and store all metrics as JSON.
res = multi_models(model, train_iterator, valid_iterator, test_iterator)
res["num_param"] = count_parameters(model)
with open("bilstm_%s.json"%data_name, "w") as outfile:
    json.dump(res, outfile, indent=4)

  "num_layers={}".format(dropout, num_layers))


Begin training model 0
Begin epoch 0
	Train Loss: 0.607 | Train Acc: 67.11%
	 Val. Loss: 0.494 |  Val. Acc: 75.82%
Begin epoch 1
	Train Loss: 0.528 | Train Acc: 73.10%
	 Val. Loss: 0.367 |  Val. Acc: 84.80%
Begin epoch 2
	Train Loss: 0.358 | Train Acc: 84.51%
	 Val. Loss: 0.307 |  Val. Acc: 87.50%
Begin epoch 3
	Train Loss: 0.313 | Train Acc: 86.99%
	 Val. Loss: 0.283 |  Val. Acc: 87.97%
Begin epoch 4
	Train Loss: 0.291 | Train Acc: 87.76%
	 Val. Loss: 0.274 |  Val. Acc: 88.67%
Begin epoch 5
	Train Loss: 0.271 | Train Acc: 88.85%
	 Val. Loss: 0.248 |  Val. Acc: 89.84%
Begin epoch 6
	Train Loss: 0.254 | Train Acc: 89.72%
	 Val. Loss: 0.233 |  Val. Acc: 89.77%
Begin epoch 7
	Train Loss: 0.241 | Train Acc: 90.15%
	 Val. Loss: 0.242 |  Val. Acc: 90.08%
Begin epoch 8
	Train Loss: 0.225 | Train Acc: 90.85%
	 Val. Loss: 0.238 |  Val. Acc: 89.96%
Begin epoch 9
	Train Loss: 0.207 | Train Acc: 91.55%
	 Val. Loss: 0.240 |  Val. Acc: 90.31%
Begin epoch 10
	Train Loss: 0.190 | Train Acc: 92.39%
	 V