### Import

In [4]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torchtext import data
from tqdm import tnrange, tqdm_notebook

from data_loader import DataLoader

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Load Data

_**Note**: It takes a while to load data_

In [2]:
data_loader = DataLoader()
train, valid = data_loader.small_train_valid()

loading data...
splitting data...
building vocabulary...


In [5]:
BATCH_SIZE = 64
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# make iterator for splits
train_iter, valid_iter = data.BucketIterator.splits((train, valid), batch_size=BATCH_SIZE, device=device)

In [6]:
print(vars(train.examples[0]))

{'text': ['<', 'br', '/><br', '/>charlie', 'kauffman', 'has', 'made', 'weird', 'metaphysical', 'angst', 'popular', ',', 'but', 'this', 'canadian', 'gem', 'makes', 'it', 'hilarious', '.', '<', 'br', '/><br', '/>like', 'most', 'weird', 'films', 'the', 'less', 'said', 'about', 'plot', 'the', 'better', 'but', 'let', "'s", 'set', 'the', 'scene', ',', 'two', 'friends', 'anthony', 'and', 'dave', 'have', 'been', 'together', 'since', 'childhood', ',', 'they', 'ca', "n't", 'cope', 'with', 'the', 'world', 'and', 'eventually', 'this', 'means', 'they', 'no', 'longer', 'have', 'to', '.', 'but', 'that', 'is', 'where', 'even', 'more', 'problems', 'begin.<br', '/><br', '/>i', 'loved', 'this', 'film', ',', 'it', 'made', 'me', 'smile', 'long', 'after', 'the', 'final', 'credits', 'and', 'that', 'is', 'a', 'rare', 'experience', 'with', 'so', 'many', 'mass', 'produced', 'pieces', 'of', '"', 'nothing', '"', 'out', 'there.<br', '/><br', "/>don't", 'miss', 'this.<br', '/><br', '/', '>'], 'label': ['pos']}


In [9]:
print('%d training examples' %len(train))
print('%d validation examples' %len(valid))
print('%d unique tokens in TEXT' %len(data_loader.TEXT.vocab))
print('%d unique tokens in LABEL' %len(data_loader.LABEL.vocab))
print(data_loader.LABEL.vocab.itos)
print('Input data shape:', data_loader.TEXT.vocab.vectors.shape)

1250 training examples
1250 validation examples
22587 unique tokens in TEXT
4 unique tokens in LABEL
['<unk>', '<pad>', 'neg', 'pos']
Input data shape: torch.Size([22587, 100])


### Model Config

In [13]:
VOCAB_SIZE, EMBEDDING_DIM = data_loader.TEXT.vocab.vectors.shape
HIDDEN_DIM = 32
OUTPUT_DIM = 1
BIDRECTIONAL = True
LEARNING_RATE = 1e-3

### BiRNN

In [14]:
class BiRNN(nn.Module):
    
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, bidirectional):
        super(BiRNN, self).__init__()
        
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.rnn = nn.RNN(embedding_dim, hidden_dim, bidirectional=bidirectional)
        self.fc = nn.Linear(hidden_dim * 2, output_dim)  # input is concated forward and backward hidden state
        
    def forward(self, x):
        embeds = self.embedding(x)
        lstm_output, hidden = self.rnn(embeds)
        hidden = torch.cat((hidden[-2,:,:], hidden[-1,:,:]), dim=1)  # concate forward and backward hidden before FC
        fc_output = self.fc(hidden.squeeze(0))
        
        return fc_output


In [15]:
# initialize model
model_birnn = BiRNN(VOCAB_SIZE, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM, BIDRECTIONAL)
model_birnn = model_birnn.to(device)  # place it to GPU (if available)

# replace inintial weights of embedding layer with pred-trained embedding
pretrained_embeddings = data_loader.TEXT.vocab.vectors
model_birnn.embedding.weight.data.copy_(pretrained_embeddings)

tensor([[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [-0.0382, -0.2449,  0.7281,  ..., -0.1459,  0.8278,  0.2706],
        ...,
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [-0.2415, -0.1050, -0.1875,  ..., -0.3229,  0.1251, -0.8694],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]])

### BiLSTM

In [16]:
class BiLSTM(nn.Module):
    
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, bidirectional):
        super(BiLSTM, self).__init__()
        
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, bidirectional=bidirectional)
        self.fc = nn.Linear(hidden_dim * 2, output_dim)  # input is concated forward and backward hidden state
        
    def forward(self, x):
        embeds = self.embedding(x)
        lstm_output, (hidden, memory) = self.lstm(embeds)
        hidden = torch.cat((hidden[-2,:,:], hidden[-1,:,:]), dim=1)  # concate forward and backward hidden before FC
        fc_output = self.fc(hidden.squeeze(0))
        
        return fc_output

In [17]:
# initialize model
model_bilstm = BiLSTM(VOCAB_SIZE, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM, BIDRECTIONAL)
model_bilstm = model_bilstm.to(device)  # place it to GPU (if available)

# replace inintial weights of embedding layer with pred-trained embedding
pretrained_embeddings = data_loader.TEXT.vocab.vectors
model_bilstm.embedding.weight.data.copy_(pretrained_embeddings)

tensor([[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [-0.0382, -0.2449,  0.7281,  ..., -0.1459,  0.8278,  0.2706],
        ...,
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [-0.2415, -0.1050, -0.1875,  ..., -0.3229,  0.1251, -0.8694],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]])

### Classifier Class

In [23]:
class SentimentClassifier:
    
    def __init__(self, train_iter, valid_iter, model):
        self.train_iter = train_iter
        self.valid_iter = valid_iter
        self.model = model
        self.optimizer = optim.Adam(self.model.parameters())
        self.loss_function = nn.BCEWithLogitsLoss()          # Binary Cross-Entropy Loss
        self.loss_function = self.loss_function.to(device)   # place it to GPU (if available)
        
    def accuracy(self, pred, y):
        """Define metric for evaluation."""
        pred = torch.round(torch.sigmoid(pred))
        acc = torch.sum((pred == y)).float() / len(y)
        return acc

    def train_model(self):
        """Train one epoch of inputs and update weights.
        
        Return: average loss, average accuracy.
        """
        epoch_loss = []
        epoch_acc = []
        self.model.train()

        for batch_data in self.train_iter:
            self.optimizer.zero_grad()  # clear out gradient
            pred = self.model(batch_data.text).squeeze(1)
            y = (batch_data.label.squeeze(0) >= 3).float()  # neg:2, pos:3 -> convert them to 0 and 1
            loss = self.loss_function(pred, y)
            acc = self.accuracy(pred, y)

            # backprob and update gradient
            loss.backward()
            self.optimizer.step()

            epoch_loss.append(loss.item())
            epoch_acc.append(acc.item())

        return np.mean(epoch_loss), np.mean(epoch_acc)

    
    def evaluate_model(self):
        """Evaluate one epoch of inputs.
        
        Return: average loss, average accuracy.
        """
        epoch_loss = []
        epoch_acc = []
        self.model.eval()

        with torch.no_grad():    
            for batch_data in self.valid_iter:
                pred = self.model(batch_data.text).squeeze(1)
                y = (batch_data.label.squeeze(0) >= 3).float()
                loss = self.loss_function(pred, y)
                acc = self.accuracy(pred, y)

                epoch_loss.append(loss.item())
                epoch_acc.append(acc.item())

        return np.mean(epoch_loss), np.mean(epoch_acc)

    
    def run_epochs(self, num_epochs=10, eval_each=1):
        """Run # epochs and evaluate the model.
        
        Return: average loss and accuracy per epoch for training and validation set.
        """
        train_epoch_metrics, valid_epoch_metrics = [], []
        
        for epoch in tnrange(num_epochs, desc='EPOCHS'):
            train_loss, train_acc = self.train_model()
            valid_loss, valid_acc = self.evaluate_model()
            train_epoch_metrics.append((train_loss, train_acc))
            valid_epoch_metrics.append((valid_loss, valid_acc))

            if (epoch + 1) % eval_each == 0:
                print('Epoch %d | Train Loss: %.2f | Train Acc: %.2f | Test Loss: %.2f | Test Acc: %.2f'
                      %(epoch, train_loss, train_acc, valid_loss, valid_acc))
        
        return train_epoch_metrics, valid_epoch_metrics

### Run BiRNN and BiLSTM Model

In [25]:
birnn_classifier = SentimentClassifier(train_iter, valid_iter, model_birnn)
birnn_train_epoch_metrics, birnn_valid_epoch_metrics = birnn_classifier.run_epochs()

Epoch 0 | Train Loss: 0.69 | Train Acc: 0.52 | Test Loss: 0.70 | Test Acc: 0.51
Epoch 1 | Train Loss: 0.68 | Train Acc: 0.57 | Test Loss: 0.69 | Test Acc: 0.51
Epoch 2 | Train Loss: 0.67 | Train Acc: 0.61 | Test Loss: 0.69 | Test Acc: 0.51
Epoch 3 | Train Loss: 0.65 | Train Acc: 0.65 | Test Loss: 0.70 | Test Acc: 0.51
Epoch 4 | Train Loss: 0.62 | Train Acc: 0.69 | Test Loss: 0.69 | Test Acc: 0.55
Epoch 5 | Train Loss: 0.55 | Train Acc: 0.77 | Test Loss: 0.71 | Test Acc: 0.54
Epoch 6 | Train Loss: 0.44 | Train Acc: 0.84 | Test Loss: 0.76 | Test Acc: 0.54
Epoch 7 | Train Loss: 0.33 | Train Acc: 0.90 | Test Loss: 0.81 | Test Acc: 0.54
Epoch 8 | Train Loss: 0.24 | Train Acc: 0.93 | Test Loss: 0.91 | Test Acc: 0.54
Epoch 9 | Train Loss: 0.16 | Train Acc: 0.96 | Test Loss: 1.00 | Test Acc: 0.54


In [27]:
bilstm_classifier = SentimentClassifier(train_iter, valid_iter, model_bilstm)
bilstm_train_epoch_metrics, bilstm_valid_epoch_metrics = bilstm_classifier.run_epochs()

Epoch 0 | Train Loss: 0.69 | Train Acc: 0.49 | Test Loss: 0.69 | Test Acc: 0.52
Epoch 1 | Train Loss: 0.68 | Train Acc: 0.56 | Test Loss: 0.69 | Test Acc: 0.54
Epoch 2 | Train Loss: 0.67 | Train Acc: 0.63 | Test Loss: 0.67 | Test Acc: 0.60
Epoch 3 | Train Loss: 0.64 | Train Acc: 0.65 | Test Loss: 0.71 | Test Acc: 0.54
Epoch 4 | Train Loss: 0.63 | Train Acc: 0.66 | Test Loss: 0.66 | Test Acc: 0.61
Epoch 5 | Train Loss: 0.58 | Train Acc: 0.74 | Test Loss: 0.65 | Test Acc: 0.64
Epoch 6 | Train Loss: 0.53 | Train Acc: 0.75 | Test Loss: 0.63 | Test Acc: 0.65
Epoch 7 | Train Loss: 0.49 | Train Acc: 0.80 | Test Loss: 0.65 | Test Acc: 0.64
Epoch 8 | Train Loss: 0.37 | Train Acc: 0.86 | Test Loss: 0.69 | Test Acc: 0.65
Epoch 9 | Train Loss: 0.28 | Train Acc: 0.90 | Test Loss: 0.63 | Test Acc: 0.70


### Miscellaneous

In [None]:
for test_i in valid_iter:
    print(test_i)
    break

In [None]:
(test_i.label.squeeze(0) >= 3).float()