### Import

In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torchtext import data
from tqdm import tnrange, tqdm_notebook

from data_loader import DataLoader

%load_ext autoreload
%autoreload 2

### Load Data

_**Note**: It takes a while to load data_

In [2]:
data_loader = DataLoader()
train, valid = data_loader.small_train_valid()

loading data...
splitting data...
building vocabulary...


In [3]:
BATCH_SIZE = 64
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# make iterator for splits
train_iter, valid_iter = data.BucketIterator.splits((train, valid), batch_size=BATCH_SIZE, device=device)

In [4]:
print(vars(train.examples[0]))

{'text': ['<', 'br', '/><br', '/>charlie', 'kauffman', 'has', 'made', 'weird', 'metaphysical', 'angst', 'popular', ',', 'but', 'this', 'canadian', 'gem', 'makes', 'it', 'hilarious', '.', '<', 'br', '/><br', '/>like', 'most', 'weird', 'films', 'the', 'less', 'said', 'about', 'plot', 'the', 'better', 'but', 'let', "'s", 'set', 'the', 'scene', ',', 'two', 'friends', 'anthony', 'and', 'dave', 'have', 'been', 'together', 'since', 'childhood', ',', 'they', 'ca', "n't", 'cope', 'with', 'the', 'world', 'and', 'eventually', 'this', 'means', 'they', 'no', 'longer', 'have', 'to', '.', 'but', 'that', 'is', 'where', 'even', 'more', 'problems', 'begin.<br', '/><br', '/>i', 'loved', 'this', 'film', ',', 'it', 'made', 'me', 'smile', 'long', 'after', 'the', 'final', 'credits', 'and', 'that', 'is', 'a', 'rare', 'experience', 'with', 'so', 'many', 'mass', 'produced', 'pieces', 'of', '"', 'nothing', '"', 'out', 'there.<br', '/><br', "/>don't", 'miss', 'this.<br', '/><br', '/', '>'], 'label': ['pos']}


In [5]:
print('%d training examples' %len(train))
print('%d validation examples' %len(valid))
print('%d unique tokens in TEXT' %len(data_loader.TEXT.vocab))
print('%d unique tokens in LABEL' %len(data_loader.LABEL.vocab))
print(data_loader.LABEL.vocab.itos)
print('Input data shape:', data_loader.TEXT.vocab.vectors.shape)

1250 training examples
1250 validation examples
22587 unique tokens in TEXT
4 unique tokens in LABEL
['<unk>', '<pad>', 'neg', 'pos']
Input data shape: torch.Size([22587, 100])


### Model Config

In [6]:
VOCAB_SIZE, EMBEDDING_DIM = data_loader.TEXT.vocab.vectors.shape
HIDDEN_DIM = 32
OUTPUT_DIM = 1
BIDRECTIONAL = True
LEARNING_RATE = 1e-3

### BiRNN

In [7]:
class BiRNN(nn.Module):
    
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, bidirectional):
        super(BiRNN, self).__init__()
        
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.rnn = nn.RNN(embedding_dim, hidden_dim, bidirectional=bidirectional)
        self.fc = nn.Linear(hidden_dim * 2, output_dim)  # input is concated forward and backward hidden state
        
    def forward(self, x):
        embeds = self.embedding(x)
        lstm_output, hidden = self.rnn(embeds)
        hidden = torch.cat((hidden[-2,:,:], hidden[-1,:,:]), dim=1)  # concate forward and backward hidden before FC
        fc_output = self.fc(hidden.squeeze(0))
        
        return fc_output


In [8]:
# initialize model
model_birnn = BiRNN(VOCAB_SIZE, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM, BIDRECTIONAL)
model_birnn = model_birnn.to(device)  # place it to GPU (if available)

# replace inintial weights of embedding layer with pred-trained embedding
pretrained_embeddings = data_loader.TEXT.vocab.vectors
model_birnn.embedding.weight.data.copy_(pretrained_embeddings)

tensor([[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [-0.0382, -0.2449,  0.7281,  ..., -0.1459,  0.8278,  0.2706],
        ...,
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [-0.2415, -0.1050, -0.1875,  ..., -0.3229,  0.1251, -0.8694],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]])

### BiLSTM

In [9]:
class BiLSTM(nn.Module):
    
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, bidirectional):
        super(BiLSTM, self).__init__()
        
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, bidirectional=bidirectional)
        self.fc = nn.Linear(hidden_dim * 2, output_dim)  # input is concated forward and backward hidden state
        
    def forward(self, x):
        embeds = self.embedding(x)
        lstm_output, (hidden, memory) = self.lstm(embeds)
        hidden = torch.cat((hidden[-2,:,:], hidden[-1,:,:]), dim=1)  # concate forward and backward hidden before FC
        fc_output = self.fc(hidden.squeeze(0))
        
        return fc_output

In [10]:
# initialize model
model_bilstm = BiLSTM(VOCAB_SIZE, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM, BIDRECTIONAL)
model_bilstm = model_bilstm.to(device)  # place it to GPU (if available)

# replace inintial weights of embedding layer with pred-trained embedding
pretrained_embeddings = data_loader.TEXT.vocab.vectors
model_bilstm.embedding.weight.data.copy_(pretrained_embeddings)

tensor([[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [-0.0382, -0.2449,  0.7281,  ..., -0.1459,  0.8278,  0.2706],
        ...,
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [-0.2415, -0.1050, -0.1875,  ..., -0.3229,  0.1251, -0.8694],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]])

### Classifier Class

In [11]:
class SentimentClassifier:
    
    def __init__(self, train_iter, valid_iter, model):
        self.train_iter = train_iter
        self.valid_iter = valid_iter
        self.model = model
        self.optimizer = optim.Adam(self.model.parameters())
        self.loss_function = nn.BCEWithLogitsLoss()          # Binary Cross-Entropy Loss
        self.loss_function = self.loss_function.to(device)   # place it to GPU (if available)
        
    def accuracy(self, pred, y):
        """Define metric for evaluation."""
        pred = torch.round(torch.sigmoid(pred))
        acc = torch.sum((pred == y)).float() / len(y)
        return acc

    def train_model(self):
        """Train one epoch of inputs and update weights.
        
        Return: average loss, average accuracy.
        """
        epoch_loss = []
        epoch_acc = []
        self.model.train()

        for batch_data in self.train_iter:
            self.optimizer.zero_grad()  # clear out gradient
            pred = self.model(batch_data.text).squeeze(1)
            y = (batch_data.label.squeeze(0) >= 3).float()  # neg:2, pos:3 -> convert them to 0 and 1
            loss = self.loss_function(pred, y)
            acc = self.accuracy(pred, y)

            # backprob and update gradient
            loss.backward()
            self.optimizer.step()

            epoch_loss.append(loss.item())
            epoch_acc.append(acc.item())

        return np.mean(epoch_loss), np.mean(epoch_acc)

    
    def evaluate_model(self):
        """Evaluate one epoch of inputs.
        
        Return: average loss, average accuracy.
        """
        epoch_loss = []
        epoch_acc = []
        self.model.eval()

        with torch.no_grad():    
            for batch_data in self.valid_iter:
                pred = self.model(batch_data.text).squeeze(1)
                y = (batch_data.label.squeeze(0) >= 3).float()
                loss = self.loss_function(pred, y)
                acc = self.accuracy(pred, y)

                epoch_loss.append(loss.item())
                epoch_acc.append(acc.item())

        return np.mean(epoch_loss), np.mean(epoch_acc)

    
    def run_epochs(self, num_epochs=10, eval_each=1):
        """Run # epochs and evaluate the model.
        
        Return: average loss and accuracy per epoch for training and validation set.
        """
        train_epoch_metrics, valid_epoch_metrics = [], []
        
        for epoch in tnrange(num_epochs, desc='EPOCHS'):
            train_loss, train_acc = self.train_model()
            valid_loss, valid_acc = self.evaluate_model()
            train_epoch_metrics.append((train_loss, train_acc))
            valid_epoch_metrics.append((valid_loss, valid_acc))

            if (epoch + 1) % eval_each == 0:
                print('Epoch %d | Train Loss: %.2f | Train Acc: %.2f | Test Loss: %.2f | Test Acc: %.2f'
                      %(epoch, train_loss, train_acc, valid_loss, valid_acc))
        
        return train_epoch_metrics, valid_epoch_metrics
    
    def save_model(self, PATH):
        torch.save(self.model.state_dict(), PATH)
        
    def load_model(self, PATH):
        self.model.load_state_dict(torch.load(PATH))
        self.model.eval()

### Run BiRNN and BiLSTM Model

In [25]:
birnn_classifier = SentimentClassifier(train_iter, valid_iter, model_birnn)
birnn_train_epoch_metrics, birnn_valid_epoch_metrics = birnn_classifier.run_epochs()

Epoch 0 | Train Loss: 0.69 | Train Acc: 0.52 | Test Loss: 0.70 | Test Acc: 0.51
Epoch 1 | Train Loss: 0.68 | Train Acc: 0.57 | Test Loss: 0.69 | Test Acc: 0.51
Epoch 2 | Train Loss: 0.67 | Train Acc: 0.61 | Test Loss: 0.69 | Test Acc: 0.51
Epoch 3 | Train Loss: 0.65 | Train Acc: 0.65 | Test Loss: 0.70 | Test Acc: 0.51
Epoch 4 | Train Loss: 0.62 | Train Acc: 0.69 | Test Loss: 0.69 | Test Acc: 0.55
Epoch 5 | Train Loss: 0.55 | Train Acc: 0.77 | Test Loss: 0.71 | Test Acc: 0.54
Epoch 6 | Train Loss: 0.44 | Train Acc: 0.84 | Test Loss: 0.76 | Test Acc: 0.54
Epoch 7 | Train Loss: 0.33 | Train Acc: 0.90 | Test Loss: 0.81 | Test Acc: 0.54
Epoch 8 | Train Loss: 0.24 | Train Acc: 0.93 | Test Loss: 0.91 | Test Acc: 0.54
Epoch 9 | Train Loss: 0.16 | Train Acc: 0.96 | Test Loss: 1.00 | Test Acc: 0.54


In [12]:
bilstm_classifier = SentimentClassifier(train_iter, valid_iter, model_bilstm)
bilstm_train_epoch_metrics, bilstm_valid_epoch_metrics = bilstm_classifier.run_epochs()

HBox(children=(IntProgress(value=0, description='EPOCHS', max=10, style=ProgressStyle(description_width='initi…

Epoch 0 | Train Loss: 0.69 | Train Acc: 0.53 | Test Loss: 0.69 | Test Acc: 0.50
Epoch 1 | Train Loss: 0.68 | Train Acc: 0.56 | Test Loss: 0.69 | Test Acc: 0.53
Epoch 2 | Train Loss: 0.67 | Train Acc: 0.61 | Test Loss: 0.67 | Test Acc: 0.58
Epoch 3 | Train Loss: 0.63 | Train Acc: 0.67 | Test Loss: 0.67 | Test Acc: 0.59
Epoch 4 | Train Loss: 0.56 | Train Acc: 0.73 | Test Loss: 0.62 | Test Acc: 0.66
Epoch 5 | Train Loss: 0.50 | Train Acc: 0.77 | Test Loss: 0.60 | Test Acc: 0.69
Epoch 6 | Train Loss: 0.43 | Train Acc: 0.83 | Test Loss: 0.72 | Test Acc: 0.64
Epoch 7 | Train Loss: 0.46 | Train Acc: 0.80 | Test Loss: 0.59 | Test Acc: 0.69
Epoch 8 | Train Loss: 0.32 | Train Acc: 0.89 | Test Loss: 0.69 | Test Acc: 0.67
Epoch 9 | Train Loss: 0.38 | Train Acc: 0.86 | Test Loss: 0.59 | Test Acc: 0.69



In [13]:
bilstm_classifier.save_model('bilstm-0.pt')
#torch.save(bilstm_classifier.model.state_dict(), 'bilstm-1.pt')

In [24]:
print("Model's state_dict:")
for param_tensor in bilstm_classifier.model.state_dict():
    print(param_tensor, "\t", bilstm_classifier.model.state_dict()[param_tensor].size())

Model's state_dict:
embedding.weight 	 torch.Size([22587, 100])
lstm.weight_ih_l0 	 torch.Size([128, 100])
lstm.weight_hh_l0 	 torch.Size([128, 32])
lstm.bias_ih_l0 	 torch.Size([128])
lstm.bias_hh_l0 	 torch.Size([128])
lstm.weight_ih_l0_reverse 	 torch.Size([128, 100])
lstm.weight_hh_l0_reverse 	 torch.Size([128, 32])
lstm.bias_ih_l0_reverse 	 torch.Size([128])
lstm.bias_hh_l0_reverse 	 torch.Size([128])
fc.weight 	 torch.Size([1, 64])
fc.bias 	 torch.Size([1])


### Adversary

#### Load model

In [39]:
# initialize model
model_bilstm_adversary = BiLSTM(VOCAB_SIZE, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM, BIDRECTIONAL)
model_bilstm_adversary = model_bilstm_adversary.to(device)  # place it to GPU (if available)

# bilstm_adversary = SentimentClassifier(train_iter, valid_iter, model_bilstm_adversary)
# bilstm_adversary.load_model(saved_bilstm)

# load model
model_bilstm_adversary.load_state_dict(torch.load('bilstm-0.pt'))
model_bilstm_adversary.eval()

BiLSTM(
  (embedding): Embedding(22587, 100)
  (lstm): LSTM(100, 32, bidirectional=True)
  (fc): Linear(in_features=64, out_features=1, bias=True)
)

In [41]:
for valid_i in valid_iter:
    print(valid_i)
    break


[torchtext.data.batch.Batch of size 64]
	[.text]:[torch.LongTensor of size 76x64]
	[.label]:[torch.FloatTensor of size 1x64]


In [61]:
k = 10
text = valid_i.text[:, :k]
label = (valid_i.label.squeeze(0)[:k] >= 3).float()
with torch.no_grad():   
    logit = model_bilstm_adversary(text).squeeze(1)
    loss = nn.BCEWithLogitsLoss()(logit, label)
    pred = torch.round(torch.sigmoid(logit))
    acc = torch.sum((pred == label)).float() / len(label)

print(logit)
print(pred, acc)

tensor([ 2.3972,  2.2364, -2.2658, -2.5041,  2.3518,  1.5888,  0.3724, -0.7246,
         0.5422,  1.1477])
tensor([1., 1., 0., 0., 1., 1., 1., 0., 1., 1.]) tensor(0.8000)


In [102]:
for i in range(10):
    word1 = text[:, :1][i]
    word1_vec = model_bilstm_adversary.embedding(word1)
    perturb = torch.sqrt(torch.sum(torch.pow((model_bilstm.embedding.weight.data - word1_vec), 2), dim=1)) - logit[0]
    perturb[word1] = float('inf')  # do not choose the original word (argmin would be original word)
    word1_adversary = torch.argmin(perturb)
    original = data_loader.TEXT.vocab.itos[word1]
    adversary = data_loader.TEXT.vocab.itos[word1_adversary]
    print('%s -> %s' %(original, adversary))

this -> it
was -> being
one -> another
of -> the
the -> this
best -> well
war -> wars
movies -> movie
i -> 'm
've -> 'd


### Miscellaneous

In [None]:
for test_i in valid_iter:
    print(test_i)
    break

In [None]:
(test_i.label.squeeze(0) >= 3).float()