# Assignment 2 - Recurrent Neural Networks



## Programming (Full points: 100)

In this assignment, our goal is to use PyTorch to implement Recurrent Neural Networks (RNN) for sentiment analysis task. Sentiment analysis is to classify sentences (input) into certain sentiments (output labels), which includes positive, negative and neutral.

We will use a benckmark dataset, SST, for this assignment.
* we download the SST dataset from torchtext package, and do some preprocessing to build vocabulary and split the dataset into training/validation/test sets. You don't need to modify the code in this step.


In [None]:
#As per instruction, changing the torchtext version to 0.6.0.
!pip install torchtext==0.6.0

Collecting torchtext==0.6.0
  Downloading torchtext-0.6.0-py3-none-any.whl (64 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/64.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.2/64.2 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
Collecting sentencepiece (from torchtext==0.6.0)
  Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m23.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: sentencepiece, torchtext
  Attempting uninstall: torchtext
    Found existing installation: torchtext 0.15.2
    Uninstalling torchtext-0.15.2:
      Successfully uninstalled torchtext-0.15.2
Successfully installed sentencepiece-0.1.99 torchtext-0.6.0


In [None]:
#All required imports
import copy
import torch
from torch import nn
from torch import optim
import torchtext
from torchtext import data
from torchtext import datasets

#Creating two variables for holding text and labels data.
TEXT = data.Field(sequential=True, batch_first=True, lower=True)
LABEL = data.LabelField()

#Loading data splits from torchtext.datasets.
train_data, val_data, test_data = datasets.SST.splits(TEXT, LABEL)

#Building vocabulary dictionaries for Text and Label.
TEXT.build_vocab(train_data)
LABEL.build_vocab(train_data)

#Initializing the hyperparameters of the model.
vocab_size = len(TEXT.vocab)
label_size = len(LABEL.vocab)
padding_idx = TEXT.vocab.stoi['<pad>']
embedding_dim = 128
hidden_dim = 128

#Building the iterators.
train_iter, val_iter, test_iter = data.BucketIterator.splits(
    (train_data, val_data, test_data),
    batch_size=32)

downloading trainDevTestTrees_PTB.zip


trainDevTestTrees_PTB.zip: 100%|██████████| 790k/790k [00:02<00:00, 372kB/s]


extracting


* define the training and evaluation function in the cell below.
### (25 points)


In [None]:
#Defining the training model.
def train(model, iterator, optimizer, criterion):
    '''
    This function is used to train the model.
    '''
    model.train()
    total_loss = 0.0
    total_correct = 0
    for batch in iterator:
        optimizer.zero_grad()
        text, labels = batch.text, batch.label
        predictions = model(text)
        loss = criterion(predictions, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        total_correct += (predictions.argmax(1) == labels).sum().item()
    return total_loss / len(iterator), total_correct / len(iterator.dataset)

#Defining the evaluation function.
def evaluate(model, iterator, criterion):
    '''
    This function is used to evaluate the model trained.
    '''
    model.eval()
    total_loss = 0.0
    total_correct = 0
    with torch.no_grad():
        for batch in iterator:
            text, labels = batch.text, batch.label
            predictions = model(text)
            loss = criterion(predictions, labels)
            total_loss += loss.item()
            total_correct += (predictions.argmax(1) == labels).sum().item()
    return total_loss / len(iterator), total_correct / len(iterator.dataset)

* build a RNN model for sentiment analysis in the cell below.
We have provided several hyperparameters we needed for building the model, including vocabulary size (vocab_size), the word embedding dimension (embedding_dim), the hidden layer dimension (hidden_dim), the number of layers (num_layers) and the number of sentence labels (label_size). Please fill in the missing codes, and implement a RNN model.
### (40 points)

In [None]:
#Defining the RNNClassifier model for sentiment classification task.
class RNNClassifier(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, label_size, padding_idx, num_layers=1):
        super(RNNClassifier, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=padding_idx)
        self.rnn = nn.LSTM(embedding_dim, hidden_dim, num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, label_size)
        self.num_layers = num_layers

    def forward(self, text):
        embedded = self.embedding(text)
        output, _ = self.rnn(embedded)
        last_output = output[:, -1, :]
        logits = self.fc(last_output)
        return logits

#Creating the model with the hyperparameters provided and ADAM optimizer and crossentropy loss function.
num_layers = 1
model = RNNClassifier(vocab_size, embedding_dim, hidden_dim, label_size, padding_idx, num_layers)
optimizer = optim.Adam(model.parameters())
criterion = nn.CrossEntropyLoss()

* train the model and compute the accuracy in the cell below.
### (20 points)

In [None]:
#Training the model.
N_EPOCHS = 10
best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):
    #Train and validate model.
    train_loss, train_acc = train(model, train_iter, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, val_iter, criterion)

    print(f'Epoch: {epoch+1:02}')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\tValid Loss: {valid_loss:.3f} | Valid Acc: {valid_acc*100:.2f}%')

    #Saving the best model.
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'rnn_model.pt')

#Loading the best model.
model.load_state_dict(torch.load('rnn_model.pt'))

#Testing the model trained.
test_loss, test_acc = evaluate(model, test_iter, criterion)
print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')

Epoch: 01
	Train Loss: 1.054 | Train Acc: 41.60%
	Valid Loss: 1.055 | Valid Acc: 39.51%
Epoch: 02
	Train Loss: 1.047 | Train Acc: 42.21%
	Valid Loss: 1.066 | Valid Acc: 40.24%
Epoch: 03
	Train Loss: 1.039 | Train Acc: 42.65%
	Valid Loss: 1.165 | Valid Acc: 40.96%
Epoch: 04
	Train Loss: 1.003 | Train Acc: 49.65%
	Valid Loss: 1.166 | Valid Acc: 47.77%
Epoch: 05
	Train Loss: 0.892 | Train Acc: 61.38%
	Valid Loss: 1.048 | Valid Acc: 51.95%
Epoch: 06
	Train Loss: 0.746 | Train Acc: 68.88%
	Valid Loss: 1.141 | Valid Acc: 53.86%
Epoch: 07
	Train Loss: 0.619 | Train Acc: 74.80%
	Valid Loss: 1.174 | Valid Acc: 53.59%
Epoch: 08
	Train Loss: 0.520 | Train Acc: 79.65%
	Valid Loss: 1.282 | Valid Acc: 53.68%
Epoch: 09
	Train Loss: 0.425 | Train Acc: 84.70%
	Valid Loss: 1.330 | Valid Acc: 51.77%
Epoch: 10
	Train Loss: 0.359 | Train Acc: 87.52%
	Valid Loss: 1.399 | Valid Acc: 53.50%
Test Loss: 1.049 | Test Acc: 53.94%


* try to train a model with better accuracy in the cell below. For example, you can use different optimizers such as SGD and Adam. You can also compare different hyperparameters and model size.
### (15 points), to obtain FULL point in this problem, the accuracy needs to be higher than 70%

In [None]:
#Attempt1:
# 1) Since, the model is overfitting, I tried to use the technique of dropout.
# 2) Increased the number of epochs.
# 3) Provided the learning rate of 0.001.
# 4) Added a weight_decay factor of 1e-5 so as to do the L2 regularization.
#Updating the definition of the RNNClassifier model.
class RNNClassifier(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, label_size, padding_idx, num_layers=1):
        super(RNNClassifier, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=padding_idx)
        self.rnn = nn.LSTM(embedding_dim, hidden_dim, num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, label_size)
        self.num_layers = num_layers
        self.dropout = nn.Dropout(0.5)

    def forward(self, text):
        embedded = self.embedding(text)
        output, _ = self.rnn(embedded)
        last_output = output[:, -1, :]
        logits = self.fc(self.dropout(last_output))
        return logits

#Re-creating the model.
num_layers = 1
#Initializing the hyperparameters of the model.
vocab_size = len(TEXT.vocab)
label_size = len(LABEL.vocab)
padding_idx = TEXT.vocab.stoi['<pad>']
embedding_dim = 256
hidden_dim = 256
model = RNNClassifier(vocab_size, embedding_dim, hidden_dim, label_size, padding_idx, num_layers)
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
criterion = nn.CrossEntropyLoss()

#Training the model.
N_EPOCHS = 20
best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):
    #Train and validate the model.
    train_loss, train_acc = train(model, train_iter, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, val_iter, criterion)

    print(f'Epoch: {epoch+1:02}')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\tValid Loss: {valid_loss:.3f} | Valid Acc: {valid_acc*100:.2f}%')

    #Saving the best model.
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'rnn_model.pt')

#Loading the best model.
model.load_state_dict(torch.load('rnn_model.pt'))

#Testing the model.
test_loss, test_acc = evaluate(model, test_iter, criterion)
print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')

Epoch: 01
	Train Loss: 1.054 | Train Acc: 41.37%
	Valid Loss: 1.063 | Valid Acc: 40.24%
Epoch: 02
	Train Loss: 1.047 | Train Acc: 42.02%
	Valid Loss: 1.083 | Valid Acc: 38.78%
Epoch: 03
	Train Loss: 1.043 | Train Acc: 42.33%
	Valid Loss: 1.128 | Valid Acc: 37.51%
Epoch: 04
	Train Loss: 1.035 | Train Acc: 42.74%
	Valid Loss: 1.165 | Valid Acc: 39.69%
Epoch: 05
	Train Loss: 1.027 | Train Acc: 43.97%
	Valid Loss: 1.199 | Valid Acc: 41.60%
Epoch: 06
	Train Loss: 1.005 | Train Acc: 48.60%
	Valid Loss: 1.215 | Valid Acc: 49.23%
Epoch: 07
	Train Loss: 0.882 | Train Acc: 62.31%
	Valid Loss: 1.068 | Valid Acc: 54.86%
Epoch: 08
	Train Loss: 0.675 | Train Acc: 72.93%
	Valid Loss: 1.093 | Valid Acc: 57.86%
Epoch: 09
	Train Loss: 0.509 | Train Acc: 79.17%
	Valid Loss: 1.207 | Valid Acc: 56.49%
Epoch: 10
	Train Loss: 0.382 | Train Acc: 85.65%
	Valid Loss: 1.287 | Valid Acc: 58.95%
Epoch: 11
	Train Loss: 0.279 | Train Acc: 90.70%
	Valid Loss: 1.399 | Valid Acc: 57.95%
Epoch: 12
	Train Loss: 0.208 | T

In [None]:
#Attempt2:
# 1) Since, the model is overfitting, I tried to use the technique of dropout.
# 2) Increased the number of epochs.
# 3) Used a different optimizer Stochastic Gradient Descent with learning rate of 0.01 and momentum of 0.5.
#Re-creating the model.
num_layers = 1
model = RNNClassifier(vocab_size, embedding_dim, hidden_dim, label_size, padding_idx, num_layers)

#Trying another optimizer (SGD with momentum).
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
criterion = nn.CrossEntropyLoss()

#Training the model.
for epoch in range(N_EPOCHS):
    #Train and validate the model.
    train_loss, train_acc = train(model, train_iter, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, val_iter, criterion)

    print(f'Epoch: {epoch+1:02}')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\tValid Loss: {valid_loss:.3f} | Valid Acc: {valid_acc*100:.2f}%')

    #Saving the best model.
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'rnn_model.pt')

#Loading the best model.
model.load_state_dict(torch.load('rnn_model.pt'))

#Testing the model.
test_loss, test_acc = evaluate(model, test_iter, criterion)
print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')

Epoch: 01
	Train Loss: 1.061 | Train Acc: 40.65%
	Valid Loss: 1.055 | Valid Acc: 40.60%
Epoch: 02
	Train Loss: 1.047 | Train Acc: 42.33%
	Valid Loss: 1.055 | Valid Acc: 40.51%
Epoch: 03
	Train Loss: 1.047 | Train Acc: 42.18%
	Valid Loss: 1.054 | Valid Acc: 40.69%
Epoch: 04
	Train Loss: 1.047 | Train Acc: 42.15%
	Valid Loss: 1.055 | Valid Acc: 40.51%
Epoch: 05
	Train Loss: 1.047 | Train Acc: 42.31%
	Valid Loss: 1.054 | Valid Acc: 40.60%
Epoch: 06
	Train Loss: 1.047 | Train Acc: 42.35%
	Valid Loss: 1.056 | Valid Acc: 40.51%
Epoch: 07
	Train Loss: 1.047 | Train Acc: 42.42%
	Valid Loss: 1.057 | Valid Acc: 40.60%
Epoch: 08
	Train Loss: 1.047 | Train Acc: 42.30%
	Valid Loss: 1.056 | Valid Acc: 40.60%
Epoch: 09
	Train Loss: 1.047 | Train Acc: 42.25%
	Valid Loss: 1.054 | Valid Acc: 40.60%
Epoch: 10
	Train Loss: 1.046 | Train Acc: 42.38%
	Valid Loss: 1.056 | Valid Acc: 40.51%
Epoch: 11
	Train Loss: 1.046 | Train Acc: 42.29%
	Valid Loss: 1.053 | Valid Acc: 40.60%
Epoch: 12
	Train Loss: 1.046 | T

In [None]:
#Attempt3:
# 1) Since, the model is overfitting, I tried to use the technique of bidrectional RNNs.
# 2) Increased the number of epochs.
# 3) Provided the learning rate of 0.001.
# 4) Added a weight_decay factor of 1e-5 so as to do the L2 regularization.
#Updating the definition of the RNNClassifier model.
class RNNClassifier(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, label_size, padding_idx, num_layers=1, bidirectional=True, dropout_prob=0.3):
        super(RNNClassifier, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=padding_idx)
        self.rnn = nn.LSTM(embedding_dim, hidden_dim, num_layers=num_layers,
                           batch_first=True, bidirectional=bidirectional, dropout=dropout_prob)
        self.fc = nn.Linear(hidden_dim * 2 if bidirectional else hidden_dim, label_size)

    def forward(self, text):
        embedded = self.embedding(text)
        output, _ = self.rnn(embedded)
        last_output = output[:, -1, :]
        logits = self.fc(last_output)
        return logits

#Re-creating the model with change in hyperparameter value.
vocab_size = len(TEXT.vocab)
label_size = len(LABEL.vocab)
padding_idx = TEXT.vocab.stoi['<pad>']
embedding_dim = 256
hidden_dim = 256
num_layers = 1
model = RNNClassifier(vocab_size, embedding_dim, hidden_dim, label_size, padding_idx, num_layers)
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
criterion = nn.CrossEntropyLoss()

#Training the model.
N_EPOCHS = 30
best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):
    #Train and evaluate the model.
    train_loss, train_acc = train(model, train_iter, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, val_iter, criterion)

    print(f'Epoch: {epoch+1:02}')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\tValid Loss: {valid_loss:.3f} | Valid Acc: {valid_acc*100:.2f}%')

    #Saving the best model.
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'rnn_model.pt')

#Loading the best model.
model.load_state_dict(torch.load('rnn_model.pt'))

#Testing the model.
test_loss, test_acc = evaluate(model, test_iter, criterion)
print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')



Epoch: 01
	Train Loss: 1.051 | Train Acc: 41.43%
	Valid Loss: 1.071 | Valid Acc: 40.51%
Epoch: 02
	Train Loss: 1.047 | Train Acc: 41.90%
	Valid Loss: 1.067 | Valid Acc: 37.78%
Epoch: 03
	Train Loss: 1.038 | Train Acc: 43.13%
	Valid Loss: 1.081 | Valid Acc: 40.87%
Epoch: 04
	Train Loss: 1.025 | Train Acc: 46.79%
	Valid Loss: 1.094 | Valid Acc: 46.87%
Epoch: 05
	Train Loss: 1.013 | Train Acc: 50.14%
	Valid Loss: 1.095 | Valid Acc: 47.32%
Epoch: 06
	Train Loss: 1.019 | Train Acc: 47.45%
	Valid Loss: 1.068 | Valid Acc: 40.05%


KeyboardInterrupt: ignored

In [None]:
#Attempt4:
# 1) Since, the model is overfitting, I tried to use the technique of bidrectional RNNs.
# 2) Increased the number of epochs.
# 3) Provided the learning rate of 0.001.
# 4) Added a weight_decay factor of 1e-5 so as to do the L2 regularization.
#Updating the definition of the RNNClassifier model.
class RNNClassifier(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, label_size, padding_idx, num_layers=1, bidirectional=True):
        super(RNNClassifier, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=padding_idx)
        self.rnn = nn.LSTM(embedding_dim, hidden_dim, num_layers=num_layers,
                           batch_first=True, bidirectional=bidirectional)
        self.fc = nn.Linear(hidden_dim * 2 if bidirectional else hidden_dim, label_size)

    def forward(self, text):
        embedded = self.embedding(text)
        output, _ = self.rnn(embedded)
        last_output = output[:, -1, :]
        logits = self.fc(last_output)
        return logits

#Re-creating the model.
num_layers = 1
model = RNNClassifier(vocab_size, embedding_dim, hidden_dim, label_size, padding_idx, num_layers)
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
criterion = nn.CrossEntropyLoss()

#Training the model
N_EPOCHS = 30
best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):
    #Train and validate the model.
    train_loss, train_acc = train(model, train_iter, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, val_iter, criterion)

    print(f'Epoch: {epoch+1:02}')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\tValid Loss: {valid_loss:.3f} | Valid Acc: {valid_acc*100:.2f}%')

    #Saving the best model.
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'rnn_model.pt')

#Loading the best model.
model.load_state_dict(torch.load('rnn_model.pt'))

#Testing the model.
test_loss, test_acc = evaluate(model, test_iter, criterion)
print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')

Epoch: 01
	Train Loss: 1.053 | Train Acc: 41.83%
	Valid Loss: 1.062 | Valid Acc: 40.33%
Epoch: 02
	Train Loss: 1.046 | Train Acc: 42.28%
	Valid Loss: 1.104 | Valid Acc: 40.42%
Epoch: 03
	Train Loss: 1.041 | Train Acc: 42.29%
	Valid Loss: 1.118 | Valid Acc: 43.87%
Epoch: 04
	Train Loss: 1.032 | Train Acc: 43.42%
	Valid Loss: 1.167 | Valid Acc: 42.33%
Epoch: 05
	Train Loss: 1.008 | Train Acc: 49.91%
	Valid Loss: 1.092 | Valid Acc: 48.23%
Epoch: 06
	Train Loss: 0.991 | Train Acc: 53.08%
	Valid Loss: 1.107 | Valid Acc: 51.68%
Epoch: 07
	Train Loss: 0.944 | Train Acc: 58.49%
	Valid Loss: 1.145 | Valid Acc: 50.86%


KeyboardInterrupt: ignored