In [None]:
#Pinning torchtext to 0.6.0: the cells below rely on the `torchtext.data.Field`,
#`torchtext.data.LabelField` and `torchtext.data.BucketIterator` API imported via `from torchtext import data`.
#NOTE(review): in a notebook, `%pip` is usually preferred over `!pip` so the install targets the kernel's environment - confirm for your setup.
!pip install torchtext==0.6.0

In [None]:
#All required imports
import copy
import random

import torch
from torch import nn
from torch import optim
import torchtext
from torchtext import data
from torchtext import datasets

#Seeding the random number generators up front so that Restart & Run All reproduces the same numbers.
#Python's `random` is seeded for data shuffling; torch is seeded for weight initialisation and dropout.
SEED = 42
random.seed(SEED)
torch.manual_seed(SEED)

#Creating the two fields that describe how raw text and labels are turned into tensors.
#batch_first=True makes text batches come out as (batch, seq_len); lower=True lower-cases tokens.
TEXT = data.Field(sequential=True, batch_first=True, lower=True)
LABEL = data.LabelField()

#Loading the train/validation/test splits of SST from torchtext.datasets.
train_data, val_data, test_data = datasets.SST.splits(TEXT, LABEL)

#Building vocabulary dictionaries for Text and Label from the training split only,
#so no token or label statistics leak in from validation/test data.
TEXT.build_vocab(train_data)
LABEL.build_vocab(train_data)

#Initializing the hyperparameters of the model.
vocab_size = len(TEXT.vocab)
label_size = len(LABEL.vocab)
#Looking the padding index up through the field's own pad token instead of a hard-coded string.
padding_idx = TEXT.vocab.stoi[TEXT.pad_token]
embedding_dim = 128
hidden_dim = 128

#Building the iterators (one per split, all with the same batch size).
train_iter, val_iter, test_iter = data.BucketIterator.splits(
    (train_data, val_data, test_data),
    batch_size=32)

In [None]:
#Defining the training function (runs one epoch of optimisation).
def train(model, iterator, optimizer, criterion):
    '''
    Train the model for one epoch and report the epoch metrics.

    Parameters:
        model: module called as `model(batch.text)`; expected to return class scores
            with one row per example.
        iterator: iterable of batches exposing `.text` and `.label`.
        optimizer: optimizer holding the model's parameters.
        criterion: loss called as `criterion(predictions, labels)`. It is assumed to
            return the MEAN loss over the batch (the default reduction of
            nn.CrossEntropyLoss); the per-batch value is re-weighted by batch size.

    Returns:
        (mean loss per example, accuracy) over every example seen in this epoch.

    Raises:
        ValueError: if the iterator yields no examples.
    '''
    model.train()
    total_loss = 0.0
    total_correct = 0
    total_examples = 0
    for batch in iterator:
        optimizer.zero_grad()
        text, labels = batch.text, batch.label
        predictions = model(text)
        loss = criterion(predictions, labels)
        loss.backward()
        optimizer.step()
        batch_size = labels.size(0)
        # Weight each batch by its size: the last batch is usually smaller, so a plain
        # average of per-batch means would over-weight its examples.
        total_loss += loss.item() * batch_size
        total_correct += (predictions.argmax(1) == labels).sum().item()
        total_examples += batch_size
    if total_examples == 0:
        raise ValueError('iterator yielded no examples')
    return total_loss / total_examples, total_correct / total_examples

#Defining the evaluation function.
def evaluate(model, iterator, criterion):
    '''
    Evaluate the model on every batch of `iterator` without updating any weights.

    Parameters:
        model: module called as `model(batch.text)`; expected to return class scores
            with one row per example.
        iterator: iterable of batches exposing `.text` and `.label`.
        criterion: loss called as `criterion(predictions, labels)`. It is assumed to
            return the MEAN loss over the batch (the default reduction of
            nn.CrossEntropyLoss); the per-batch value is re-weighted by batch size.

    Returns:
        (mean loss per example, accuracy) over every example seen.

    Raises:
        ValueError: if the iterator yields no examples.
    '''
    model.eval()
    total_loss = 0.0
    total_correct = 0
    total_examples = 0
    # Gradients are not needed for evaluation; disabling them saves memory and time.
    with torch.no_grad():
        for batch in iterator:
            text, labels = batch.text, batch.label
            predictions = model(text)
            loss = criterion(predictions, labels)
            batch_size = labels.size(0)
            # Weight each batch by its size so a smaller final batch does not skew the mean.
            total_loss += loss.item() * batch_size
            total_correct += (predictions.argmax(1) == labels).sum().item()
            total_examples += batch_size
    if total_examples == 0:
        raise ValueError('iterator yielded no examples')
    return total_loss / total_examples, total_correct / total_examples

In [None]:
#Defining the RNNClassifier model for sentiment classification task.
class RNNClassifier(nn.Module):
    '''
    Baseline sentence classifier: embedding -> LSTM -> linear layer.

    Parameters:
        vocab_size: number of rows in the embedding table.
        embedding_dim: size of each token embedding.
        hidden_dim: size of the LSTM hidden state.
        label_size: number of output classes.
        padding_idx: index of the padding token (may be None if inputs are never padded).
        num_layers: number of stacked LSTM layers.
    '''
    def __init__(self, vocab_size, embedding_dim, hidden_dim, label_size, padding_idx, num_layers=1):
        super(RNNClassifier, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=padding_idx)
        self.rnn = nn.LSTM(embedding_dim, hidden_dim, num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, label_size)
        self.num_layers = num_layers

    def _sequence_lengths(self, text):
        '''Return the number of non-padding tokens in each row of `text` (at least 1).'''
        pad = self.embedding.padding_idx
        if pad is None:
            # No padding token configured: every position counts as real input.
            return torch.full((text.size(0),), text.size(1), dtype=torch.long)
        # Assumes padding is appended after the real tokens (right-padding).
        # clamp(min=1): pack_padded_sequence rejects zero-length sequences.
        return (text != pad).sum(dim=1).clamp(min=1)

    def forward(self, text):
        '''
        Compute class logits for a batch of token-index sequences.

        `text` is a (batch, seq_len) integer tensor. The sentence summary is the LSTM's
        final hidden state at each sequence's true last token. Reading `output[:, -1, :]`
        instead would, for every sentence shorter than the longest one in the batch,
        return the state after the LSTM has run over the padding positions.
        '''
        embedded = self.embedding(text)
        lengths = self._sequence_lengths(text)
        # Packing makes the LSTM stop at each sequence's real end; lengths must live on the CPU.
        packed = nn.utils.rnn.pack_padded_sequence(
            embedded, lengths.cpu(), batch_first=True, enforce_sorted=False)
        _, (hidden, _) = self.rnn(packed)
        # hidden is (num_layers, batch, hidden_dim); the last entry belongs to the top layer.
        last_output = hidden[-1]
        logits = self.fc(last_output)
        return logits

#Instantiating the baseline classifier from the hyperparameters above, then pairing it
#with a cross-entropy loss and an Adam optimizer left at its default settings.
num_layers = 1
model = RNNClassifier(
    vocab_size=vocab_size,
    embedding_dim=embedding_dim,
    hidden_dim=hidden_dim,
    label_size=label_size,
    padding_idx=padding_idx,
    num_layers=num_layers,
)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())

In [None]:
#Baseline run: train for a fixed number of epochs, checkpointing on validation loss.
N_EPOCHS = 10
best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):
    #One pass over the training split followed by one pass over the validation split.
    train_loss, train_acc = train(model, train_iter, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, val_iter, criterion)

    #Per-epoch report.
    print(f'Epoch: {epoch+1:02}')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\tValid Loss: {valid_loss:.3f} | Valid Acc: {valid_acc*100:.2f}%')

    #Nothing to checkpoint unless validation loss strictly improved.
    if not (valid_loss < best_valid_loss):
        continue
    best_valid_loss = valid_loss
    torch.save(model.state_dict(), 'rnn_model.pt')

#Restoring the weights of the best checkpoint before touching the test split.
model.load_state_dict(torch.load('rnn_model.pt'))

#Final, one-off measurement on the held-out test split.
test_loss, test_acc = evaluate(model, test_iter, criterion)
print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')

In [None]:
#Optimization Attempt1:
# 1) Since the baseline model is overfitting, dropout (p=0.5) is applied before the final linear layer.
# 2) Increased the number of epochs (10 -> 20) and the embedding/hidden sizes (128 -> 256).
# 3) Set the learning rate explicitly to 0.001.
# 4) Added a weight_decay factor of 1e-5 for L2 regularization.
#Updating the definition of the RNNClassifier model.
class RNNClassifier(nn.Module):
    '''
    Sentence classifier with dropout: embedding -> LSTM -> dropout(0.5) -> linear layer.

    Parameters:
        vocab_size: number of rows in the embedding table.
        embedding_dim: size of each token embedding.
        hidden_dim: size of the LSTM hidden state.
        label_size: number of output classes.
        padding_idx: index of the padding token (may be None if inputs are never padded).
        num_layers: number of stacked LSTM layers.
    '''
    def __init__(self, vocab_size, embedding_dim, hidden_dim, label_size, padding_idx, num_layers=1):
        super(RNNClassifier, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=padding_idx)
        self.rnn = nn.LSTM(embedding_dim, hidden_dim, num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, label_size)
        self.num_layers = num_layers
        self.dropout = nn.Dropout(0.5)

    def _sequence_lengths(self, text):
        '''Return the number of non-padding tokens in each row of `text` (at least 1).'''
        pad = self.embedding.padding_idx
        if pad is None:
            # No padding token configured: every position counts as real input.
            return torch.full((text.size(0),), text.size(1), dtype=torch.long)
        # Assumes padding is appended after the real tokens (right-padding).
        # clamp(min=1): pack_padded_sequence rejects zero-length sequences.
        return (text != pad).sum(dim=1).clamp(min=1)

    def forward(self, text):
        '''
        Compute class logits for a batch of token-index sequences.

        `text` is a (batch, seq_len) integer tensor. The sentence summary is the LSTM's
        final hidden state at each sequence's true last token, so trailing padding does
        not influence the prediction (reading `output[:, -1, :]` would return the state
        after the padding positions for every sentence shorter than the batch maximum).
        Dropout is applied to that summary before the linear layer.
        '''
        embedded = self.embedding(text)
        lengths = self._sequence_lengths(text)
        # Packing makes the LSTM stop at each sequence's real end; lengths must live on the CPU.
        packed = nn.utils.rnn.pack_padded_sequence(
            embedded, lengths.cpu(), batch_first=True, enforce_sorted=False)
        _, (hidden, _) = self.rnn(packed)
        # hidden is (num_layers, batch, hidden_dim); the last entry belongs to the top layer.
        last_output = hidden[-1]
        logits = self.fc(self.dropout(last_output))
        return logits

#Attempt 1 setup: rebuilding the model with wider embedding/hidden layers.
num_layers = 1
#Vocabulary-derived sizes are recomputed; embedding and hidden widths are both 256.
vocab_size = len(TEXT.vocab)
label_size = len(LABEL.vocab)
padding_idx = TEXT.vocab.stoi['<pad>']
embedding_dim = hidden_dim = 256
model = RNNClassifier(
    vocab_size=vocab_size,
    embedding_dim=embedding_dim,
    hidden_dim=hidden_dim,
    label_size=label_size,
    padding_idx=padding_idx,
    num_layers=num_layers,
)
criterion = nn.CrossEntropyLoss()
#Adam with an explicit learning rate and a small weight decay.
optimizer = optim.Adam(model.parameters(), weight_decay=1e-5, lr=0.001)

#Attempt 1 training run, checkpointing on validation loss.
N_EPOCHS = 20
best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):
    #One pass over the training split followed by one pass over the validation split.
    train_loss, train_acc = train(model, train_iter, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, val_iter, criterion)

    #Per-epoch report.
    print(f'Epoch: {epoch+1:02}')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\tValid Loss: {valid_loss:.3f} | Valid Acc: {valid_acc*100:.2f}%')

    #Nothing to checkpoint unless validation loss strictly improved.
    if not (valid_loss < best_valid_loss):
        continue
    best_valid_loss = valid_loss
    torch.save(model.state_dict(), 'rnn_model.pt')

#Restoring the weights of the best checkpoint before touching the test split.
model.load_state_dict(torch.load('rnn_model.pt'))

#Measuring the restored model on the held-out test split.
test_loss, test_acc = evaluate(model, test_iter, criterion)
print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')

In [None]:
#Optimization Attempt2:
# 1) Since the model is overfitting, the dropout-regularized RNNClassifier from Attempt 1 is kept.
# 2) Training for 20 epochs (the same budget as Attempt 1).
# 3) Used a different optimizer: Stochastic Gradient Descent with a learning rate of 0.01 and momentum of 0.5.
#Re-creating the model so training starts from fresh weights.
num_layers = 1
model = RNNClassifier(vocab_size, embedding_dim, hidden_dim, label_size, padding_idx, num_layers)

#Trying another optimizer (SGD with momentum).
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
criterion = nn.CrossEntropyLoss()

#Training the model.
#Setting N_EPOCHS explicitly so this cell does not depend on whichever earlier cell ran last.
N_EPOCHS = 20
#Resetting the best validation loss for this run. Without the reset, the value left over from
#the previous attempt could stop any checkpoint of THIS model from being saved, and the load
#below would then silently restore (and test) the previous attempt's weights.
best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):
    #Train and validate the model.
    train_loss, train_acc = train(model, train_iter, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, val_iter, criterion)

    print(f'Epoch: {epoch+1:02}')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\tValid Loss: {valid_loss:.3f} | Valid Acc: {valid_acc*100:.2f}%')

    #Saving the best model.
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'rnn_model.pt')

#Loading the best model.
model.load_state_dict(torch.load('rnn_model.pt'))

#Testing the model.
test_loss, test_acc = evaluate(model, test_iter, criterion)
print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')

In [None]:
#Optimization Attempt3:
# 1) Since the model is still overfitting, I tried a bidirectional LSTM (with a dropout_prob argument).
# 2) Increased the number of epochs to 30.
# 3) Set the learning rate to 0.001.
# 4) Added a weight_decay factor of 1e-5 for L2 regularization.
#Updating the definition of the RNNClassifier model.
class RNNClassifier(nn.Module):
    '''
    (Optionally bidirectional) LSTM sentence classifier with dropout.

    Parameters:
        vocab_size: number of rows in the embedding table.
        embedding_dim: size of each token embedding.
        hidden_dim: size of the LSTM hidden state (per direction).
        label_size: number of output classes.
        padding_idx: index of the padding token (may be None if inputs are never padded).
        num_layers: number of stacked LSTM layers.
        bidirectional: whether the LSTM also reads each sentence right-to-left.
        dropout_prob: dropout probability. It is applied to the sentence summary before
            the linear layer and, when num_layers > 1, between the stacked LSTM layers.
    '''
    def __init__(self, vocab_size, embedding_dim, hidden_dim, label_size, padding_idx, num_layers=1, bidirectional=True, dropout_prob=0.3):
        super(RNNClassifier, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=padding_idx)
        # nn.LSTM only applies its `dropout` between stacked layers; with a single layer a
        # non-zero value does nothing and triggers a UserWarning, so it is passed only when useful.
        self.rnn = nn.LSTM(embedding_dim, hidden_dim, num_layers=num_layers,
                           batch_first=True, bidirectional=bidirectional,
                           dropout=dropout_prob if num_layers > 1 else 0.0)
        self.fc = nn.Linear(hidden_dim * 2 if bidirectional else hidden_dim, label_size)
        # Dropout on the sentence summary, so dropout_prob has an effect for single-layer models too.
        self.dropout = nn.Dropout(dropout_prob)

    def _sequence_lengths(self, text):
        '''Return the number of non-padding tokens in each row of `text` (at least 1).'''
        pad = self.embedding.padding_idx
        if pad is None:
            # No padding token configured: every position counts as real input.
            return torch.full((text.size(0),), text.size(1), dtype=torch.long)
        # Assumes padding is appended after the real tokens (right-padding).
        # clamp(min=1): pack_padded_sequence rejects zero-length sequences.
        return (text != pad).sum(dim=1).clamp(min=1)

    def forward(self, text):
        '''
        Compute class logits for a batch of token-index sequences.

        `text` is a (batch, seq_len) integer tensor. The sentence summary is built from
        the LSTM's final hidden states rather than `output[:, -1, :]`, because at the last
        time step (a) shorter sentences are sitting on padding, and (b) the backward
        direction has only processed a single token.
        '''
        embedded = self.embedding(text)
        lengths = self._sequence_lengths(text)
        # Packing makes the LSTM stop at each sequence's real end; lengths must live on the CPU.
        packed = nn.utils.rnn.pack_padded_sequence(
            embedded, lengths.cpu(), batch_first=True, enforce_sorted=False)
        _, (hidden, _) = self.rnn(packed)
        # hidden is (num_layers * num_directions, batch, hidden_dim); for the top layer the
        # forward state is second to last and the backward state is last.
        if self.rnn.bidirectional:
            last_output = torch.cat((hidden[-2], hidden[-1]), dim=1)
        else:
            last_output = hidden[-1]
        logits = self.fc(self.dropout(last_output))
        return logits

#Attempt 3 setup: rebuilding the model from freshly computed hyperparameters.
vocab_size = len(TEXT.vocab)
label_size = len(LABEL.vocab)
padding_idx = TEXT.vocab.stoi['<pad>']
embedding_dim = hidden_dim = 256
num_layers = 1
#bidirectional and dropout_prob are left at the class defaults.
model = RNNClassifier(
    vocab_size=vocab_size,
    embedding_dim=embedding_dim,
    hidden_dim=hidden_dim,
    label_size=label_size,
    padding_idx=padding_idx,
    num_layers=num_layers,
)
criterion = nn.CrossEntropyLoss()
#Adam with an explicit learning rate and a small weight decay.
optimizer = optim.Adam(model.parameters(), weight_decay=1e-5, lr=0.001)

#Attempt 3 training run, checkpointing on validation loss.
N_EPOCHS = 30
best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):
    #One pass over the training split followed by one pass over the validation split.
    train_loss, train_acc = train(model, train_iter, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, val_iter, criterion)

    #Per-epoch report.
    print(f'Epoch: {epoch+1:02}')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\tValid Loss: {valid_loss:.3f} | Valid Acc: {valid_acc*100:.2f}%')

    #Nothing to checkpoint unless validation loss strictly improved.
    if not (valid_loss < best_valid_loss):
        continue
    best_valid_loss = valid_loss
    torch.save(model.state_dict(), 'rnn_model.pt')

#Restoring the weights of the best checkpoint before touching the test split.
model.load_state_dict(torch.load('rnn_model.pt'))

#Measuring the restored model on the held-out test split.
test_loss, test_acc = evaluate(model, test_iter, criterion)
print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')

In [None]:
#Optimization Attempt4:
# 1) Since the model is still overfitting, I tried a bidirectional LSTM again, this time without the dropout_prob argument used in Attempt 3.
# 2) Kept the number of epochs at 30.
# 3) Set the learning rate to 0.001.
# 4) Added a weight_decay factor of 1e-5 for L2 regularization.
#Updating the definition of the RNNClassifier model.
class RNNClassifier(nn.Module):
    '''
    (Optionally bidirectional) LSTM sentence classifier without dropout.

    Parameters:
        vocab_size: number of rows in the embedding table.
        embedding_dim: size of each token embedding.
        hidden_dim: size of the LSTM hidden state (per direction).
        label_size: number of output classes.
        padding_idx: index of the padding token (may be None if inputs are never padded).
        num_layers: number of stacked LSTM layers.
        bidirectional: whether the LSTM also reads each sentence right-to-left.
    '''
    def __init__(self, vocab_size, embedding_dim, hidden_dim, label_size, padding_idx, num_layers=1, bidirectional=True):
        super(RNNClassifier, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=padding_idx)
        self.rnn = nn.LSTM(embedding_dim, hidden_dim, num_layers=num_layers,
                           batch_first=True, bidirectional=bidirectional)
        # A bidirectional LSTM produces one hidden_dim-sized state per direction.
        self.fc = nn.Linear(hidden_dim * 2 if bidirectional else hidden_dim, label_size)

    def _sequence_lengths(self, text):
        '''Return the number of non-padding tokens in each row of `text` (at least 1).'''
        pad = self.embedding.padding_idx
        if pad is None:
            # No padding token configured: every position counts as real input.
            return torch.full((text.size(0),), text.size(1), dtype=torch.long)
        # Assumes padding is appended after the real tokens (right-padding).
        # clamp(min=1): pack_padded_sequence rejects zero-length sequences.
        return (text != pad).sum(dim=1).clamp(min=1)

    def forward(self, text):
        '''
        Compute class logits for a batch of token-index sequences.

        `text` is a (batch, seq_len) integer tensor. The sentence summary is built from
        the LSTM's final hidden states rather than `output[:, -1, :]`, because at the last
        time step (a) shorter sentences are sitting on padding, and (b) the backward
        direction has only processed a single token.
        '''
        embedded = self.embedding(text)
        lengths = self._sequence_lengths(text)
        # Packing makes the LSTM stop at each sequence's real end; lengths must live on the CPU.
        packed = nn.utils.rnn.pack_padded_sequence(
            embedded, lengths.cpu(), batch_first=True, enforce_sorted=False)
        _, (hidden, _) = self.rnn(packed)
        # hidden is (num_layers * num_directions, batch, hidden_dim); for the top layer the
        # forward state is second to last and the backward state is last.
        if self.rnn.bidirectional:
            last_output = torch.cat((hidden[-2], hidden[-1]), dim=1)
        else:
            last_output = hidden[-1]
        logits = self.fc(last_output)
        return logits

#Attempt 4 setup: rebuilding the model (bidirectional is left at the class default).
num_layers = 1
model = RNNClassifier(
    vocab_size=vocab_size,
    embedding_dim=embedding_dim,
    hidden_dim=hidden_dim,
    label_size=label_size,
    padding_idx=padding_idx,
    num_layers=num_layers,
)
criterion = nn.CrossEntropyLoss()
#Adam with an explicit learning rate and a small weight decay.
optimizer = optim.Adam(model.parameters(), weight_decay=1e-5, lr=0.001)

#Attempt 4 training run, checkpointing on validation loss.
N_EPOCHS = 30
best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):
    #One pass over the training split followed by one pass over the validation split.
    train_loss, train_acc = train(model, train_iter, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, val_iter, criterion)

    #Per-epoch report.
    print(f'Epoch: {epoch+1:02}')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\tValid Loss: {valid_loss:.3f} | Valid Acc: {valid_acc*100:.2f}%')

    #Nothing to checkpoint unless validation loss strictly improved.
    if not (valid_loss < best_valid_loss):
        continue
    best_valid_loss = valid_loss
    torch.save(model.state_dict(), 'rnn_model.pt')

#Restoring the weights of the best checkpoint before touching the test split.
model.load_state_dict(torch.load('rnn_model.pt'))

#Measuring the restored model on the held-out test split.
test_loss, test_acc = evaluate(model, test_iter, criterion)
print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')