<a href="https://colab.research.google.com/github/gopal2812/mlblr/blob/master/Session4Assignmentbididrectionallstm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Instructions

Use this as a reference [link text](
https://github.com/bentrevett/pytorch-sentiment-analysis/blob/master/2%20-%20Upgraded%20Sentiment%20Analysis.ipynb )

Change this code in such a way that:

1. it has 3 LSTM layers

2. it has used a for loop to do so in the forward function
3. the dropout value used is 0.2
4. trained on the text that is reversed (for example "my name is Rohan" becomes "Rohan is name my"
5. achieves 87% or more accuracy
once done, share the Github link as well (after training on Google Colab, move the file to GitHub).


In [None]:
import torch
import random
import spacy
from torchtext import data, datasets
import torch.nn as nn
import torch.optim as optim

SEED = 1234
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

text = data.Field(tokenize = 'spacy', include_lengths = True)
label = data.LabelField(dtype = torch.float)

In [None]:
#load the IMDb dataset.
train_data, test_data = datasets.IMDB.splits(text, label)

In [None]:
#reverse training text data in-place
for i in range(len(train_data.examples)):
  vars(train_data.examples[i]).get('text').reverse()

In [None]:
# create the validation set from our training set.
train_data, valid_data = train_data.split(random_state = random.seed(SEED))

In [None]:
# build vocabulary with pre-trained global embedding

MAX_VOCAB_SIZE = 25_000

text.build_vocab(train_data, 
                 max_size = MAX_VOCAB_SIZE, 
                 vectors = "glove.6B.100d", 
                 unk_init = torch.Tensor.normal_)

label.build_vocab(train_data)

In [None]:
#Another thing for packed padded sequences all of the tensors within a batch 
#need to be sorted by their lengths. This is handled in the iterator by setting 
#sort_within_batch = True. 

BATCH_SIZE = 64

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(
    (train_data, valid_data, test_data), 
    batch_size = BATCH_SIZE,
    sort_within_batch = True,
    device = device)


Build the Model

In [None]:
class RNN(nn.Module):

    #parts list for building blocks
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, 
                 n_layers, bidirectional, dropout, pad_idx):
        
        super().__init__()
        
        self.embedding = nn.Embedding(vocab_size, embedding_dim, 
                                      padding_idx = pad_idx)
        #bidirectional = False
        self.rnns = nn.ModuleList([nn.LSTM(embedding_dim, hidden_dim, 
                                           bidirectional=bidirectional)])
        # LSTM layers = 3
        for _ in range(n_layers - 1):
          self.rnns.append(nn.LSTM(hidden_dim, hidden_dim, 
                                   bidirectional=bidirectional))
        
        self.fc = nn.Linear(hidden_dim, output_dim)
        
        self.dropout = nn.Dropout(dropout)

    #step-by-step manual for assembling building blocks    
    def forward(self, text, text_lengths):
        
        #text = [sent len, batch size]
        
        embedded = self.dropout(self.embedding(text))
        
        #embedded = [sent len, batch size, emb dim]
        
        #pack sequence
        packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, text_lengths)
        
        #stack multiple (3) LSTM layers with dropouts
        x = packed_embedded
        for rnn in self.rnns:
          _, (x, _) = rnn(x)
          x = self.dropout(x)
        
        #x = last hidden states [1, batch size, hid dim]
        hidden = x.squeeze(0)

        return self.fc(hidden)

In [None]:
#define model constants

INPUT_DIM = len(text.vocab)
EMBEDDING_DIM = 100
HIDDEN_DIM = 256
OUTPUT_DIM = 1
N_LAYERS = 3
#changed from True to False         
BIDIRECTIONAL = False
#changed from 0.5 to 0.2
DROPOUT = 0.2
PAD_IDX = text.vocab.stoi[text.pad_token]

In [None]:
model = RNN(INPUT_DIM, 
            EMBEDDING_DIM, 
            HIDDEN_DIM, 
            OUTPUT_DIM, 
            N_LAYERS, 
            BIDIRECTIONAL, 
            DROPOUT, 
            PAD_IDX)

In [None]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(model)
print(f'The model has {count_parameters(model):,} trainable parameters')

In [None]:
pretrained_embeddings = text.vocab.vectors
print(pretrained_embeddings.shape)

In [None]:
#copy pre-trained embeddings from vocabulary to model
model.embedding.weight.data.copy_(pretrained_embeddings)

In [None]:
#zero weights for <unk> and <pad> tokens

UNK_IDX = text.vocab.stoi[text.unk_token]

model.embedding.weight.data[UNK_IDX] = torch.zeros(EMBEDDING_DIM)
model.embedding.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_DIM)

print(model.embedding.weight.data)

Train the Model

In [None]:
#instantiate optimizer
optimizer = optim.Adam(model.parameters())

In [None]:
#instantiate loss function
criterion = nn.BCEWithLogitsLoss()

In [None]:
#place the model and criterion on the GPU (if available)
model = model.to(device)
criterion = criterion.to(device)

In [None]:
#define the accuracy for training, validation and testing
def binary_accuracy(preds, y):
    """
    Returns accuracy per batch,i.e. if you get 8/10 right, this returns 0.8, NOT 8
    """

    #round predictions to the closest integer
    rounded_preds = torch.round(torch.sigmoid(preds))
    correct = (rounded_preds == y).float() #convert into float for division 
    acc = correct.sum() / len(correct)
    return acc

In [None]:
#define the training
def train(model, iterator, optimizer, criterion):
    
    epoch_loss = 0
    epoch_acc = 0
    
    model.train()
    
    for batch in iterator:
        
        optimizer.zero_grad()
        
        text, text_lengths = batch.text

        #runtime type is GPU but  model expects CPU tensor
        text_lengths = text_lengths.cpu()

        predictions = model(text, text_lengths).squeeze(1)
        
        loss = criterion(predictions, batch.label)
        
        acc = binary_accuracy(predictions, batch.label)
        
        loss.backward()
        
        optimizer.step()
        
        epoch_loss += loss.item()
        epoch_acc += acc.item()
        
    return epoch_loss / len(iterator), epoch_acc / len(iterator)

In [None]:
def evaluate(model, iterator, criterion):
    
    epoch_loss = 0
    epoch_acc = 0
    
    model.eval()
    
    with torch.no_grad():
    
        for batch in iterator:

            text, text_lengths = batch.text
            
            #runtime type is GPU but model expects CPU tensor
            text_lengths = text_lengths.cpu()

            predictions = model(text, text_lengths).squeeze(1)
            
            loss = criterion(predictions, batch.label)
            
            acc = binary_accuracy(predictions, batch.label)

            epoch_loss += loss.item()
            epoch_acc += acc.item()
        
    return epoch_loss / len(iterator), epoch_acc / len(iterator)

In [None]:
# define how to calculate time required per epoch

import time

def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs

In [None]:
N_EPOCHS = 10

best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):

    start_time = time.time()
    
    train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)
    
    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'tut2-model.pt')
    
    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')

In [None]:
# test model using testing dataset
model.load_state_dict(torch.load('tut2-model.pt'))

test_loss, test_acc = evaluate(model, test_iterator, criterion)

print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')

In [34]:
nlp = spacy.load('en')

def predict_sentiment(model, sentence):
    model.eval()
    tokenized = [tok.text for tok in nlp.tokenizer(sentence)]
    indexed = [text.vocab.stoi[t] for t in tokenized]
    length = [len(indexed)]
    tensor = torch.LongTensor(indexed).to(device)
    tensor = tensor.unsqueeze(1)
    length_tensor = torch.LongTensor(length)
    prediction = torch.sigmoid(model(tensor, length_tensor))
    return prediction.item()

In [None]:
#test a negative sentence
predict_sentiment(model, "This film is terrible")

In [36]:
#test a positive sentence
predict_sentiment(model, "This film is very good")

0.7142864465713501