## Try it!

Before you start, make sure to place the SICK test CSV file in the Data/SICK folder.
The file can be found on this GitHub page, under the same folder.

### Task 1: Classification

In [3]:
import torch
import torch.nn as nn
from torchtext import data
import torch.optim as optim

import pandas as pd
import numpy as np
import sys
from collections import Counter

#Fix the RNG seed so repeated runs produce the same results
seed = 2020

#Set the seed to be fixed
torch.manual_seed(seed)

#Run on the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 

#Global
TEST_BATCH_SIZE = 13

#Load the test set
#Labels are numericalised as int64; sentences are tokenised with spaCy and
#returned together with their lengths (include_lengths=True), batch first.
test_label = data.LabelField(dtype=torch.int64, batch_first=True)
test_sentAB = data.Field(tokenize='spacy', include_lengths=True, batch_first=True)

#The first five CSV columns are ignored; the sixth is the label and the
#seventh the sentence text.
test_fields = [(None, None), (None, None), (None, None), (None, None), (None, None),
               ('test_label', test_label), ('test_sentAB', test_sentAB)]

test_dataset = data.TabularDataset(path='../DATA/SICK/SICK test.csv', format='CSV', 
                                    fields=test_fields, skip_header=True)

#NOTE(review): the vocabulary (and the label mapping below) is built from the
#test set itself, while the checkpoint loaded later assumes the training
#vocabulary. Token/label indices only line up with the trained weights if
#both vocabularies come out identical - confirm, or load the training vocab.
test_sentAB.build_vocab(test_dataset, min_freq=1, vectors_cache="Vectors/",
                        vectors="glove.6B.300d")

test_label.build_vocab(test_dataset)

#shuffle=False keeps the examples in file order so predictions can be matched
#back to the rows of the CSV. The sort key is the sentence LENGTH (not the
#token list itself) so that, whenever sorting is enabled, examples are
#grouped by length rather than compared lexicographically token by token.
test_iterator = data.BucketIterator(test_dataset, TEST_BATCH_SIZE,
                                     sort_key=lambda x : len(x.test_sentAB),
                                     device=device,
                                     shuffle=False)

In [9]:
class RNNClassifModel(nn.Module):
    """Sentence classifier: embedding -> dropout -> LSTM -> dropout -> linear.

    The final hidden state of the last LSTM layer is fed to a fully
    connected layer that produces one raw score (logit) per class. For a
    bidirectional LSTM the last forward and backward states are concatenated.

    Args:
        vocab_size: Number of rows of the embedding table.
        embedding_dim: Size of each embedding vector.
        hidden_dim: Size of the LSTM hidden state (per direction).
        output_dim: Number of output classes.
        num_layers: Number of stacked LSTM layers.
        bidirectional: Whether the LSTM runs in both directions.
        dropout_rate: Dropout probability applied to the embeddings and to
            the final hidden state.
    """
    
    def __init__(self, vocab_size, embedding_dim, hidden_dim,
                 output_dim, num_layers, bidirectional,dropout_rate):
        super().__init__()
        
        #Embedding layer
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        
        #More regularization
        self.dropout1 = nn.Dropout(p=dropout_rate)
        
        #LSTM layer
        self.lstm = nn.LSTM(embedding_dim, hidden_dim,
                            num_layers=num_layers,
                            bidirectional=bidirectional,
                            batch_first=True)
        
        #More regularization
        self.dropout2 = nn.Dropout(p=dropout_rate)
        
        #Fully connected layer: its input is the final hidden state, which is
        #twice as wide when the forward and backward states are concatenated
        num_directions = 2 if bidirectional else 1
        self.fc = nn.Linear(num_directions * hidden_dim, output_dim)
        
    def forward(self, text, text_lengths):
        """Return the raw class scores for a padded batch of sentences.

        Args:
            text: Token indices, [batch size, max sentence length in batch].
            text_lengths: True (unpadded) length of every sentence in the
                batch; a tensor on any device or a list of ints.

        Returns:
            Tensor of shape [batch size, output_dim] holding unnormalised
            scores (logits), suitable for nn.CrossEntropyLoss.
        """
        
        #text = [batch size, max sentence length in batch]
        embedded = self.embedding(text)
        
        #Regularize!
        reg_embedded = self.dropout1(embedded)
        
        #pack_padded_sequence requires the lengths to live on the CPU even
        #when the data itself is on the GPU
        if isinstance(text_lengths, torch.Tensor):
            text_lengths = text_lengths.cpu()
        
        #Pack the batch so the LSTM skips the padding positions
        packed_embedded = nn.utils.rnn.pack_padded_sequence(reg_embedded, text_lengths, 
                                                            batch_first=True, enforce_sorted=False)
        
        packed_output, (hidden, cell) = self.lstm(packed_embedded)
        
        #hidden = [num layers * num directions, batch size, hidden dim]
        if self.lstm.bidirectional:
            #Last layer: forward state is hidden[-2], backward is hidden[-1]
            hidden = torch.cat((hidden[-2,:,:], hidden[-1,:,:]), dim=1)
        else:
            #Single direction: only the last layer's state is used
            hidden = hidden[-1,:,:]
        
        inputs = self.dropout2(hidden)
        
        #Direct values are given to CE loss for loss calculation
        #LogSoftmax is used for inference
        outputs = self.fc(inputs)
    
        return outputs

In [10]:
#Hyper-parameters of the network; they are passed to the constructor below
#and therefore determine the weight shapes the checkpoint has to match
vocab_size = 2298 #Size of the training set vocabulary
embedding_dim = 300
num_hidden_nodes = 32
num_output_nodes = 3
num_layers = 1
bidirectional = True
dropout_rate = 0.4

#Create the model
model = RNNClassifModel(vocab_size, embedding_dim, num_hidden_nodes,
                 num_output_nodes, num_layers, bidirectional,dropout_rate)

#Load the pretrained model
#map_location remaps the stored tensors onto the device in use, so a
#checkpoint saved on a GPU can still be loaded on a CPU-only machine
checkpoint = torch.load('../Models/bilstm_task1_fn.pth', map_location=device)
model.load_state_dict(checkpoint["model"])


#Optimizer and Loss
criterion = nn.CrossEntropyLoss()

#LogSoftmax turns the raw scores into log-probabilities; the arg-max (the
#predicted class) is the same as for plain probabilities
softmax = nn.LogSoftmax(dim=1)

model.to(device)

#Last expression of the cell: displays the model architecture
model

RNNClassifModel(
  (embedding): Embedding(2298, 300)
  (dropout1): Dropout(p=0.4, inplace=False)
  (lstm): LSTM(300, 32, batch_first=True, bidirectional=True)
  (dropout2): Dropout(p=0.4, inplace=False)
  (fc): Linear(in_features=64, out_features=3, bias=True)
)

In [None]:
def classification_prediction(dataset, iterator=None, net=None, loss_fn=None):
    """Run the classifier over the test batches and return its predictions.

    The model is switched to evaluation mode and every batch is scored with
    gradients disabled. A progress line is written to stdout while
    iterating, followed by the mean batch loss and the accuracy.

    Args:
        dataset: Dataset being evaluated. Kept for backward compatibility;
            the batches themselves come from ``iterator``.
        iterator: Iterable of batches exposing ``test_sentAB`` (a
            ``(text, lengths)`` pair) and ``test_label``. Defaults to the
            notebook-level ``test_iterator``.
        net: Model called as ``net(text, lengths)`` and returning one row of
            class scores per example. Defaults to the notebook-level
            ``model``.
        loss_fn: Loss called as ``loss_fn(outputs, labels)``. Defaults to
            the notebook-level ``criterion``.

    Returns:
        List with the predicted class index of every example, in the order
        the iterator produced them.
    """
    #Fall back to the objects created in the earlier notebook cells
    iterator = test_iterator if iterator is None else iterator
    net = model if net is None else net
    loss_fn = criterion if loss_fn is None else loss_fn

    #Whole number of batches (the last one may be smaller than the others)
    iterations = len(iterator)

    #Inputs are moved to wherever the model's weights live
    first_param = next(net.parameters(), None)
    net_device = first_param.device if first_param is not None else None

    net.eval()

    running_loss = 0.0
    running_correct = 0
    seen_examples = 0
    _predictions = []
    
    with torch.no_grad():
        for batch_idx, items in enumerate(iterator):

            sys.stdout.write('\r')
            sys.stdout.write("{} Iteration :{}/{}"
                                .format("test", batch_idx + 1, iterations))

            #Get the text and length of sentences
            text, text_lengths = items.test_sentAB
            #Get labels of each batch
            labels = items.test_label

            if net_device is not None:
                text = text.to(net_device)
                labels = labels.to(net_device)

            #Sequence packing needs the lengths on the CPU
            text_lengths = text_lengths.cpu()

            #Scores are [batch size, num classes]; no squeeze, so a batch
            #holding a single example keeps its batch dimension
            outputs = net(text, text_lengths)

            loss = loss_fn(outputs, labels)

            #Index of the highest score; identical to the arg-max of the
            #(log-)softmax because that transform is monotonic per row
            predictions = torch.argmax(outputs, dim=1)
            correct = (predictions == labels).float()

            #Save the predictions in a list
            _predictions += list(predictions.cpu().numpy())
            
            #Calculate running loss and accuracy
            running_loss += loss.item()
            running_correct += correct.sum().item()
            seen_examples += labels.size(0)
        
    #Guard the divisions so an empty iterator does not raise
    epoch_loss = running_loss / iterations if iterations else 0.0
    epoch_accuracy = running_correct / seen_examples if seen_examples else 0.0

    sys.stdout.write("\n{} Loss: {:.4f} Accuracy: {:.4f}\n"
                        .format("test", epoch_loss, epoch_accuracy))
        
    return _predictions 