<a href="https://colab.research.google.com/github/denzilsaldanha/neural-nets-course/blob/master/hw2_part3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Download hw2 archive and setup environment
# Fetch the course archive and extract it (it unpacks into ./hw2).
!wget https://www.cse.unsw.edu.au/~cs9444/19T3/hw2/hw2.zip
!unzip hw2.zip
# Move the dataset directory, the dataloader module and the vector cache
# into the current working directory, where the cells below look for them.
!mv -t . hw2/data hw2/imdb_dataloader.py hw2/.vector_cache
# Remove the extracted directory and the archive now that their contents were moved.
!rm -rf hw2 hw2.zip

BIDIRECTIONAL GRU

In [0]:
# Ensure the train/pos and train/neg directories exist (-p makes this a no-op when they already do).
!mkdir -p data/imdb/aclImdb/train/{pos,neg}

In [0]:
# Build a combined "all" split containing both the train and the dev reviews.
# The class sub-directories must exist first: if `all` is missing, the first
# `cp -a .../train/neg .../all` makes `all` itself a copy of train/neg, so the
# train negatives end up at the top level of `all` instead of inside all/neg.
!mkdir -p data/imdb/aclImdb/all/pos data/imdb/aclImdb/all/neg
!cp -a data/imdb/aclImdb/train/neg data/imdb/aclImdb/all
!cp -a data/imdb/aclImdb/train/pos data/imdb/aclImdb/all
!cp -a data/imdb/aclImdb/dev/neg data/imdb/aclImdb/all
!cp -a data/imdb/aclImdb/dev/pos data/imdb/aclImdb/all

In [0]:
import numpy as np
import torch
import torch.nn as tnn
import torch.nn.functional as F
import torch.optim as topti
from torchtext import data
from torchtext.vocab import GloVe
from imdb_dataloader import IMDB
import string


# Class for creating the neural network.
class Network(tnn.Module):
    """Two-layer bidirectional GRU classifier over pre-embedded sequences.

    The network receives already-embedded tokens (50 features per token,
    batch-first), runs them through the GRU, concatenates the final forward and
    backward hidden states of the top layer and maps them through two linear
    layers to a single raw score per sequence (no sigmoid is applied here).
    """

    def __init__(self):
        super(Network, self).__init__()

        embedding_dim = 50
        hidden_dim = 220
        output_dim = 1
        n_layers = 2
        bidirectional = True
        dropout = 0.6

        self.gru = tnn.GRU(embedding_dim,
                           hidden_dim,
                           num_layers=n_layers,
                           bidirectional=bidirectional,
                           dropout=dropout,
                           batch_first=True)

        # hidden_dim * 2 because the forward and backward states are concatenated.
        self.fc = tnn.Linear(hidden_dim * 2, hidden_dim)
        self.fc1 = tnn.Linear(hidden_dim, output_dim)
        self.dropout = tnn.Dropout(dropout)

    def forward(self, input, lengths):
        """Score a batch of padded, embedded sequences.

        Args:
            input: float tensor laid out as [batch size, sent len, emb dim].
            lengths: true (unpadded) length of every sequence, as a tensor or a
                list, sorted in decreasing order as pack_padded_sequence
                expects by default.

        Returns:
            1-D tensor with one raw score per sequence in the batch.
        """
        # pack_padded_sequence needs the lengths on the CPU in recent PyTorch
        # releases, even when the input itself lives on the GPU.
        if torch.is_tensor(lengths):
            lengths = lengths.cpu()

        # Packing lets the GRU skip the padded positions.
        packed_embedded = tnn.utils.rnn.pack_padded_sequence(input, lengths, batch_first=True)

        # Only the final hidden states are used, so the packed per-step output
        # is not unpacked.
        # hidden = [num layers * num directions, batch size, hid dim]
        _, hidden = self.gru(packed_embedded)

        # Concatenate the final forward (hidden[-2]) and backward (hidden[-1])
        # states of the last layer and apply dropout.
        # hidden = [batch size, hid dim * num directions]
        hidden = self.dropout(torch.cat((hidden[-2, :, :], hidden[-1, :, :]), dim=1))

        output = self.fc(hidden)
        output = self.fc1(output)

        # [batch size, 1] -> [batch size]
        return output[:, -1]
        


class PreProcessing():
    """Token clean-up hooks and the torchtext text field that uses them."""

    def pre(x):
        """Clean a tokenized review; called after tokenization.

        Strips punctuation from every token, then keeps only tokens that are
        purely alphabetic, are not the HTML remnant 'br', and are longer than
        one character.
        """
        strip_punctuation = str.maketrans('', '', string.punctuation)
        kept = []
        for raw_token in x:
            word = raw_token.translate(strip_punctuation)
            # Drops numbers, mixed tokens and tokens emptied by the stripping.
            if not word.isalpha():
                continue
            # 'br' is the only stop word removed in this variant.
            if word == 'br':
                continue
            # Drop single letters.
            if len(word) <= 1:
                continue
            kept.append(word)
        return kept

    def post(batch, vocab):
        """Called after numericalization but prior to vectorization; a pass-through."""
        return batch, vocab

    text_field = data.Field(
        lower=True,
        include_lengths=True,
        batch_first=True,
        preprocessing=pre,
        postprocessing=None,
    )


def lossFunc():
    """
    Build the training criterion: binary cross-entropy computed on raw
    network scores, with the sigmoid applied inside the loss itself.
    """
    criterion = tnn.BCEWithLogitsLoss()
    return criterion

def main():
    """Train the network, save its weights to ./model.pth and print dev accuracy.

    Builds the text/label fields and vocabularies (GloVe 6B, 50-d vectors),
    trains for 15 epochs with Adam, saves the state dict, then evaluates on
    the dev split.
    """
    # Use a GPU if available, as it should be faster.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("Using device: " + str(device))

    # Load the training dataset, and create a data loader to generate a batch.
    textField = PreProcessing.text_field
    labelField = data.Field(sequential=False)

    # NOTE(review): the "all" split appears to be built from train + dev, so the
    # dev accuracy printed below would not be a held-out estimate - confirm.
    train, dev = IMDB.splits(textField, labelField, train="all", validation="dev")

    textField.build_vocab(train, dev, vectors=GloVe(name="6B", dim=50))
    labelField.build_vocab(train, dev)

    trainLoader, testLoader = data.BucketIterator.splits((train, dev), shuffle=True, batch_size=64,
                                                         sort_key=lambda x: len(x.text), sort_within_batch=True)

    net = Network().to(device)
    criterion = lossFunc()
    optimiser = topti.Adam(net.parameters(), lr=0.001)  # Minimise the loss using the Adam algorithm.

    # Make the training mode explicit so dropout is active while fitting.
    net.train()
    for epoch in range(15):
        running_loss = 0

        for i, batch in enumerate(trainLoader):
            # Get a batch and potentially send it to GPU memory.  The token ids
            # are replaced by their pretrained vectors before reaching the network.
            inputs, length, labels = textField.vocab.vectors[batch.text[0]].to(device), batch.text[1].to(
                device), batch.label.type(torch.FloatTensor).to(device)

            # Presumably shifts the label vocab indices (1, 2) down to the 0/1
            # targets the binary loss expects - verify against labelField.vocab.
            labels -= 1

            # PyTorch calculates gradients by accumulating contributions to them (useful for
            # RNNs).  Hence we must manually set them to zero before calculating them.
            optimiser.zero_grad()

            # Forward pass through the network.
            output = net(inputs, length)

            loss = criterion(output, labels)

            # Calculate gradients.
            loss.backward()

            # Minimise the loss according to the gradient.
            optimiser.step()

            running_loss += loss.item()

            # Report the mean loss of every 32 batches.
            if i % 32 == 31:
                print("Epoch: %2d, Batch: %4d, Loss: %.3f" % (epoch + 1, i + 1, running_loss / 32))
                running_loss = 0

    num_correct = 0

    # Save model
    torch.save(net.state_dict(), "./model.pth")
    print("Saved model")

    # Switch to evaluation mode once, before the loop, so dropout is disabled
    # for every test batch.
    net.eval()

    # Evaluate network on the test dataset.  We aren't calculating gradients, so disable autograd to speed up
    # computations and reduce memory usage.
    with torch.no_grad():
        for batch in testLoader:
            # Get a batch and potentially send it to GPU memory.
            inputs, length, labels = textField.vocab.vectors[batch.text[0]].to(device), batch.text[1].to(
                device), batch.label.type(torch.FloatTensor).to(device)

            labels -= 1

            # Get predictions: sigmoid turns the raw scores into probabilities,
            # rounding thresholds them at 0.5.
            outputs = torch.sigmoid(net(inputs, length))
            predicted = torch.round(outputs)

            num_correct += torch.sum(labels == predicted).item()

    accuracy = 100 * num_correct / len(dev)

    print(f"Classification accuracy: {accuracy}")

if __name__ == '__main__':
    main()


Using device: cuda:0


100%|█████████▉| 399668/400000 [00:09<00:00, 41875.17it/s]


Epoch:  1, Batch:   32, Loss: 0.474
Epoch:  1, Batch:   64, Loss: 0.454
Epoch:  1, Batch:   96, Loss: 0.410
Epoch:  1, Batch:  128, Loss: 0.425
Epoch:  1, Batch:  160, Loss: 0.445
Epoch:  1, Batch:  192, Loss: 0.405
Epoch:  1, Batch:  224, Loss: 0.422
Epoch:  1, Batch:  256, Loss: 0.373
Epoch:  2, Batch:   32, Loss: 0.348
Epoch:  2, Batch:   64, Loss: 0.360
Epoch:  2, Batch:   96, Loss: 0.319
Epoch:  2, Batch:  128, Loss: 0.302
Epoch:  2, Batch:  160, Loss: 0.333
Epoch:  2, Batch:  192, Loss: 0.288
Epoch:  2, Batch:  224, Loss: 0.285
Epoch:  2, Batch:  256, Loss: 0.246
Epoch:  3, Batch:   32, Loss: 0.265
Epoch:  3, Batch:   64, Loss: 0.301
Epoch:  3, Batch:   96, Loss: 0.273
Epoch:  3, Batch:  128, Loss: 0.260
Epoch:  3, Batch:  160, Loss: 0.256
Epoch:  3, Batch:  192, Loss: 0.270
Epoch:  3, Batch:  224, Loss: 0.241
Epoch:  3, Batch:  256, Loss: 0.256
Epoch:  4, Batch:   32, Loss: 0.269
Epoch:  4, Batch:   64, Loss: 0.206
Epoch:  4, Batch:   96, Loss: 0.256
Epoch:  4, Batch:  128, Loss

GRU WITH STOP WORDS REMOVED

In [0]:
import numpy as np
import torch
import torch.nn as tnn
import torch.nn.functional as F
import torch.optim as topti
from torchtext import data
from torchtext.vocab import GloVe
from imdb_dataloader import IMDB
import string


# Class for creating the neural network.
class Network(tnn.Module):
    """Two-layer bidirectional GRU classifier over pre-embedded sequences.

    The network receives already-embedded tokens (50 features per token,
    batch-first), runs them through the GRU, concatenates the final forward and
    backward hidden states of the top layer and maps them through one linear
    layer to a single raw score per sequence (no sigmoid is applied here).
    """

    def __init__(self):
        super(Network, self).__init__()

        embedding_dim = 50
        hidden_dim = 256
        output_dim = 1
        n_layers = 2
        bidirectional = True
        dropout = 0.5

        self.gru = tnn.GRU(embedding_dim,
                           hidden_dim,
                           num_layers=n_layers,
                           bidirectional=bidirectional,
                           dropout=dropout,
                           batch_first=True)

        # hidden_dim * 2 because the forward and backward states are concatenated.
        self.fc = tnn.Linear(hidden_dim * 2, output_dim)

        self.dropout = tnn.Dropout(dropout)

    def forward(self, input, lengths):
        """Score a batch of padded, embedded sequences.

        Args:
            input: float tensor laid out as [batch size, sent len, emb dim].
            lengths: true (unpadded) length of every sequence, as a tensor or a
                list, sorted in decreasing order as pack_padded_sequence
                expects by default.

        Returns:
            1-D tensor with one raw score per sequence in the batch.
        """
        # pack_padded_sequence needs the lengths on the CPU in recent PyTorch
        # releases, even when the input itself lives on the GPU.
        if torch.is_tensor(lengths):
            lengths = lengths.cpu()

        # Packing lets the GRU skip the padded positions.
        packed_embedded = tnn.utils.rnn.pack_padded_sequence(input, lengths, batch_first=True)

        # Only the final hidden states are used, so the packed per-step output
        # is not unpacked.
        # hidden = [num layers * num directions, batch size, hid dim]
        _, hidden = self.gru(packed_embedded)

        # Concatenate the final forward (hidden[-2]) and backward (hidden[-1])
        # states of the last layer and apply dropout.
        # hidden = [batch size, hid dim * num directions]
        hidden = self.dropout(torch.cat((hidden[-2, :, :], hidden[-1, :, :]), dim=1))

        output = self.fc(hidden)

        # [batch size, 1] -> [batch size]
        return output[:, -1]
        


class PreProcessing():
    """Token clean-up hooks and the torchtext text field that uses them."""

    # Built once when the class is created instead of on every call to pre().
    PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)

    # Stop words to drop.  A frozenset gives constant-time membership tests;
    # the contents are unchanged from the original list.
    STOP_WORDS = frozenset(["br","i", "me", "my", "myself", "we", "our", "ours", "ourselves", "you", "your", "yours", "yourself", "yourselves", "he", "him", "his", "himself", "she", "her", "hers", "herself", "it", "its", "itself", "they", "them", "their", "theirs", "themselves", "what", "which", "who", "whom", "this", "that", "these", "those", "am", "is", "are", "was", "were", "be", "been", "being", "have", "has", "had", "having", "do", "does", "did", "doing", "a", "an", "the", "and", "but", "if", "or", "because", "as", "until", "while", "of", "at", "by", "for", "with", "about", "against", "between", "into", "through", "during", "before", "after", "above", "below", "to", "from", "up", "down", "in", "out", "on", "off", "over", "under", "again", "further", "then", "once", "here", "there", "when", "where", "why", "how", "all", "any", "both", "each", "few", "more", "most", "other", "some", "such", "no", "nor", "not", "only", "own", "same", "so", "than", "too", "very", "s", "t", "can", "will", "just", "don", "should", "now"])

    def pre(x):
        """Clean a tokenized review; called after tokenization.

        Strips punctuation from every token, then keeps only tokens that are
        purely alphabetic, are not stop words, and are longer than one
        character.

        Args:
            x: list of token strings.

        Returns:
            A new list with the surviving, punctuation-free tokens.
        """
        # pre() is invoked as a plain function (it is handed to data.Field
        # below), so the class attributes are reached through the class name.
        table = PreProcessing.PUNCTUATION_TABLE
        stop_words = PreProcessing.STOP_WORDS

        stripped = (w.translate(table) for w in x)
        # isalpha() also rejects tokens left empty by the punctuation stripping.
        return [w for w in stripped
                if w.isalpha() and w not in stop_words and len(w) > 1]

    def post(batch, vocab):
        """Called after numericalization but prior to vectorization; a pass-through."""
        return batch, vocab

    text_field = data.Field(lower=True, include_lengths=True, batch_first=True, preprocessing=pre, postprocessing=None)


def lossFunc():
    """
    Build the training criterion: binary cross-entropy computed on raw
    network scores, with the sigmoid applied inside the loss itself.
    """
    criterion = tnn.BCEWithLogitsLoss()
    return criterion

def main():
    """Train the network, save its weights to ./model.pth and print dev accuracy.

    Builds the text/label fields and vocabularies (GloVe 6B, 50-d vectors),
    trains for 10 epochs with Adam, saves the state dict, then evaluates on
    the dev split.
    """
    # Use a GPU if available, as it should be faster.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("Using device: " + str(device))

    # Load the training dataset, and create a data loader to generate a batch.
    textField = PreProcessing.text_field
    labelField = data.Field(sequential=False)

    train, dev = IMDB.splits(textField, labelField, train="train", validation="dev")

    textField.build_vocab(train, dev, vectors=GloVe(name="6B", dim=50))
    labelField.build_vocab(train, dev)
    print("Input_DIM = " , len(textField.vocab))
    print("pad_idx = " ,  textField.vocab.stoi[textField.pad_token])

    trainLoader, testLoader = data.BucketIterator.splits((train, dev), shuffle=True, batch_size=64,
                                                         sort_key=lambda x: len(x.text), sort_within_batch=True)

    net = Network().to(device)
    criterion = lossFunc()
    optimiser = topti.Adam(net.parameters(), lr=0.001)  # Minimise the loss using the Adam algorithm.

    # Make the training mode explicit so dropout is active while fitting.
    net.train()
    for epoch in range(10):
        running_loss = 0

        for i, batch in enumerate(trainLoader):
            # Get a batch and potentially send it to GPU memory.  The token ids
            # are replaced by their pretrained vectors before reaching the network.
            inputs, length, labels = textField.vocab.vectors[batch.text[0]].to(device), batch.text[1].to(
                device), batch.label.type(torch.FloatTensor).to(device)

            # Presumably shifts the label vocab indices (1, 2) down to the 0/1
            # targets the binary loss expects - verify against labelField.vocab.
            labels -= 1

            # PyTorch calculates gradients by accumulating contributions to them (useful for
            # RNNs).  Hence we must manually set them to zero before calculating them.
            optimiser.zero_grad()

            # Forward pass through the network.
            output = net(inputs, length)

            loss = criterion(output, labels)

            # Calculate gradients.
            loss.backward()

            # Minimise the loss according to the gradient.
            optimiser.step()

            running_loss += loss.item()

            # Report the mean loss of every 32 batches.
            if i % 32 == 31:
                print("Epoch: %2d, Batch: %4d, Loss: %.3f" % (epoch + 1, i + 1, running_loss / 32))
                running_loss = 0

    num_correct = 0

    # Save model
    torch.save(net.state_dict(), "./model.pth")
    print("Saved model")

    # Switch to evaluation mode: without this the dropout layers stay active
    # and randomly perturb the predictions being scored.
    net.eval()

    # Evaluate network on the test dataset.  We aren't calculating gradients, so disable autograd to speed up
    # computations and reduce memory usage.
    with torch.no_grad():
        for batch in testLoader:
            # Get a batch and potentially send it to GPU memory.
            inputs, length, labels = textField.vocab.vectors[batch.text[0]].to(device), batch.text[1].to(
                device), batch.label.type(torch.FloatTensor).to(device)

            labels -= 1

            # Get predictions: sigmoid turns the raw scores into probabilities,
            # rounding thresholds them at 0.5.
            outputs = torch.sigmoid(net(inputs, length))
            predicted = torch.round(outputs)

            num_correct += torch.sum(labels == predicted).item()

    accuracy = 100 * num_correct / len(dev)

    print(f"Classification accuracy: {accuracy}")

if __name__ == '__main__':
    main()


LSTM WITH STOP WORDS REMOVED

In [0]:
import numpy as np
import torch
import torch.nn as tnn
import torch.nn.functional as F
import torch.optim as topti
from torchtext import data
from torchtext.vocab import GloVe
from imdb_dataloader import IMDB
import string


# Class for creating the neural network.
class Network(tnn.Module):
    """Two-layer bidirectional LSTM classifier over pre-embedded sequences.

    The network receives already-embedded tokens (50 features per token,
    batch-first), runs them through the LSTM, concatenates the final forward
    and backward hidden states of the top layer and maps them through one
    linear layer to a single raw score per sequence (no sigmoid is applied
    here).
    """

    def __init__(self):
        super(Network, self).__init__()

        embedding_dim = 50
        hidden_dim = 256
        output_dim = 1
        n_layers = 2
        bidirectional = True
        dropout = 0.5

        self.rnn = tnn.LSTM(embedding_dim,
                            hidden_dim,
                            num_layers=n_layers,
                            bidirectional=bidirectional,
                            dropout=dropout,
                            batch_first=True)

        # hidden_dim * 2 because the forward and backward states are concatenated.
        self.fc = tnn.Linear(hidden_dim * 2, output_dim)

        self.dropout = tnn.Dropout(dropout)

    def forward(self, input, lengths):
        """Score a batch of padded, embedded sequences.

        Args:
            input: float tensor laid out as [batch size, sent len, emb dim].
            lengths: true (unpadded) length of every sequence, as a tensor or a
                list, sorted in decreasing order as pack_padded_sequence
                expects by default.

        Returns:
            1-D tensor with one raw score per sequence in the batch.
        """
        # pack_padded_sequence needs the lengths on the CPU in recent PyTorch
        # releases, even when the input itself lives on the GPU.
        if torch.is_tensor(lengths):
            lengths = lengths.cpu()

        # Packing lets the LSTM skip the padded positions.
        packed_embedded = tnn.utils.rnn.pack_padded_sequence(input, lengths, batch_first=True)

        # Only the final hidden states are used; the packed per-step output and
        # the cell state are discarded.
        # hidden = [num layers * num directions, batch size, hid dim]
        _, (hidden, _) = self.rnn(packed_embedded)

        # Concatenate the final forward (hidden[-2]) and backward (hidden[-1])
        # states of the last layer and apply dropout.
        # hidden = [batch size, hid dim * num directions]
        hidden = self.dropout(torch.cat((hidden[-2, :, :], hidden[-1, :, :]), dim=1))

        output = self.fc(hidden)

        # [batch size, 1] -> [batch size]
        return output[:, -1]
        


class PreProcessing():
    """Token clean-up hooks and the torchtext text field that uses them."""

    # Built once when the class is created instead of on every call to pre().
    PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)

    # Stop words to drop.  A frozenset gives constant-time membership tests;
    # the contents are unchanged from the original list.
    STOP_WORDS = frozenset(["br","i", "me", "my", "myself", "we", "our", "ours", "ourselves", "you", "your", "yours", "yourself", "yourselves", "he", "him", "his", "himself", "she", "her", "hers", "herself", "it", "its", "itself", "they", "them", "their", "theirs", "themselves", "what", "which", "who", "whom", "this", "that", "these", "those", "am", "is", "are", "was", "were", "be", "been", "being", "have", "has", "had", "having", "do", "does", "did", "doing", "a", "an", "the", "and", "but", "if", "or", "because", "as", "until", "while", "of", "at", "by", "for", "with", "about", "against", "between", "into", "through", "during", "before", "after", "above", "below", "to", "from", "up", "down", "in", "out", "on", "off", "over", "under", "again", "further", "then", "once", "here", "there", "when", "where", "why", "how", "all", "any", "both", "each", "few", "more", "most", "other", "some", "such", "no", "nor", "not", "only", "own", "same", "so", "than", "too", "very", "s", "t", "can", "will", "just", "don", "should", "now"])

    def pre(x):
        """Clean a tokenized review; called after tokenization.

        Strips punctuation from every token, then keeps only tokens that are
        purely alphabetic, are not stop words, and are longer than one
        character.

        Args:
            x: list of token strings.

        Returns:
            A new list with the surviving, punctuation-free tokens.
        """
        # pre() is invoked as a plain function (it is handed to data.Field
        # below), so the class attributes are reached through the class name.
        table = PreProcessing.PUNCTUATION_TABLE
        stop_words = PreProcessing.STOP_WORDS

        stripped = (w.translate(table) for w in x)
        # isalpha() also rejects tokens left empty by the punctuation stripping.
        return [w for w in stripped
                if w.isalpha() and w not in stop_words and len(w) > 1]

    def post(batch, vocab):
        """Called after numericalization but prior to vectorization; a pass-through."""
        return batch, vocab

    text_field = data.Field(lower=True, include_lengths=True, batch_first=True, preprocessing=pre, postprocessing=None)


def lossFunc():
    """
    Build the training criterion: binary cross-entropy computed on raw
    network scores, with the sigmoid applied inside the loss itself.
    """
    criterion = tnn.BCEWithLogitsLoss()
    return criterion

def main():
    """Train the network, save its weights to ./model.pth and print dev accuracy.

    Builds the text/label fields and vocabularies (GloVe 6B, 50-d vectors),
    trains for 10 epochs with Adam, saves the state dict, then evaluates on
    the dev split.
    """
    # Use a GPU if available, as it should be faster.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("Using device: " + str(device))

    # Load the training dataset, and create a data loader to generate a batch.
    textField = PreProcessing.text_field
    labelField = data.Field(sequential=False)

    train, dev = IMDB.splits(textField, labelField, train="train", validation="dev")

    textField.build_vocab(train, dev, vectors=GloVe(name="6B", dim=50))
    labelField.build_vocab(train, dev)
    print("Input_DIM = " , len(textField.vocab))
    print("pad_idx = " ,  textField.vocab.stoi[textField.pad_token])

    trainLoader, testLoader = data.BucketIterator.splits((train, dev), shuffle=True, batch_size=64,
                                                         sort_key=lambda x: len(x.text), sort_within_batch=True)

    net = Network().to(device)
    criterion = lossFunc()
    optimiser = topti.Adam(net.parameters(), lr=0.001)  # Minimise the loss using the Adam algorithm.

    # Make the training mode explicit so dropout is active while fitting.
    net.train()
    for epoch in range(10):
        running_loss = 0

        for i, batch in enumerate(trainLoader):
            # Get a batch and potentially send it to GPU memory.  The token ids
            # are replaced by their pretrained vectors before reaching the network.
            inputs, length, labels = textField.vocab.vectors[batch.text[0]].to(device), batch.text[1].to(
                device), batch.label.type(torch.FloatTensor).to(device)

            # Presumably shifts the label vocab indices (1, 2) down to the 0/1
            # targets the binary loss expects - verify against labelField.vocab.
            labels -= 1

            # PyTorch calculates gradients by accumulating contributions to them (useful for
            # RNNs).  Hence we must manually set them to zero before calculating them.
            optimiser.zero_grad()

            # Forward pass through the network.
            output = net(inputs, length)

            loss = criterion(output, labels)

            # Calculate gradients.
            loss.backward()

            # Minimise the loss according to the gradient.
            optimiser.step()

            running_loss += loss.item()

            # Report the mean loss of every 32 batches.
            if i % 32 == 31:
                print("Epoch: %2d, Batch: %4d, Loss: %.3f" % (epoch + 1, i + 1, running_loss / 32))
                running_loss = 0

    num_correct = 0

    # Save model
    torch.save(net.state_dict(), "./model.pth")
    print("Saved model")

    # Switch to evaluation mode: without this the dropout layers stay active
    # and randomly perturb the predictions being scored.
    net.eval()

    # Evaluate network on the test dataset.  We aren't calculating gradients, so disable autograd to speed up
    # computations and reduce memory usage.
    with torch.no_grad():
        for batch in testLoader:
            # Get a batch and potentially send it to GPU memory.
            inputs, length, labels = textField.vocab.vectors[batch.text[0]].to(device), batch.text[1].to(
                device), batch.label.type(torch.FloatTensor).to(device)

            labels -= 1

            # Get predictions: sigmoid turns the raw scores into probabilities,
            # rounding thresholds them at 0.5.
            outputs = torch.sigmoid(net(inputs, length))
            predicted = torch.round(outputs)

            num_correct += torch.sum(labels == predicted).item()

    accuracy = 100 * num_correct / len(dev)

    print(f"Classification accuracy: {accuracy}")

if __name__ == '__main__':
    main()


Using device: cuda:0
Input_DIM =  133295
pad_idx =  1
Epoch:  1, Batch:   32, Loss: 0.641
Epoch:  1, Batch:   64, Loss: 0.649
Epoch:  1, Batch:   96, Loss: 0.636
Epoch:  1, Batch:  128, Loss: 0.585
Epoch:  1, Batch:  160, Loss: 0.593
Epoch:  1, Batch:  192, Loss: 0.580
Epoch:  1, Batch:  224, Loss: 0.612
Epoch:  1, Batch:  256, Loss: 0.595
Epoch:  1, Batch:  288, Loss: 0.631
Epoch:  1, Batch:  320, Loss: 0.627
Epoch:  1, Batch:  352, Loss: 0.608
Epoch:  1, Batch:  384, Loss: 0.685
Epoch:  2, Batch:   32, Loss: 0.635
Epoch:  2, Batch:   64, Loss: 0.607
Epoch:  2, Batch:   96, Loss: 0.609
Epoch:  2, Batch:  128, Loss: 0.592
Epoch:  2, Batch:  160, Loss: 0.555
Epoch:  2, Batch:  192, Loss: 0.611
Epoch:  2, Batch:  224, Loss: 0.592
Epoch:  2, Batch:  256, Loss: 0.584
Epoch:  2, Batch:  288, Loss: 0.596
Epoch:  2, Batch:  320, Loss: 0.592
Epoch:  2, Batch:  352, Loss: 0.538
Epoch:  2, Batch:  384, Loss: 0.516
Epoch:  3, Batch:   32, Loss: 0.501
Epoch:  3, Batch:   64, Loss: 0.523
Epoch:  3,

LSTM WITH ONLY BR REMOVED


In [0]:
import numpy as np
import torch
import torch.nn as tnn
import torch.nn.functional as F
import torch.optim as topti
from torchtext import data
from torchtext.vocab import GloVe
from imdb_dataloader import IMDB
import string


# Class for creating the neural network.
class Network(tnn.Module):
    """Two-layer bidirectional LSTM binary classifier over pre-embedded text.

    The network has no embedding layer: ``forward`` expects its input to be
    already embedded with ``embedding_dim`` (50) features per token. The final
    forward and backward hidden states of the top LSTM layer are concatenated,
    passed through dropout and projected to a single logit per example.
    """

    def __init__(self):
        super(Network, self).__init__()

        embedding_dim = 50    # must match the dimensionality of the input vectors
        hidden_dim = 256
        output_dim = 1        # one logit per example (binary classification)
        n_layers = 2
        bidirectional = True
        dropout = 0.5

        self.rnn = tnn.LSTM(embedding_dim,
                            hidden_dim,
                            num_layers=n_layers,
                            bidirectional=bidirectional,
                            dropout=dropout,
                            batch_first=True)

        # hidden_dim * 2 because the forward and backward states are concatenated.
        self.fc = tnn.Linear(hidden_dim * 2, output_dim)

        self.dropout = tnn.Dropout(dropout)

    def forward(self, input, lengths):
        """Compute one raw logit (no sigmoid applied) per sequence in the batch.

        Args:
            input: float tensor of shape [batch size, sent len, emb dim]
                (``batch_first`` layout), ordered by decreasing length.
            lengths: true (unpadded) length of each sequence, as a tensor or
                a list, in the same order as ``input``.

        Returns:
            Tensor of shape [batch size] holding the logits.
        """
        # pack_padded_sequence requires the lengths on the CPU in current
        # PyTorch releases, even when the data itself lives on the GPU.
        lengths = torch.as_tensor(lengths).cpu()

        # Pack so that the LSTM skips padding positions entirely.
        packed_embedded = tnn.utils.rnn.pack_padded_sequence(input, lengths, batch_first=True)

        # Only the final hidden state is needed; the per-step outputs and the
        # cell state are discarded (no need to unpack the sequence).
        # hidden = [num layers * num directions, batch size, hid dim]
        _, (hidden, _) = self.rnn(packed_embedded)

        # Concatenate the final forward (hidden[-2]) and backward (hidden[-1])
        # hidden states of the top layer, then apply dropout.
        # hidden = [batch size, hid dim * num directions]
        hidden = self.dropout(torch.cat((hidden[-2, :, :], hidden[-1, :, :]), dim=1))

        # output = [batch size, 1]
        output = self.fc(hidden)

        # Drop the singleton feature dimension -> [batch size]
        return output[:, -1]
        


class PreProcessing():
    def pre(x):
        """Called after tokenization.

        Cleans a list of tokens: strips punctuation characters from every
        token, then keeps only tokens that are purely alphabetic, are not in
        the stop-word list and are longer than one character.
        """
        strip_punctuation = str.maketrans('', '', string.punctuation)
        # TODO - would removing a full English stop-word list make a difference?
        # For now only the 'br' token is dropped.
        stop_words = ['br']

        kept = []
        for raw_token in x:
            token = raw_token.translate(strip_punctuation)
            # Drops numbers, mixed alphanumerics and tokens left empty.
            if not token.isalpha():
                continue
            if token in stop_words:
                continue
            # Drop single-letter tokens.
            if len(token) <= 1:
                continue
            kept.append(token)
        return kept

    def post(batch, vocab):
        """Called after numericalization but prior to vectorization.

        Currently a pass-through; it is not wired into ``text_field`` below
        (``postprocessing=None``).
        """
        return batch, vocab

    text_field = data.Field(
        lower=True,
        include_lengths=True,
        batch_first=True,
        preprocessing=pre,
        postprocessing=None,
    )


def lossFunc():
    """
    Build the training criterion for the network above: binary cross-entropy
    evaluated on raw logits, with the sigmoid applied inside the loss itself.
    """
    criterion = tnn.BCEWithLogitsLoss()
    return criterion

def main():
    """Train the network on the IMDB train split and report dev-set accuracy.

    Steps: build the vocabulary with 50-d GloVe vectors, train for 10 epochs
    with Adam, save the weights to ``./model.pth``, then evaluate on the dev
    split with the network in evaluation mode.
    """
    # Use a GPU if available, as it should be faster.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("Using device: " + str(device))

    # Load the training dataset, and create a data loader to generate a batch.
    textField = PreProcessing.text_field
    labelField = data.Field(sequential=False)

    train, dev = IMDB.splits(textField, labelField, train="train", validation="dev")

    textField.build_vocab(train, dev, vectors=GloVe(name="6B", dim=50))
    labelField.build_vocab(train, dev)
    print("Input_DIM = " , len(textField.vocab))
    print("pad_idx = " ,  textField.vocab.stoi[textField.pad_token])

    trainLoader, testLoader = data.BucketIterator.splits((train, dev), shuffle=True, batch_size=64,
                                                         sort_key=lambda x: len(x.text), sort_within_batch=True)

    net = Network().to(device)
    criterion = lossFunc()
    optimiser = topti.Adam(net.parameters(), lr=0.001)  # Minimise the loss using the Adam algorithm.

    def unpack(batch):
        """Return (embedded inputs, lengths, float labels) for a batch, on `device`."""
        # The token indices are looked up in the pre-trained vectors here, so
        # the network receives already-embedded inputs.
        inputs = textField.vocab.vectors[batch.text[0]].to(device)
        length = batch.text[1].to(device)
        labels = batch.label.type(torch.FloatTensor).to(device)
        # Shift the label indices down by one so they can be used as 0/1
        # targets (presumably index 0 of the label vocab is reserved - TODO confirm).
        labels -= 1
        return inputs, length, labels

    # Make sure dropout is active while training.
    net.train()

    for epoch in range(10):
        running_loss = 0

        for i, batch in enumerate(trainLoader):
            # Get a batch and potentially send it to GPU memory.
            inputs, length, labels = unpack(batch)

            # PyTorch calculates gradients by accumulating contributions to them (useful for
            # RNNs).  Hence we must manually set them to zero before calculating them.
            optimiser.zero_grad()

            # Forward pass through the network.
            output = net(inputs, length)

            loss = criterion(output, labels)

            # Calculate gradients.
            loss.backward()

            # Minimise the loss according to the gradient.
            optimiser.step()

            running_loss += loss.item()

            # Report the mean loss over every block of 32 batches.
            if i % 32 == 31:
                print("Epoch: %2d, Batch: %4d, Loss: %.3f" % (epoch + 1, i + 1, running_loss / 32))
                running_loss = 0

    # Save model
    torch.save(net.state_dict(), "./model.pth")
    print("Saved model")

    # Switch to evaluation mode so that dropout is disabled; otherwise the
    # predictions (and the reported accuracy) are randomly perturbed.
    net.eval()

    num_correct = 0

    # Evaluate network on the test dataset.  We aren't calculating gradients, so disable autograd to speed up
    # computations and reduce memory usage.
    with torch.no_grad():
        for batch in testLoader:
            # Get a batch and potentially send it to GPU memory.
            inputs, length, labels = unpack(batch)

            # Get predictions
            outputs = torch.sigmoid(net(inputs, length))
            predicted = torch.round(outputs)

            num_correct += torch.sum(labels == predicted).item()

    accuracy = 100 * num_correct / len(dev)

    print(f"Classification accuracy: {accuracy}")

# Entry point: run the training/evaluation pipeline defined in main() above
# when this cell (or the exported script) is executed directly.
if __name__ == '__main__':
    main()


Using device: cuda:0
Input_DIM =  133418
pad_idx =  1
Epoch:  1, Batch:   32, Loss: 0.688
Epoch:  1, Batch:   64, Loss: 0.668
Epoch:  1, Batch:   96, Loss: 0.686
Epoch:  1, Batch:  128, Loss: 0.688
Epoch:  1, Batch:  160, Loss: 0.676
Epoch:  1, Batch:  192, Loss: 0.668
Epoch:  1, Batch:  224, Loss: 0.633
Epoch:  1, Batch:  256, Loss: 0.635
Epoch:  1, Batch:  288, Loss: 0.646
Epoch:  1, Batch:  320, Loss: 0.648
Epoch:  1, Batch:  352, Loss: 0.635
Epoch:  1, Batch:  384, Loss: 0.642
Epoch:  2, Batch:   32, Loss: 0.611
Epoch:  2, Batch:   64, Loss: 0.636
Epoch:  2, Batch:   96, Loss: 0.636
Epoch:  2, Batch:  128, Loss: 0.686
Epoch:  2, Batch:  160, Loss: 0.672
Epoch:  2, Batch:  192, Loss: 0.675
Epoch:  2, Batch:  224, Loss: 0.672
Epoch:  2, Batch:  256, Loss: 0.666
Epoch:  2, Batch:  288, Loss: 0.669
Epoch:  2, Batch:  320, Loss: 0.659
Epoch:  2, Batch:  352, Loss: 0.650
Epoch:  2, Batch:  384, Loss: 0.656
Epoch:  3, Batch:   32, Loss: 0.658
Epoch:  3, Batch:   64, Loss: 0.659
Epoch:  3,