<a href="https://colab.research.google.com/github/Ware-Hard-or-Soft/NLP-LLM/blob/main/Deep_RNN_for_sentiment_analysis_ipynb.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Deep RNN for sentiment analysis
By David Zheng
1. Data Loading and Preprocessing: It loads a dataset of movie reviews, preprocesses them (like converting words to numerical representations), and prepares them for training and testing.
2. Network Architecture: It defines an RNN architecture, specifically using GRU (Gated Recurrent Unit) or TEXTnetOrder2 with a hidden state for processing variable-length reviews.
3. Training: It trains the network on the labeled movie reviews, adjusting the network's parameters to minimize prediction errors.
4. Testing: It evaluates the trained network's performance on unseen test data, measuring its accuracy in classifying positive and negative reviews.

In [None]:
#############################################
#This code is based on Prof. Kak's material, with the following changes:
#1 using word_index as the integer representation of each word fed into the net; changed the dimensions of the tensors in the network to fit
#2 replacing the tanh nonlinearity with the sigmoid, adding detach() to the cell function, changing the learning rate to 1e-7; changed the dimensions of the tensors in the network to fit
#3 using batch size = 3 and making all the reviews as long as the longest one, using drop_last to match the batch size; changed the dimensions of the tensors in the network to fit


import sys,os,os.path
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as tvt
import torch.optim as optim
from torchsummary import summary
import numpy as np
from PIL import ImageFilter
import numbers
import re
import math
import random
import copy
import matplotlib.pyplot as plt
import gzip
import pickle
import time

In [None]:
# Install pymsgbox into the runtime (shell escape). No cell visible in this
# notebook imports pymsgbox directly -- TODO confirm it is still required.
!pip3 install pymsgbox
import matplotlib
# Select matplotlib's 'Agg' backend before any figure is created.
matplotlib.use('Agg')

Collecting pymsgbox
  Downloading https://files.pythonhosted.org/packages/ac/e0/0ac1ac67178a71b92e46f46788ddd799bb40bff40acd60c47c50be170374/PyMsgBox-1.0.7.tar.gz
Building wheels for collected packages: pymsgbox
  Building wheel for pymsgbox (setup.py) ... [?25l[?25hdone
  Created wheel for pymsgbox: filename=PyMsgBox-1.0.7-cp36-none-any.whl size=7321 sha256=8fe840b5073db78869cc84a846dd6eaabe9e9c52bb21b7c388de9175acef9fc0
  Stored in directory: /root/.cache/pip/wheels/8e/62/9f/951a04461ec012e443f9aa172598fc8f9c6e409bf753687fad
Successfully built pymsgbox
Installing collected packages: pymsgbox
Successfully installed pymsgbox-1.0.7


In [None]:
# Mount Google Drive inside the Colab runtime so the dataset archives can be read.
from google.colab import drive
drive.mount('/content/gdrive')
# Directory (with trailing slash) that holds the gzipped sentiment datasets;
# the same path is passed again as `dataroot` to DLStudio in the driver cell.
dataroot = '/content/gdrive/My Drive/ECE695/DLStudio/DLStudio-1.1.0/Examples/data/'

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/gdrive


In [None]:
class DLStudio(object):
    """
    Plain container for the hyper-parameters and run-time options shared by
    the training and testing code in this notebook.

    The constructor accepts keyword arguments only.  Recognised keywords:
    dataroot, learning_rate, momentum, epochs, batch_size, convo_layers_config,
    image_size, fc_layers_config, path_saved_model, classes, use_gpu,
    debug_train and debug_test.

    Attributes are only created for options that were supplied with a truthy
    value, except for the following, which always exist:
        learning_rate  (defaults to 1e-6)
        debug_train    (defaults to 0)
        debug_test     (defaults to 0)
        debug_config   (always 0)
        device         (torch.device; CPU unless use_gpu=True)

    Raises:
        ValueError: on positional arguments or unknown keywords.
        Exception:  if fc_layers_config does not start with -1, or if
                    use_gpu=True but CUDA is not available.
    """

    def __init__(self, *args, **kwargs ):
        if args:
            raise ValueError(
                   '''DLStudio constructor can only be called with keyword arguments for
                      the following keywords: epochs, learning_rate, batch_size, momentum,
                      convo_layers_config, image_size, dataroot, path_saved_model, classes,
                      image_size, convo_layers_config, fc_layers_config, debug_train, use_gpu, and
                      debug_test''')
        dataroot = kwargs.pop('dataroot', None)
        learning_rate = kwargs.pop('learning_rate', None)
        momentum = kwargs.pop('momentum', None)
        epochs = kwargs.pop('epochs', None)
        batch_size = kwargs.pop('batch_size', None)
        convo_layers_config = kwargs.pop('convo_layers_config', None)
        image_size = kwargs.pop('image_size', None)
        fc_layers_config = kwargs.pop('fc_layers_config', None)
        path_saved_model = kwargs.pop('path_saved_model', None)
        classes = kwargs.pop('classes', None)
        use_gpu = kwargs.pop('use_gpu', None)
        debug_train = kwargs.pop('debug_train', None)
        debug_test = kwargs.pop('debug_test', None)
        # Anything still left in kwargs is a keyword this class does not know.
        if len(kwargs) != 0: raise ValueError('''You have provided unrecognizable keyword args''')
        if dataroot:
            self.dataroot = dataroot
        if convo_layers_config:
            self.convo_layers_config = convo_layers_config
        if image_size:
            self.image_size = image_size
        if fc_layers_config:
            self.fc_layers_config = fc_layers_config
            # Compare by value: "is not -1" tests object identity, which only
            # happens to work for CPython's cached small ints.
            if fc_layers_config[0] != -1:
                raise Exception("""\n\n\nYour 'fc_layers_config' construction option is not correct. """
                                """The first element of the list of nodes in the fc layer must be -1 """
                                """because the input to fc will be set automatically to the size of """
                                """the final activation volume of the convolutional part of the network""")
        if  path_saved_model:
            self.path_saved_model = path_saved_model
        if classes:
            self.class_labels = classes
        self.learning_rate = learning_rate if learning_rate else 1e-6
        if momentum:
            self.momentum = momentum
        if epochs:
            self.epochs = epochs
        if batch_size:
            self.batch_size = batch_size
        # Always provide a device: the training/testing code reads
        # dl_studio.device unconditionally, so leaving it unset when the GPU
        # is not requested would fail later with an AttributeError.
        self.device = torch.device("cpu")
        if use_gpu is not None:
            self.use_gpu = use_gpu
            if use_gpu is True:
                if torch.cuda.is_available():
                    self.device = torch.device("cuda:0")
                else:
                    raise Exception("You requested GPU support, but there's no GPU on this machine")
        self.debug_train = debug_train if debug_train else 0
        self.debug_test = debug_test if debug_test else 0
        self.debug_config = 0


In [None]:
    # Notebook-wide default device.  Fall back to the CPU when CUDA is not
    # available instead of unconditionally naming "cuda:0", so code that
    # allocates tensors on `device` also works on CPU-only runtimes.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    class TextClassification(nn.Module):
        """
        The purpose of this inner class is to be able to use the DLStudio module for simple
        experiments in text classification.  Consider, for example, the problem of automatic
        classification of variable-length user feedback: you want to create a neural network
        that can label an uploaded product review of arbitrary length as positive or negative.
        One way to solve this problem is with a recurrent neural network in which you use a
        hidden state for characterizing a variable-length product review with a fixed-length
        state vector.  This inner class allows you to carry out such experiments.
        """
        def __init__(self, dl_studio, dataserver_train=None, dataserver_test=None, dataset_file_train=None, dataset_file_test=None):
            super(TextClassification, self).__init__()
            self.dl_studio = dl_studio
            self.dataserver_train = dataserver_train
            self.dataserver_test = dataserver_test

        class SentimentAnalysisDataset(torch.utils.data.Dataset):
            """
            Dataset of labelled reviews read from a gzipped pickle archive.

            The archive must unpickle to a 3-tuple
            (positive_reviews, negative_reviews, vocab) where the first two map
            a category name to a list of reviews (each review a sequence of
            words) and vocab is the collection of all words.

            Each item is a dict with keys:
                'review'    : tensor of shape (len(review), 1) holding, for each
                              word, its integer index in the sorted vocabulary
                'category'  : index of the review's category in self.categories
                'sentiment' : long tensor of shape (2,), one-hot with index 1
                              meaning positive and index 0 meaning negative

            Depending on `train_or_test` the loaded data is stored under the
            attribute names positive_reviews_<split>, negative_reviews_<split>,
            category_sizes_<split>_pos, category_sizes_<split>_neg,
            indexed_dataset_<split> and <split>_review_len.
            """

            def __init__(self, dl_studio, train_or_test, dataset_file):
                """
                Args:
                    dl_studio:     configuration object; only `dataroot` is read.
                    train_or_test: 'train' or 'test'.
                    dataset_file:  archive name, appended to dl_studio.dataroot.

                Raises:
                    ValueError: if train_or_test is neither 'train' nor 'test',
                                or if the archive holds no reviews.
                """
                super().__init__()
                # Compare by value; "is 'train'" only works when the caller's
                # string happens to be the same interned object.
                if train_or_test not in ('train', 'test'):
                    raise ValueError("train_or_test must be 'train' or 'test', not %r" % (train_or_test,))
                self.train_or_test = train_or_test
                root_dir = dl_studio.dataroot
                # The context manager closes the archive even if reading fails.
                with gzip.open(root_dir + dataset_file, 'rb') as f:
                    dataset = f.read()
                # NOTE(security): unpickling executes arbitrary code embedded in
                # the file -- only load archives from a trusted source.
                if sys.version_info[0] == 3:
                    positive_reviews, negative_reviews, vocab = pickle.loads(dataset, encoding='latin1')
                else:
                    positive_reviews, negative_reviews, vocab = pickle.loads(dataset)
                # Sort for BOTH splits so that a word maps to the same integer
                # at training time and at test time.
                self.vocab = sorted(vocab)
                self._word_to_index = None
                self.categories = sorted(list(positive_reviews.keys()))
                indexed_dataset = []
                review_lengths = []
                for reviews_by_category, label in ((positive_reviews, 1), (negative_reviews, 0)):
                    for category in reviews_by_category:
                        for review in reviews_by_category[category]:
                            indexed_dataset.append([review, category, label])
                            review_lengths.append(len(review))
                random.shuffle(indexed_dataset)
                # Both length lists always exist; only the one for this split is filled.
                self.train_review_len = []
                self.test_review_len = []
                split = train_or_test
                setattr(self, 'positive_reviews_' + split, positive_reviews)
                setattr(self, 'negative_reviews_' + split, negative_reviews)
                setattr(self, 'category_sizes_%s_pos' % split,
                        {category : len(positive_reviews[category]) for category in self.categories})
                setattr(self, 'category_sizes_%s_neg' % split,
                        {category : len(negative_reviews[category]) for category in self.categories})
                setattr(self, 'indexed_dataset_' + split, indexed_dataset)
                setattr(self, split + '_review_len', review_lengths)
                self.max_review_len = max(review_lengths)

            def get_vocab_size(self):
                """Return the number of entries in the vocabulary."""
                return len(self.vocab)

            def _word_index_map(self):
                """
                Return a word -> vocabulary-index dict, built on first use.
                The map is cached, so self.vocab must not be modified afterwards.
                """
                mapping = self.__dict__.get('_word_to_index')
                if mapping is None:
                    mapping = {}
                    for index, word in enumerate(self.vocab):
                        # setdefault keeps the FIRST position of a repeated
                        # word, which is what list.index() would return.
                        mapping.setdefault(word, index)
                    self._word_to_index = mapping
                return mapping

            def review_to_tensor(self, review):
                """
                Encode a review as a (len(review), 1) tensor of vocabulary indices.

                Raises:
                    ValueError: if a word is not part of the vocabulary.
                """
                mapping = self._word_index_map()
                indices = []
                for word in review:
                    if word not in mapping:
                        raise ValueError("%r is not in the vocabulary" % (word,))
                    indices.append(mapping[word])
                review_tensor = torch.tensor(indices, dtype=torch.get_default_dtype())
                return review_tensor.reshape(len(indices), 1)

            def sentiment_to_tensor(self, sentiment):
                """
                One-hot encode a 0/1 sentiment label as a long tensor of shape (2,).
                Any other label yields an all-zero tensor.
                """
                sentiment_tensor = torch.zeros(2, dtype=torch.long)
                if sentiment == 1:
                    sentiment_tensor[1] = 1
                elif sentiment == 0:
                    sentiment_tensor[0] = 1
                return sentiment_tensor

            def __len__(self):
                if self.train_or_test == 'train':
                    return len(self.indexed_dataset_train)
                elif self.train_or_test == 'test':
                    return len(self.indexed_dataset_test)

            def __getitem__(self, idx):
                if self.train_or_test == 'train':
                    review, review_category, review_sentiment = self.indexed_dataset_train[idx]
                else:
                    review, review_category, review_sentiment = self.indexed_dataset_test[idx]
                sample = {'review'       : self.review_to_tensor(review),
                          'category'     : self.categories.index(review_category), # should be converted to tensor, but not yet used
                          'sentiment'    : self.sentiment_to_tensor(review_sentiment) }
                return sample

        def load_SentimentAnalysisDataset(self, dataserver_train, dataserver_test ):
            self.train_dataloader = torch.utils.data.DataLoader(dataserver_train,
                        batch_size=self.dl_studio.batch_size,shuffle=True, num_workers=1,drop_last=True)
            self.test_dataloader = torch.utils.data.DataLoader(dataserver_test,
                               batch_size=self.dl_studio.batch_size,shuffle=False, num_workers=1,drop_last=True)

        class TEXTnet(nn.Module):
            """
            TEXTnet ("Text Classification Network"): a minimal recurrent cell
            for classifying variable-length sentiment data.

            At every step the current input is concatenated with the previous
            hidden state.  One linear layer maps that to the next hidden state;
            a second branch (linear -> ReLU -> dropout -> linear -> LogSoftmax)
            maps it to the class log-probabilities.  As the original author
            notes, performance is rather poor because nothing protects the
            recurrence against vanishing gradients.
            Location: Inner class TextClassification
            """
            def __init__(self, input_size, hidden_size, output_size):
                super().__init__()
                self.input_size, self.hidden_size, self.output_size = input_size, hidden_size, output_size
                combined_size = input_size + hidden_size
                self.combined_to_hidden = nn.Linear(combined_size, hidden_size)
                self.combined_to_middle = nn.Linear(combined_size, 100)
                self.middle_to_out = nn.Linear(100, output_size)
                self.logsoftmax = nn.LogSoftmax(dim=1)
                self.dropout = nn.Dropout(p=0.1)

            def forward(self, input, hidden):
                """Return (log_probs, next_hidden) for one step of the recurrence."""
                combined = torch.cat((input, hidden), 1)
                next_hidden = self.combined_to_hidden(combined)
                middle = torch.relu(self.combined_to_middle(combined))
                middle = self.dropout(middle)
                log_probs = self.logsoftmax(self.middle_to_out(middle))
                return log_probs, next_hidden

        class TEXTnetOrder2(nn.Module):
            """
            Variant of TEXTnet that additionally feeds a "cell" vector into
            every step.  The cell is a sigmoid-squashed linear function of the
            hidden state produced by the previous call to forward().

            The cell is stored as a plain tensor attribute and is detached from
            the autograd graph, so no gradient flows through it; it also
            carries over from one call of forward() to the next until it is
            overwritten.

            Args:
                input_size:  width of the per-step input.
                hidden_size: width of the hidden state and of the cell.
                output_size: number of classes.
                dls:         optional configuration object; if it has a
                             `device` attribute the initial cell is allocated
                             there.  May be None.
            """

            def __init__(self, input_size, hidden_size, output_size, dls):
                super().__init__()
                self.input_size = input_size
                self.hidden_size = hidden_size
                self.output_size = output_size
                self.combined_to_hidden = nn.Linear(input_size + 2*hidden_size, hidden_size)
                self.combined_to_middle = nn.Linear(input_size + 2*hidden_size, 100)
                self.middle_to_out = nn.Linear(100, output_size)
                self.logsoftmax = nn.LogSoftmax(dim=1)
                self.dropout = nn.Dropout(p=0.1)
                # for the cell
                # Do not depend on a notebook-global `device` (which named CUDA
                # unconditionally): take the device from `dls` when it offers
                # one, otherwise use the default device.  forward() moves the
                # cell next to its input in any case.
                self.cell = torch.zeros(1, hidden_size, device=getattr(dls, 'device', None))
                # Registered sub-module: follows the network on net.to(...).
                self.linear_for_cell = nn.Linear(hidden_size, hidden_size)

            def forward(self, input, hidden):
                """Return (log_probs, next_hidden) and refresh self.cell."""
                # The cell is not a parameter or buffer, so Module.to() does not
                # move it; align it with the input explicitly.
                cell = self.cell.to(input.device)
                combined = torch.cat((input, hidden, cell), 1)
                hidden = self.combined_to_hidden(combined)
                out = self.combined_to_middle(combined)
                out = torch.nn.functional.relu(out)
                out = self.dropout(out)
                out = self.middle_to_out(out)
                out = self.logsoftmax(out)
                # detach() keeps the stored cell out of the autograd graph so a
                # later backward pass never reaches into an earlier step.
                self.cell = torch.sigmoid(self.linear_for_cell(hidden)).detach()
                return out,hidden

        class GRUnet(nn.Module):
            """
            GRU-based classifier: a (possibly multi-layer) batch-first GRU
            followed by ReLU, a linear layer and LogSoftmax applied to the GRU
            output at the last time step of the sequence it is given.
            """

            def __init__(self, input_size, hidden_size, output_size, n_layers, drop_prob=0.2):
                super().__init__()
                self.hidden_size = hidden_size
                self.n_layers = n_layers
                self.gru = nn.GRU(input_size, hidden_size, n_layers,
                                  batch_first=True, dropout=drop_prob)
                self.fc = nn.Linear(hidden_size, output_size)
                self.relu = nn.ReLU()
                self.logsoftmax = nn.LogSoftmax(dim=1)

            def forward(self, x, h):
                """Run the GRU over x starting from h; return (log_probs, new_h)."""
                sequence_out, h = self.gru(x, h)
                last_step = sequence_out[:, -1]
                log_probs = self.logsoftmax(self.fc(self.relu(last_step)))
                return log_probs, h

            def init_hidden(self, batch_size):
                """
                Return an all-zero hidden state of shape
                (n_layers, batch_size, hidden_size) with the dtype and device of
                the network's first parameter.
                """
                reference = next(self.parameters()).data
                return reference.new_zeros(self.n_layers, batch_size, self.hidden_size)

        def save_model(self, model):
            "Save the trained model to a disk file"
            torch.save(model.state_dict(), self.dl_studio.path_saved_model)

        def run_code_for_training_for_text_classification_no_gru(self, net, hidden_size):
            filename_for_out = "performance_numbers_" + str(self.dl_studio.epochs) + ".txt"
            FILE = open(filename_for_out, 'w')
            net = copy.deepcopy(net)
            net = net.to(self.dl_studio.device)
            ## Note that the TEXTnet and TEXTnetOrder2 both produce LogSoftmax output:
            criterion = nn.NLLLoss()
#            criterion = nn.MSELoss()
#            criterion = nn.CrossEntropyLoss()
            accum_times = []
            optimizer = optim.SGD(net.parameters(),
                         lr=self.dl_studio.learning_rate, momentum=self.dl_studio.momentum)
            start_time = time.clock()
            for epoch in range(self.dl_studio.epochs):
                print("")
                running_loss = 0.0
                for i, data in enumerate(self.train_dataloader):
                    hidden = torch.zeros(1, hidden_size).to(self.dl_studio.device)
                    hidden = hidden.to(self.dl_studio.device)
                    review_tensor,category,sentiment = data['review'], data['category'], data['sentiment']
                    review_tensor = review_tensor.to(self.dl_studio.device)
                    sentiment = sentiment.to(self.dl_studio.device)
                    optimizer.zero_grad()
                    input = torch.zeros(1,review_tensor.shape[2]).to(self.dl_studio.device)
                    input = input.to(self.dl_studio.device)
                    for k in range(review_tensor.shape[1]):
                        input[0,:] = review_tensor[0,k]#.to(self.dl_studio.device)
                        output, hidden = net(input, hidden)
                    loss = criterion(output, torch.argmax(sentiment,1))
                    running_loss += loss.item()
                    loss.backward(retain_graph=True)
                    torch.autograd.set_detect_anomaly(True)
                    optimizer.step()
                    if i % 100==99:
                        avg_loss = running_loss / float(100)
                        current_time = time.clock()
                        time_elapsed = current_time-start_time
                        print("[epoch:%d  iter:%4d  elapsed_time: %4d secs]     loss: %.3f" % (epoch+1,i+1, time_elapsed,avg_loss))
                        accum_times.append(current_time-start_time)
                        FILE.write("%.3f\n" % avg_loss)
                        FILE.flush()
                        running_loss = 0.0
            print("\nFinished Training\n")
            self.save_model(net)

        def run_code_for_training_for_text_classification_with_gru(self, net, hidden_size):
            filename_for_out = "performance_numbers_" + str(self.dl_studio.epochs) + ".txt"
            FILE = open(filename_for_out, 'w')
            net = copy.deepcopy(net)
            net = net.to(self.dl_studio.device)
            ##  Note that the GREnet now produces the LogSoftmax output:
            criterion = nn.NLLLoss()
#            criterion = nn.MSELoss()
#            criterion = nn.CrossEntropyLoss()
            accum_times = []
            optimizer = optim.SGD(net.parameters(),
                         lr=self.dl_studio.learning_rate, momentum=self.dl_studio.momentum)
            for epoch in range(self.dl_studio.epochs):
                print("")
                running_loss = 0.0
                start_time = time.clock()
                for i, data in enumerate(self.train_dataloader):
                    review_tensor,category,sentiment = data['review'], data['category'], data['sentiment']
                    review_tensor = review_tensor.to(self.dl_studio.device)
                    sentiment = sentiment.to(self.dl_studio.device)
                    ## The following type conversion needed for MSELoss:
                    ##sentiment = sentiment.float()
                    optimizer.zero_grad()
                    hidden = net.init_hidden(3).to(self.dl_studio.device)
                    for k in range(review_tensor.shape[0]):
                        output, hidden = net(torch.unsqueeze(review_tensor[:,k],1), hidden)
                    ## If using NLLLoss, CrossEntropyLoss
                    loss = criterion(output, torch.argmax(sentiment, 1))
                    ## If using MSELoss:
                    ## loss = criterion(output, sentiment)
                    running_loss += loss.item()
                    loss.backward()
                    optimizer.step()
                    if i % 100 == 99:
                        avg_loss = running_loss / float(100)
                        current_time = time.clock()
                        time_elapsed = current_time-start_time
                        print("[epoch:%d  iter:%4d  elapsed_time:%4d secs]     loss: %.3f" % (epoch+1,i+1, time_elapsed,avg_loss))
                        accum_times.append(current_time-start_time)
                        FILE.write("%.3f\n" % avg_loss)
                        FILE.flush()
                        running_loss = 0.0
            print("Total Training Time: {}".format(str(sum(accum_times))))
            print("\nFinished Training\n")
            self.save_model(net)


        def run_code_for_testing_text_classification_with_gru_0(self, net, hidden_size):
            net.load_state_dict(torch.load(self.dl_studio.path_saved_model))
            classification_accuracy = 0.0
            negative_total = 0
            positive_total = 0
            confusion_matrix = torch.zeros(2,2)
            with torch.no_grad():
                for i, data in enumerate(self.test_dataloader):
                    review_tensor,category,sentiment = data['review'], data['category'], data['sentiment']
                    hidden = net.init_hidden(1)
                    for k in range(review_tensor.shape[1]):
                        output, hidden = net(torch.unsqueeze(torch.unsqueeze(review_tensor[0,k],0),0), hidden)
                    predicted_idx = torch.argmax(output).item()
                    gt_idx = torch.argmax(sentiment).item()
                    if i % 100 == 99:
                        print("   [i=%d]    predicted_label=%d       gt_label=%d\n\n" % (i+1, predicted_idx,gt_idx))
                    if predicted_idx == gt_idx:
                        classification_accuracy += 1
                    if gt_idx is 0:
                        negative_total += 1
                    elif gt_idx is 1:
                        positive_total += 1
                    confusion_matrix[gt_idx,predicted_idx] += 1
            out_percent = np.zeros((2,2), dtype='float')
            print("\n\nNumber of positive reviews tested: %d" % positive_total)
            print("\n\nNumber of negative reviews tested: %d" % negative_total)
            print("\n\nDisplaying the confusion matrix:\n")
            out_str = "                      "
            out_str +=  "%18s    %18s" % ('predicted negative', 'predicted positive')
            print(out_str + "\n")
            for i,label in enumerate(['true negative', 'true positive']):
                out_percent[0,0] = "%.3f" % (100 * confusion_matrix[0,0] / float(negative_total))
                out_percent[0,1] = "%.3f" % (100 * confusion_matrix[0,1] / float(negative_total))
                out_percent[1,0] = "%.3f" % (100 * confusion_matrix[1,0] / float(positive_total))
                out_percent[1,1] = "%.3f" % (100 * confusion_matrix[1,1] / float(positive_total))
                out_str = "%12s:  " % label
                for j in range(2):
                    out_str +=  "%18s" % out_percent[i,j]
                print(out_str)

        def run_code_for_training_for_text_classification_with_gru_0(self, net, hidden_size):
            filename_for_out = "performance_numbers_" + str(self.dl_studio.epochs) + ".txt"
            FILE = open(filename_for_out, 'w')
            net = copy.deepcopy(net)
            net = net.to(self.dl_studio.device)
            ##  Note that the GREnet now produces the LogSoftmax output:
            criterion = nn.NLLLoss()
#            criterion = nn.MSELoss()
#            criterion = nn.CrossEntropyLoss()
            accum_times = []
            optimizer = optim.SGD(net.parameters(),
                         lr=self.dl_studio.learning_rate, momentum=self.dl_studio.momentum)
            for epoch in range(self.dl_studio.epochs):
                print("")
                running_loss = 0.0
                start_time = time.clock()
                for i, data in enumerate(self.train_dataloader):
                    review_tensor,category,sentiment = data['review'], data['category'], data['sentiment']
                    review_tensor = review_tensor.to(self.dl_studio.device)
                    sentiment = sentiment.to(self.dl_studio.device)
                    ## The following type conversion needed for MSELoss:
                    ##sentiment = sentiment.float()
                    optimizer.zero_grad()
                    hidden = net.init_hidden(1).to(self.dl_studio.device)
                    for k in range(review_tensor.shape[1]):
                        output, hidden = net(torch.unsqueeze(torch.unsqueeze(review_tensor[0,k],0),0), hidden)
                    ## If using NLLLoss, CrossEntropyLoss
                    loss = criterion(output, torch.argmax(sentiment, 1))
                    ## If using MSELoss:
                    ## loss = criterion(output, sentiment)
                    running_loss += loss.item()
                    loss.backward()
                    optimizer.step()
                    if i % 100 == 99:
                        avg_loss = running_loss / float(100)
                        current_time = time.clock()
                        time_elapsed = current_time-start_time
                        print("[epoch:%d  iter:%4d  elapsed_time:%4d secs]     loss: %.3f" % (epoch+1,i+1, time_elapsed,avg_loss))
                        accum_times.append(current_time-start_time)
                        FILE.write("%.3f\n" % avg_loss)
                        FILE.flush()
                        running_loss = 0.0
            print("Total Training Time: {}".format(str(sum(accum_times))))
            print("\nFinished Training\n")
            self.save_model(net)

        def run_code_for_testing_text_classification_with_gru(self, net, hidden_size):
                    net.load_state_dict(torch.load(self.dl_studio.path_saved_model))
                    classification_accuracy = 0.0
                    negative_total = 0
                    positive_total = 0
                    predicted_idx=[]
                    gt_idx=[]
                    #outputlist=[]
                    #senlist=[]
                    confusion_matrix = torch.zeros(2,2)
                    with torch.no_grad():
                        for i, data in enumerate(self.test_dataloader):
                            review_tensor,category,sentiment = data['review'], data['category'], data['sentiment']
                            hidden = net.init_hidden(3)

                            for k in range(review_tensor.shape[0]):
                                output, hidden = net(torch.unsqueeze(review_tensor[:,k],1), hidden)
                                #print(output.shape)
                                predicted_idx=torch.argmax(output,dim=1).tolist()
                                #print(outputlist)
                                gt_idx=torch.argmax(sentiment,dim=1).tolist()
                                for m in range(self.dl_studio.batch_size):

                                    #if i % 100 == 99:
                                        #print("   [i=%d]    predicted_label=%d       gt_label=%d\n\n" % (i+1, predicted_idx,gt_idx))
                                    if predicted_idx[m] == gt_idx[m]:
                                        classification_accuracy += 1
                                    if gt_idx[m] is 0:
                                        negative_total += 1
                                    elif gt_idx[m] is 1:
                                        positive_total += 1
                                    confusion_matrix[gt_idx[m],predicted_idx[m]] += 1
                    out_percent = np.zeros((2,2), dtype='float')
                    print("\n\nNumber of positive reviews tested: %d" % positive_total)
                    print("\n\nNumber of negative reviews tested: %d" % negative_total)
                    print("\n\nDisplaying the confusion matrix:\n")
                    out_str = "                      "
                    out_str +=  "%18s    %18s" % ('predicted negative', 'predicted positive')
                    print(out_str + "\n")
                    for i,label in enumerate(['true negative', 'true positive']):
                        out_percent[0,0] = "%.3f" % (100 * confusion_matrix[0,0] / float(negative_total))
                        out_percent[0,1] = "%.3f" % (100 * confusion_matrix[0,1] / float(negative_total))
                        out_percent[1,0] = "%.3f" % (100 * confusion_matrix[1,0] / float(positive_total))
                        out_percent[1,1] = "%.3f" % (100 * confusion_matrix[1,1] / float(positive_total))
                        out_str = "%12s:  " % label
                        for j in range(2):
                            out_str +=  "%18s" % out_percent[i,j]
                        print(out_str)

        def run_code_for_testing_text_classification_no_gru(self, net, hidden_size):
            net.load_state_dict(torch.load(self.dl_studio.path_saved_model))
            classification_accuracy = 0.0
            negative_total = 0
            positive_total = 0
            confusion_matrix = torch.zeros(2,2)
            with torch.no_grad():
                for i, data in enumerate(self.test_dataloader):
                    review_tensor,category,sentiment = data['review'], data['category'], data['sentiment']
                    input = torch.zeros(1,review_tensor.shape[2]).to(self.dl_studio.device)
                    hidden = torch.zeros(1, hidden_size).to(self.dl_studio.device)
                    for k in range(review_tensor.shape[1]):
                        input[0,:] = review_tensor[0,k]
                        output, hidden = net(input, hidden).to(self.dl_studio.device)
                    predicted_idx = torch.argmax(output).item()
                    gt_idx = torch.argmax(sentiment).item()
                    if i % 100 == 99:
                        print("   [i=%4d]    predicted_label=%d       gt_label=%d" % (i+1, predicted_idx,gt_idx))
                    if predicted_idx == gt_idx:
                        classification_accuracy += 1
                    if gt_idx is 0:
                        negative_total += 1
                    elif gt_idx is 1:
                        positive_total += 1
                    confusion_matrix[gt_idx,predicted_idx] += 1
            out_percent = np.zeros((2,2), dtype='float')
            print("\n\nNumber of positive reviews tested: %d" % positive_total)
            print("\n\nNumber of negative reviews tested: %d" % negative_total)
            print("\n\nDisplaying the confusion matrix:\n")
            out_str = "                      "
            out_str +=  "%18s    %18s" % ('predicted negative', 'predicted positive')
            print(out_str + "\n")
            for i,label in enumerate(['true negative', 'true positive']):
                out_percent[0,0] = "%.3f" % (100 * confusion_matrix[0,0] / float(negative_total))
                out_percent[0,1] = "%.3f" % (100 * confusion_matrix[0,1] / float(negative_total))
                out_percent[1,0] = "%.3f" % (100 * confusion_matrix[1,0] / float(positive_total))
                out_percent[1,1] = "%.3f" % (100 * confusion_matrix[1,1] / float(positive_total))
                out_str = "%12s:  " % label
                for j in range(2):
                    out_str +=  "%18s" % out_percent[i,j]
                print(out_str)




In [None]:
#!/usr/bin/env python

##  Adapted from text_classification_with_gru.py -- this cell trains the
##  TEXTnetOrder2 network with the *no-GRU* training routine.

"""
This script is an attempt at solving the sentiment classification problem
with the TEXTnetOrder2 recurrent network (no GRU).  It configures DLStudio,
loads the train/test sentiment datasets, builds the model, reports its size,
and runs the no-GRU training loop.
"""

import random
import numpy
import torch
import os, sys


# ---- Reproducibility -------------------------------------------------------
seed = 0
random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
numpy.random.seed(seed)
torch.backends.cudnn.deterministic = True
# The cuDNN autotuner flag is `benchmark` (singular).  The previous spelling
# `benchmarks` did not refer to that flag, so the intended setting was never
# actually applied.
torch.backends.cudnn.benchmark = False
os.environ['PYTHONHASHSEED'] = str(seed)


##  watch -d -n 0.5 nvidia-smi


# ---- Experiment configuration ----------------------------------------------
dls = DLStudio(
#                  dataroot = "/home/kak/TextDatasets/",
                  dataroot = '/content/gdrive/My Drive/ECE695/DLStudio/DLStudio-1.1.0/Examples/data/',
                  path_saved_model = "./saved_model",
                  momentum = 0.9,
#                  learning_rate =  0.004,
                  learning_rate =  1e-7,
                  epochs = 1,
                  batch_size = 1,
                  classes = ('negative','positive'),
                  debug_train = 1,
                  debug_test = 1,
                  use_gpu = True,
              )


# ---- Data ------------------------------------------------------------------
text_cl = TextClassification( dl_studio = dls )
dataserver_train = TextClassification.SentimentAnalysisDataset(
                                 train_or_test = 'train',
                                 dl_studio = dls,
 #                               dataset_file = "sentiment_dataset_train_3.tar.gz",
                               dataset_file = "sentiment_dataset_train_200.tar.gz",
#                                dataset_file = "sentiment_dataset_train_40.tar.gz",
                                                                      )
dataserver_test = TextClassification.SentimentAnalysisDataset(
                                 train_or_test = 'test',
                                 dl_studio = dls,
 #                            dataset_file = "sentiment_dataset_test_3.tar.gz",
                               dataset_file = "sentiment_dataset_test_200.tar.gz",
#                                dataset_file = "sentiment_dataset_test_40.tar.gz",
                                                                  )
text_cl.dataserver_train = dataserver_train
text_cl.dataserver_test = dataserver_test

text_cl.load_SentimentAnalysisDataset(dataserver_train, dataserver_test)

# ---- Model -----------------------------------------------------------------
# NOTE(review): the notebook header says each word is fed to the net as a
# single integer word index rather than a one-hot vector, which is presumably
# why the input width is hard-coded to 1 instead of the real vocabulary size.
vocab_size = 1 #dataserver_train.get_vocab_size()
hidden_size = 512
output_size = 2                            # for positive and negative sentiments

model = text_cl.TEXTnetOrder2(vocab_size, hidden_size, output_size,dls)

number_of_learnable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

# This counts parameter *tensors* (weights and biases separately), which is
# what the "number of layers" line below actually reports.
num_layers = len(list(model.parameters()))

print("\n\nThe number of layers in the model: %d" % num_layers)
print("\nThe number of learnable parameters in the model: %d" % number_of_learnable_params)
# The label below is inherited from the one-hot version of this script; here
# the printed value is the hard-coded input width (1).
print("\nThe size of the vocabulary (which is also the size of the one-hot vecs for words): %d\n\n" % vocab_size)

## TRAINING:
print("\nStarting training --- BE VERY PATIENT, PLEASE!  The first report will be at 100th iteration. May take around 5 minutes.\n")
text_cl.run_code_for_training_for_text_classification_no_gru(model, hidden_size)

## TESTING:
#import pymsgbox
#response = pymsgbox.confirm("Finished training.  Start testing on unseen data?")
#if response == "OK":







The number of layers in the model: 8

The number of learnable parameters in the model: 890770

The size of the vocabulary (which is also the size of the one-hot vecs for words): 1



Starting training --- BE VERY PATIENT, PLEASE!  The first report will be at 100th iteration. May take around 5 minutes.


[epoch:1  iter: 100  elapsed_time:   59 secs]     loss: 70.919
[epoch:1  iter: 200  elapsed_time:  129 secs]     loss: 46.936
[epoch:1  iter: 300  elapsed_time:  184 secs]     loss: 33.881
[epoch:1  iter: 400  elapsed_time:  251 secs]     loss: 25.247
[epoch:1  iter: 500  elapsed_time:  316 secs]     loss: 23.838
[epoch:1  iter: 600  elapsed_time:  373 secs]     loss: 17.526
[epoch:1  iter: 700  elapsed_time:  426 secs]     loss: 13.662
[epoch:1  iter: 800  elapsed_time:  494 secs]     loss: 8.105
[epoch:1  iter: 900  elapsed_time:  549 secs]     loss: 9.786
[epoch:1  iter:1000  elapsed_time:  608 secs]     loss: 10.911
[epoch:1  iter:1100  elapsed_time:  655 secs]     loss: 6.446
[ep

In [None]:
#!/usr/bin/env python

##  text_classification_with_gru.py

"""
This script is an attempt at solving the sentiment classification problem
with an RNN that uses a GRU to get around the problem of vanishing gradients
that are common to neural networks with feedback.

It configures DLStudio, loads the train/test sentiment datasets, builds a
GRUnet model, reports its size, and runs the GRU training loop.
"""

import random
import numpy
import torch
import os, sys


# ---- Reproducibility -------------------------------------------------------
seed = 0
random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
numpy.random.seed(seed)
torch.backends.cudnn.deterministic = True
# The cuDNN autotuner flag is `benchmark` (singular).  The previous spelling
# `benchmarks` did not refer to that flag, so the intended setting was never
# actually applied.
torch.backends.cudnn.benchmark = False
os.environ['PYTHONHASHSEED'] = str(seed)


##  watch -d -n 0.5 nvidia-smi


# ---- Experiment configuration ----------------------------------------------
dls = DLStudio(
#                  dataroot = "/home/kak/TextDatasets/",
                  dataroot = '/content/gdrive/My Drive/ECE695/DLStudio/DLStudio-1.1.0/Examples/data/',
                  path_saved_model = "./saved_model",
                  momentum = 0.9,
#                  learning_rate =  0.004,
                  learning_rate =  1e-4,
                  epochs = 1,
                  batch_size = 3,
                  classes = ('negative','positive'),
                  debug_train = 1,
                  debug_test = 1,
                  use_gpu = True,
              )


# ---- Data ------------------------------------------------------------------
text_cl = TextClassification( dl_studio = dls )
dataserver_train = TextClassification.SentimentAnalysisDataset(
                                 train_or_test = 'train',
                                 dl_studio = dls,
  #                              dataset_file = "sentiment_dataset_train_3.tar.gz",
                               dataset_file = "sentiment_dataset_train_200.tar.gz",
  #                            dataset_file = "sentiment_dataset_train_40.tar.gz",
                                                                      )
dataserver_test = TextClassification.SentimentAnalysisDataset(
                                 train_or_test = 'test',
                                 dl_studio = dls,
  #                          dataset_file = "sentiment_dataset_test_3.tar.gz",
                                dataset_file = "sentiment_dataset_test_200.tar.gz",
  #                           dataset_file = "sentiment_dataset_test_40.tar.gz",
                                                                  )
text_cl.dataserver_train = dataserver_train
text_cl.dataserver_test = dataserver_test

text_cl.load_SentimentAnalysisDataset(dataserver_train, dataserver_test)

# ---- Model -----------------------------------------------------------------
# NOTE(review): the notebook header says each word is fed to the net as a
# single integer word index rather than a one-hot vector, which is presumably
# why the input width is hard-coded to 1 instead of the real vocabulary size.
vocab_size = 1 #dataserver_train.get_vocab_size()
hidden_size = 512
output_size = 2                            # for positive and negative sentiments
n_layers = 2

model = text_cl.GRUnet(vocab_size, hidden_size, output_size, n_layers)

number_of_learnable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

# This counts parameter *tensors* (weights and biases separately), which is
# what the "number of layers" line below actually reports -- it is unrelated
# to `n_layers` above.
num_layers = len(list(model.parameters()))

print("\n\nThe number of layers in the model: %d" % num_layers)
print("\nThe number of learnable parameters in the model: %d" % number_of_learnable_params)
# The label below is inherited from the one-hot version of this script; here
# the printed value is the hard-coded input width (1).
print("\nThe size of the vocabulary (which is also the size of the one-hot vecs for words): %d\n\n" % vocab_size)

## TRAINING:
print("\nStarting training --- BE VERY PATIENT, PLEASE!  The first report will be at 100th iteration. May take around 5 minutes.\n")
text_cl.run_code_for_training_for_text_classification_with_gru(model, hidden_size)

## TESTING:
#import pymsgbox
#response = pymsgbox.confirm("Finished training.  Start testing on unseen data?")
#if response == "OK":







The number of layers in the model: 10

The number of learnable parameters in the model: 2368002

The size of the vocabulary (which is also the size of the one-hot vecs for words): 1



Starting training --- BE VERY PATIENT, PLEASE!  The first report will be at 100th iteration. May take around 5 minutes.


[epoch:1  iter: 100  elapsed_time:   1 secs]     loss: 0.692
[epoch:1  iter: 200  elapsed_time:   3 secs]     loss: 0.699
[epoch:1  iter: 300  elapsed_time:   5 secs]     loss: 0.696
[epoch:1  iter: 400  elapsed_time:   7 secs]     loss: 0.693
[epoch:1  iter: 500  elapsed_time:   8 secs]     loss: 0.695
Total Training Time: 27.054418999999143

Finished Training



In [None]:
# Run the GRU testing routine on the current `model` / `hidden_size`
# (NOTE(review): presumably the GRUnet built in the cell above -- confirm the
# cells were executed in order).
# In the saved output below, ~96.6% of negative and ~96.8% of positive reviews
# are predicted positive, i.e. the model is hardly discriminating between the
# classes; this matches the saved training loss, which stays near 0.69.
text_cl.run_code_for_testing_text_classification_with_gru(model, hidden_size)



Number of positive reviews tested: 2937


Number of negative reviews tested: 2688


Displaying the confusion matrix:

                      predicted negative    predicted positive

true negative:               3.385            96.615
true positive:               3.235            96.765
