## CMSC 498L: Introduction to Deep Learning- HW3

#### Name: Shantam Bajpai
#### UID: 116831956


## Import the required libraries

In [54]:
import torch
import torch.nn as nn
import torch.nn.functional as f
from torchvision import datasets,transforms
import numpy as np
import matplotlib.pyplot as plt
import h5py
import scipy
from scipy import ndimage
from PIL import Image
%matplotlib inline

## Load the dataset and convert to PyTorch Tensors

Below we load our training and testing data and convert the input and the output labels to pytorch tensors

In [55]:
def load_data(train_file, test_file):
    # Load the training data
    train_dataset = h5py.File(train_file,'r')
    
    # Separate features(x) and labels(y) for training set
    train_set_x_orig = np.array(train_dataset['train_set_x'])
    train_set_y_orig = np.array(train_dataset['train_set_y'])

    # Load the test data
    test_dataset = h5py.File(test_file,'r')
    
    # Separate features(x) and labels(y) for training set
    test_set_x_orig = np.array(test_dataset['test_set_x'])
    test_set_y_orig = np.array(test_dataset['test_set_y'])

    classes = np.array(test_dataset["list_classes"][:]) # the list of classes
    
    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))
    
    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes

train_file="data/train_catvnoncat.h5"
test_file="data/test_catvnoncat.h5"
train_x_orig, train_y, test_x_orig, test_y, classes = load_data(train_file, test_file)

# Reshape the training and testing examples
train_x_flatten = train_x_orig.reshape(train_x_orig.shape[0], -1)   # The "-1" makes reshape flatten the remaining dimensions
test_x_flatten = test_x_orig.reshape(test_x_orig.shape[0], -1)

# Standardize data to have feature values between 0 and 1.
train_x = train_x_flatten/255.
test_x = test_x_flatten/255.

# Convert the input and output to PyTorch Tensors

train_x = torch.from_numpy(train_x).float()
test_x = torch.from_numpy(test_x).float()
train_y = torch.from_numpy(train_y).float()
test_y = torch.from_numpy(test_y).float()

## Class for the Network

Below is the implementation for the 2 layer neural network and the subsequent initialization for the number of input neurons, the number of hidden neurons and the neurons in the output layer.

The first layer is passed through a ReLu activation unit followed by sigmoid activation function.

In [56]:
class TwoLayerNetwork(nn.Module):
    
    """
    n_x: The number of neurons in the input layer
    n_h: The number of neurons in the hidden layer
    n_y: The number of neurons in the output layer
    
    """
    def __init__(self,n_x = 12288, n_h = 7, n_y = 1):
        
        super().__init__()
        
        self.fc1 = nn.Linear(n_x,n_h)
        self.fc2 = nn.Linear(n_h,n_y)
    
    """
    Function for computing the forward pass with a relu activation unit in fully connected layer 1
    and returns an output from the sigmoid activation unit
    """
    def forward(self, X):
        
        X = f.relu(self.fc1(X))
        X = self.fc2(X)
        
        return torch.sigmoid(X)

In [57]:
# Set a random seed to random initilization of the weights and biases
torch.manual_seed(101)

# Create an object of the network
model = TwoLayerNetwork()

# Print out the model parameters
model.parameters

<bound method Module.parameters of TwoLayerNetwork(
  (fc1): Linear(in_features=12288, out_features=7, bias=True)
  (fc2): Linear(in_features=7, out_features=1, bias=True)
)>

Here tried using Cross Entropy loss for the binary classification problem but it didnot work. After some brainstorming and researching on stackoverflow and pytorch forums, BCELoss() worked perfectly.

In [58]:
# We define our optimizer as a Cross Entropy Loss Function 
criterion = nn.BCELoss()


# We define our criterion to be Gradient Descent 
#torch.optim.SGD(model.parameters(),lr = 0.01,momentum=0.9, weight_decay=0.001)
optimizer = torch.optim.Adam(model.parameters(),lr=0.01)

In [59]:
# Training our model
import time

# Set the start time for model execution
start_time  = time.time()

# Set the number of epochs
epochs = 3000

# Trackers
train_correct = [] # What we get correct during training phase
train_losses = []  # What we get wrong during the training phase
test_correct = []  # what we get correct during the testing phase
test_losses = []   # What we get wrong during the testing phase

for i in range(epochs):
    
    # Predicted Labels
    y_prediction = model(train_x)
    
    # Computed Loss using cross entropy
    loss = criterion(y_prediction,train_y)
    
    # Append the losses in a list
    train_losses.append(loss)
    
    if i%100 == 0:
        
        print(f'epoch :{i} loss:{loss.item()}')
    
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
# Calculates the time elapsed
total_time = time.time() - start_time
print(f'Duration: {total_time/60}mins')

  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)


epoch :0 loss:0.7448181509971619
epoch :100 loss:0.371491014957428
epoch :200 loss:0.14398454129695892
epoch :300 loss:0.05015343055129051
epoch :400 loss:0.02215004153549671
epoch :500 loss:0.012089536525309086
epoch :600 loss:0.007572536822408438
epoch :700 loss:0.005184698849916458
epoch :800 loss:0.0037714517675340176
epoch :900 loss:0.0028652031905949116
epoch :1000 loss:0.002248611766844988
epoch :1100 loss:0.0018097207648679614
epoch :1200 loss:0.0014859653310850263
epoch :1300 loss:0.0012401430867612362
epoch :1400 loss:0.0010490057757124305
epoch :1500 loss:0.0008974351221695542
epoch :1600 loss:0.000775194144807756
epoch :1700 loss:0.0006751795881427824
epoch :1800 loss:0.0005923277349211276
epoch :1900 loss:0.0005229392554610968
epoch :2000 loss:0.00046425912296399474
epoch :2100 loss:0.00041420882917009294
epoch :2200 loss:0.0003711914469022304
epoch :2300 loss:0.000333966949256137
epoch :2400 loss:0.00030155619606375694
epoch :2500 loss:0.00027316881460137665
epoch :2600 l

## Testing Phase for the Classification problem

In the testing phase we perform a single forward pass through the layer after training has been performed for a set number of iterations. torch.no_grad() has been used as we do not require gradient calculation while doing a single forward pass.

In [60]:
test_corr = 0
y_val = model(test_x)
# Accuracy of our prediction
# We use torch.zero_grad top conserve memory as we do not require gradient calculation while running our model on the test data
with torch.no_grad():
    
    for i in range(0,len(y_val)):
        
        # Threshold value
        if(y_val[i] > 0.5):
            y_val[i] = 1
        else:
            y_val[i] = 0
    
        # Test correct
        test_corr += (y_val[i]==test_y[0,i]).sum()
print(f'Testing Accuracy: {100*test_corr.item() / (len(y_val))}%')

Testing Accuracy: 70.0%


## Hyperparameters tested and the optimal Hyperparameters found

learning rate: 0.01 iterations: 4000 loss: 0.0052 momentum: 0 testing accuracy: 78%

learning rate: 0.01 iterations: 4000 loss: 0.00345 momentum: 0.9 testing accuracy: 70%

learning rate: 0.01 iterations: 5000 loss: 0.00262 momentum: 0.9 testing accuracy: 70%

learning rate: 0.001 iterations: 5000 loss: 0.00411 momentum: 0.9 testing accuracy: 72%

learning rate: 0.01 iterations: 5000 loss: 0.64829 momentum: 0.9 weight decay: 0.1 testing accuracy: 34%

learning rate: 0.1 iterations: 5000 loss: 0.6439 momentum: 0.9 weight decay: 0.1 testing accuracy: 34%

learning rate: 0.1 iterations: 4000 loss: 0.64397 momentum: 0.9 weight decay: 0 testing accuracy: 34%

learning rate: 0.01 iterations: 7000 loss: 0.001838 momentum: 0.9 testing accuracy: 70%

learning rate: 0.001 iterations: 8000 loss: 0.00204 momentum: 0.9 testing accuracy: 72%

learning rate: 0.05 iterations: 8000 loss: 0.64397 momentum: 0.9 testing accuracy: 34%

learning rate: 0.5 iterations: 5000 loss: 18.1122 momentum: 0.9 testing accuracy: 66%

learning rate: 0.01 iterations: 5000 loss: 18.0321 momentum: 0.9 weight decay:0.1 testing accuracy: 34%

learning rate: 0.01 iterations: 5000 loss: 0.0322 momentum: 0.99 testing accuracy: 58%

learning rate: 0.01 iterations: 5000 loss: 0.01227 momentum: 0.99 weight decay: 0.01 testing accuracy: 74%

learning rate: 0.01 iterations: 5000 loss: 0.012275 momentum: 0 weight decay:0.01 testing accuracy: 74%

### The best Hyperparameter Values found for the Classification problem

**learning rate: 0.01 iterations: 4000 loss: 0.0052 momentum: 0 testing accuracy: 78%**

## Sentiment analysis

In [61]:
import re
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

## Loading the dataset

The first part for implementing sentiment analysis is importing the dataset and and then preprocessing the noisy dataset

In [62]:
def load_data(train_file, test_file):
    train_dataset = []
    test_dataset = []
    
    # Read the training dataset file line by line
    for line in open(train_file, 'r',encoding = "utf8"):
        train_dataset.append(line.strip())
        
    for line in open(test_file, 'r',encoding="utf8"):
        test_dataset.append(line.strip())
    return train_dataset, test_dataset

### Pre-process Noisy Dataset

In [63]:
def preprocess_reviews(reviews):
    
    reviews = [REPLACE_NO_SPACE.sub(NO_SPACE, line.lower()) for line in reviews]
    reviews = [REPLACE_WITH_SPACE.sub(SPACE, line) for line in reviews]
    
    return reviews

# Load the train and the test files
train_file = "data/train_imdb.txt"
test_file = "data/test_imdb.txt"
train_dataset, test_dataset = load_data(train_file, test_file)

# This is just how the data is organized. The first 50% data is positive and the rest 50% is negative for both train and test splits.
y = [1 if i < len(train_dataset)*0.5 else 0 for i in range(len(train_dataset))]

# Preprocessing
REPLACE_NO_SPACE = re.compile("(\.)|(\;)|(\:)|(\!)|(\')|(\?)|(\,)|(\")|(\()|(\))|(\[)|(\])|(\d+)")
REPLACE_WITH_SPACE = re.compile("(<br\s*/><br\s*/>)|(\-)|(\/)")
NO_SPACE = ""
SPACE = " "

# Pre-Processing
train_dataset_clean = preprocess_reviews(train_dataset)
test_dataset_clean = preprocess_reviews(test_dataset)

In [64]:
cv = CountVectorizer(binary=True, stop_words="english", max_features=2000)
cv.fit(train_dataset_clean)
X = cv.transform(train_dataset_clean)
X_test = cv.transform(test_dataset_clean)
X = np.array(X.todense()).astype(float)
X_test = np.array(X_test.todense()).astype(float)
y = np.array(y)

In [65]:
# Train-Test Split
X_train, X_val, y_train, y_val = train_test_split(X, y, train_size = 0.80)

# This is just to correct the shape of the arrays as required by the two_layer_model
# In PyTorch the batch size comes first
y_train = y_train.reshape(1,-1)
y_val = y_val.reshape(1,-1)

# Convert to Pytorch tensors
X_train = torch.from_numpy(X_train).float()
y_train = torch.from_numpy(y_train).float()
X_val = torch.from_numpy(X_val).float()
y_val = torch.from_numpy(y_val).float()

In [66]:
y_train.shape

torch.Size([1, 800])

In [67]:
X_val.shape

torch.Size([201, 2000])

## Two Layer Class Model

Below is the 2 layer neural network to perform sentiment analysis on the dataset. Here n_x is the number of input neurons, n_h is the number of hidden neurons and n_y is the number of output neurons(As this is a binary classification problem output neuron will be 1)

In [68]:
class TwoLayerNetworkSentiment(nn.Module):
    
    """
    n_x: The number of neurons in the input layer
    n_h: The number of neurons in the hidden layer
    n_y: The number of neurons in the output layer
    
    """
    def __init__(self,n_x = X_train.shape[1], n_h = 200, n_y = 1):
        
        super().__init__()
        
        self.fc1 = nn.Linear(n_x,n_h)
        self.fc2 = nn.Linear(n_h,n_y)
    
    """
    Function for computing the forward pass with a relu activation unit in fully connected layer 1
    and returns an output from the sigmoid activation unit
    """
    def forward(self, X):
        
        X = f.relu(self.fc1(X))
        X = self.fc2(X)
        
        return torch.sigmoid(X)

In [69]:
# Set a random seed to random initilization of the weights and biases
torch.manual_seed(101)

# Create an object of the network
Sentiment = TwoLayerNetworkSentiment()

# Print out the model parameters
Sentiment.parameters

<bound method Module.parameters of TwoLayerNetworkSentiment(
  (fc1): Linear(in_features=2000, out_features=200, bias=True)
  (fc2): Linear(in_features=200, out_features=1, bias=True)
)>

## Criterion and Optimizer

The criterion that I have used here is the Binary Cross Entropy Loss function and the optimizer that I have used is the Stochastic Gradient Descent Optimizer in which I will be vary =ing the learning rate, Momentum value and L2 Penalty(weight decay) to obtain the highest test dataset prediction accuracy.

In [70]:
# We define our optimizer as a Cross Entropy Loss Function 
criterion = nn.BCELoss()

# We define our criterion to be Gradient Descent 
optimizer = torch.optim.SGD(model.parameters(),lr = 0.01,momentum=0.9, weight_decay=0.001)

## Training our model

In [53]:
# Training our model
import time

# Set the start time for model execution
start_time  = time.time()

# Set the number of epochs
epochs = 3000

# Trackers
train_losses = []  # What we get wrong during the training phase

for i in range(epochs):
    
    # Predicted Labels
    y_prediction = Sentiment(X_train)
    
    # Computed Loss using cross entropy
    loss = criterion(y_prediction,y_train)
    
    # Append the losses in a list
    train_losses.append(loss)
    
    if i%100 == 0:
        
        print(f'epoch :{i} loss:{loss.item()}')
    
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
# Calculates the time elapsed
total_time = time.time() - start_time
print(f'Duration: {total_time/60}mins')

epoch :0 loss:0.6935747265815735
epoch :100 loss:0.6935747265815735
epoch :200 loss:0.6935747265815735
epoch :300 loss:0.6935747265815735
epoch :400 loss:0.6935747265815735
epoch :500 loss:0.6935747265815735
epoch :600 loss:0.6935747265815735
epoch :700 loss:0.6935747265815735
epoch :800 loss:0.6935747265815735
epoch :900 loss:0.6935747265815735
epoch :1000 loss:0.6935747265815735
epoch :1100 loss:0.6935747265815735
epoch :1200 loss:0.6935747265815735
epoch :1300 loss:0.6935747265815735
epoch :1400 loss:0.6935747265815735
epoch :1500 loss:0.6935747265815735
epoch :1600 loss:0.6935747265815735
epoch :1700 loss:0.6935747265815735
epoch :1800 loss:0.6935747265815735
epoch :1900 loss:0.6935747265815735
epoch :2000 loss:0.6935747265815735
epoch :2100 loss:0.6935747265815735
epoch :2200 loss:0.6935747265815735
epoch :2300 loss:0.6935747265815735
epoch :2400 loss:0.6935747265815735
epoch :2500 loss:0.6935747265815735
epoch :2600 loss:0.6935747265815735
epoch :2700 loss:0.6935747265815735
epoc

## Testing Phase

In [32]:
test_corr = 0
y_eval = Sentiment(X_val)
# Accuracy of our prediction
# We use torch.zero_grad top conserve memory as we do not require gradient calculation while running our model on the test data
with torch.no_grad():
    
    for i in range(0,len(y_eval)):
        
        # Threshold value
        if(y_eval[i] > 0.5):
            y_eval[i] = 1
        else:
            y_eval[i] = 0
    
        # Test correct
        test_corr += (y_eval[i]==y_val[0,i]).sum()
print(f'Testing Accuracy: {100*test_corr.item() / (len(y_eval))}%')

Testing Accuracy: 56.21890547263681%


## Hyperparameter Testing

learning rate: 0.01 iterations: 3000 loss: 0.693967 momentum: 0 weight decay: 0 testing accuracy: 56.218%

learning rate: 0.01 iterations: 3000 loss: 0.693967 momentum: 0 weight decay: 0 testing accuracy: 56.218%