<b>Name: Arpit Aggarwal</b> <br>
<b>UID: 116747189</b>

# 1. Packages

In [1]:
# header files
import numpy as np
import torch
import h5py
from matplotlib import pyplot as plt
import re
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from torch.utils.data.sampler import SubsetRandomSampler

# Cat vs Non-Cat Image Classification

# 2. Loading Dataset

Using hw2.ipynb load_data() function. The load_data() function loads data from the training and testing files. Next step, is to flatten the image so that they can be fed as an input to the neural network. Lastly, the training and testing data is normalized between 0 and 1 which will be used for the neural network.

In [2]:
def load_data(train_file, test_file):
    # Load the training data
    train_dataset = h5py.File(train_file, 'r')
    
    # Separate features(x) and labels(y) for training set
    train_set_x_orig = np.array(train_dataset['train_set_x'])
    train_set_y_orig = np.array(train_dataset['train_set_y'])

    # Load the test data
    test_dataset = h5py.File(test_file, 'r')
    
    # Separate features(x) and labels(y) for training set
    test_set_x_orig = np.array(test_dataset['test_set_x'])
    test_set_y_orig = np.array(test_dataset['test_set_y'])
    classes = np.array(test_dataset["list_classes"][:]) # the list of classes
    
    train_set_y_orig = train_set_y_orig.reshape((train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((test_set_y_orig.shape[0]))
    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes

# training and testing files
train_file = "data/train_catvnoncat.h5"
test_file = "data/test_catvnoncat.h5"
train_x_orig, train_output, test_x_orig, test_output, classes = load_data(train_file, test_file) 
train_x_flatten = train_x_orig.reshape(train_x_orig.shape[0], -1)
test_x_flatten = test_x_orig.reshape(test_x_orig.shape[0], -1)

# Standardize data to have feature values between 0 and 1.
train_input = train_x_flatten / 255.
test_input = test_x_flatten / 255.

# print data length
print ("train_input's shape: " + str(train_input.shape))
print ("test_input's shape: " + str(test_input.shape))

train_input's shape: (209, 12288)
test_input's shape: (50, 12288)


# 3. Convert dataset to Tensor form

Convert the dataset to Tensor form so that it can be fed into the PyTorch neural network.

In [3]:
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# use torch.from_numpy() to get the tensor form of the numpy array
train_input = torch.from_numpy(train_input).float().to(device)
test_input = torch.from_numpy(test_input).float().to(device)
train_output = torch.from_numpy(train_output).float().to(device)
test_output = torch.from_numpy(test_output).float().to(device)
valid_input = train_input[169:]
train_input = train_input[:169]
valid_output = train_output[169:]
train_output = train_output[:169]

# 4. Hyper-parameters

Set the hyper-parameters of the two-layer neural net.

In [11]:
learning_rate = 0.005
num_epochs = 1000
weight_decay = 0.001
momentum = 0.9

# 5. Model-Architecture

The model-architecture is defined using pytorch Net class. The __init__ function is where we define the architecture of the neural network, i.e in this it is two layers. The forward function is where the forward pass step of the neural network takes place.

In [12]:
# neural network class
class Net(torch.nn.Module):
    # init function
    def __init__(self, num_input_neurons, num_hidden_neurons_1, num_hidden_neurons_2, num_output_neurons):
        super(Net, self).__init__()
        self.fc1 = torch.nn.Linear(num_input_neurons, num_hidden_neurons_1)
        self.fc2 = torch.nn.Linear(num_hidden_neurons_1, num_hidden_neurons_2)
        self.fc3 = torch.nn.Linear(num_hidden_neurons_2, num_output_neurons)
        self.dropout = torch.nn.Dropout(0.2)
        
    # forward pass step of the neural network
    def forward(self, input):
        x = torch.nn.functional.relu(self.fc1(input))
        x = self.dropout(x)
        x = torch.nn.functional.relu(self.fc2(x))
        x = self.dropout(x)
        output = torch.nn.functional.sigmoid(self.fc3(x))
        return output

# create object of the model
net = Net(int(train_input.shape[1]), 512, 512, 1).to(device)
print(net)

Net(
  (fc1): Linear(in_features=12288, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=512, bias=True)
  (fc3): Linear(in_features=512, out_features=1, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
)


# 7. Loss function

We will use Binary Cross-entropy loss as we are doing image classification (cat vs non-cat)

In [13]:
# loss function
criterion = torch.nn.BCELoss()

# 8. Gradient Descent

Next step is to define the optimizer we will be using for training the neural net. We will use gradient descent (full-batch) as out optimizer.<br>

In [14]:
# optimizers
optimizer = torch.optim.SGD(net.parameters(), lr = learning_rate, momentum = momentum, weight_decay = weight_decay)

# 9. Training phase

Now we will be training the neural network to get the optimal set of weights and biases required for this problem.

In [15]:
# training phase
for epoch in range(0, num_epochs):
    optimizer.zero_grad()
        
    # forward step
    pred_output = net(train_input)
        
    # find loss
    loss = criterion(pred_output.squeeze(), train_output)
    
    # backpropagation step
    loss.backward()
    optimizer.step()
        
    # update train loss
    train_loss = loss.item()
    
    net.eval()
    pred_output = net(valid_input)
        
    # find loss
    loss = criterion(pred_output.squeeze(), valid_output)
    
    # update train loss
    valid_loss = loss.item()
    
    if((epoch + 1)%100 == 0):
        print('Train Loss: {:.4f}, Valid Loss: {:.4f}' .format(train_loss, valid_loss))

Train Loss: 0.3391, Valid Loss: 0.6948
Train Loss: 0.0288, Valid Loss: 1.3186
Train Loss: 0.0087, Valid Loss: 1.6460
Train Loss: 0.0047, Valid Loss: 1.8148
Train Loss: 0.0032, Valid Loss: 1.9228
Train Loss: 0.0024, Valid Loss: 1.9998
Train Loss: 0.0019, Valid Loss: 2.0574
Train Loss: 0.0016, Valid Loss: 2.1020
Train Loss: 0.0014, Valid Loss: 2.1377
Train Loss: 0.0012, Valid Loss: 2.1668


# 10. Testing Phase

Evaluating model on testing data

In [16]:
# testing phase
net.eval()
pred_output = net(test_input)
loss = criterion(pred_output.squeeze(), test_output)
#print("Testing Loss: " + str(loss.item()))

# accuracy
correct = 0
for index in range(0, len(pred_output)):
    if(pred_output[index] > 0.5):
        pred_output[index] = 1
    else:
        pred_output[index] = 0
    
    if(pred_output[index] == test_output[index]):
        correct = correct + 1
print("Testing accuracy is: " + str(100.0 * float(float(correct) / len(pred_output))) + "%")

Testing accuracy is: 76.0%


# 11. Results

This section contains all the hyper-parameters I tried and the corresponding accuracies.

1. learning_rate = 0.001, num_epochs = 4000, momentum = 0, weight_decay = 0, Testing Accuracy = 72%
2. learning_rate = 0.005, num_epochs = 4000, momentum = 0, weight_decay = 0, Testing Accuracy = 66%
3. learning_rate = 0.01, num_epochs = 4000, momentum = 0, weight_decay = 0, Testing Accuracy = 68%
4. learning_rate = 0.05, num_epochs = 4000, momentum = 0, weight_decay = 0, Testing Accuracy = 64%
5. learning_rate = 0.05, num_epochs = 4000, momentum = 0, weight_decay = 0.01, Testing Accuracy = 34%
6. learning_rate = 0.01, num_epochs = 4000, momentum = 0, weight_decay = 0.01, Testing Accuracy = 74%
7. learning_rate = 0.001, num_epochs = 4000, momentum = 0, weight_decay = 0.01, Testing Accuracy = 66%
8. learning_rate = 0.005, num_epochs = 4000, momentum = 0, weight_decay = 0.01, Testing Accuracy = 70%
9. learning_rate = 0.005, num_epochs = 4000, momentum = 0, weight_decay = 0.001, Testing Accuracy = 72%
10. learning_rate = 0.001, num_epochs = 4000, momentum = 0, weight_decay = 0.001, Testing Accuracy = 64%
11. learning_rate = 0.01, num_epochs = 4000, momentum = 0, weight_decay = 0.001, Testing Accuracy = 70%
12. learning_rate = 0.05, num_epochs = 4000, momentum = 0, weight_decay = 0.001, Testing Accuracy = 64%
13. learning_rate = 0.05, num_epochs = 4000, momentum = 0.9, weight_decay = 0.01, Testing Accuracy = 34%
14. learning_rate = 0.01, num_epochs = 4000, momentum = 0.9, weight_decay = 0.01, Testing Accuracy = 58%
15. learning_rate = 0.001, num_epochs = 4000, momentum = 0.9, weight_decay = 0.01, Testing Accuracy = 74%
16. learning_rate = 0.005, num_epochs = 4000, momentum = 0.9, weight_decay = 0.01, Testing Accuracy = 72%
17. learning_rate = 0.005, num_epochs = 4000, momentum = 0.9, weight_decay = 0.001, Testing Accuracy = 72%
18. learning_rate = 0.001, num_epochs = 4000, momentum = 0.9, weight_decay = 0.001, Testing Accuracy = 74%
19. learning_rate = 0.01, num_epochs = 4000, momentum = 0.9, weight_decay = 0.001, Testing Accuracy = 34%
20. learning_rate = 0.05, num_epochs = 4000, momentum = 0.9, weight_decay = 0.001, Testing Accuracy = 34%

# 12. Optimal Hyper-parameters obtained

The optimal hyper-parameters obtained were as follows: <br><br>
<b>learning_rate = 0.001, num_epochs = 4000, momentum = 0.9, weight_decay = 0.01, Testing Accuracy = 74%</b><br>
<b>learning_rate = 0.01, num_epochs = 4000, momentum = 0, weight_decay = 0.01, Testing Accuracy = 74%</b><br>
<b>learning_rate = 0.001, num_epochs = 4000, momentum = 0.9, weight_decay = 0.001, Testing Accuracy = 74%</b>

# Predicting sentiment of movie reviews

# 13. Loading data

Using the load_data function of hw2.ipynb and then preprocessing the data as done in the hw2.ipynb notebook

In [None]:
def load_data(train_file, test_file):
    train_dataset = []
    test_dataset = []
    
    # Read the training dataset file line by line
    for line in open(train_file, 'r'):
        train_dataset.append(line.strip())
        
    for line in open(test_file, 'r'):
        test_dataset.append(line.strip())
    return train_dataset, test_dataset

def preprocess_reviews(reviews):
    reviews = [REPLACE_NO_SPACE.sub(NO_SPACE, line.lower()) for line in reviews]
    reviews = [REPLACE_WITH_SPACE.sub(SPACE, line) for line in reviews]
    return reviews

# loading data
train_file = "data/train_imdb.txt"
test_file = "data/test_imdb.txt"
train_dataset, test_dataset = load_data(train_file, test_file)
y = [1 if i < len(train_dataset)*0.5 else 0 for i in range(len(train_dataset))]

# pre-processing
REPLACE_NO_SPACE = re.compile("(\.)|(\;)|(\:)|(\!)|(\')|(\?)|(\,)|(\")|(\()|(\))|(\[)|(\])|(\d+)")
REPLACE_WITH_SPACE = re.compile("(<br\s*/><br\s*/>)|(\-)|(\/)")
NO_SPACE = ""
SPACE = " "
train_dataset_clean = preprocess_reviews(train_dataset)
test_dataset_clean = preprocess_reviews(test_dataset)

# Vectorization
cv = CountVectorizer(binary=True, stop_words="english", max_features=2000)
cv.fit(train_dataset_clean)
X = cv.transform(train_dataset_clean)
X_test = cv.transform(test_dataset_clean)
X = np.array(X.todense()).astype(float)
X_test = np.array(X_test.todense()).astype(float)
y = np.array(y)

# 14. Splitting of dataset

Using sklearn for splitting dataset into training and testing

In [None]:
X_train, X_val, y_train, y_val = train_test_split(X, y, train_size = 0.80)
y_train = y_train.reshape(1,-1)
y_val = y_val.reshape(1,-1)

# 15. Convert dataset to Tensor form

Convert the dataset to Tensor form so that it can be fed into the PyTorch neural network.

In [None]:
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# use torch.from_numpy() to get the tensor form of the numpy array
train_input = torch.from_numpy(X_train).float().to(device)
train_output = torch.from_numpy(y_train).float().to(device)
train_output = train_output.squeeze()
test_input = torch.from_numpy(X_val).float().to(device)
test_output = torch.from_numpy(y_val).float().to(device)
test_output = test_output.squeeze()

# 16. Hyper-parameters

Set the hyper-parameters for the network

In [None]:
learning_rate = 0.005
num_epochs = 4000
momentum = 0.9
weight_decay = 0.01

# 17. Model Architecture

The model-architecture is defined using pytorch Net class. The __init__ function is where we define the architecture of the neural network, i.e in this it is two layers. The forward function is where the forward pass step of the neural network takes place.

In [None]:
# neural network class
class Net(torch.nn.Module):
    # init function
    def __init__(self, num_input_neurons, num_hidden_neurons, num_output_neurons):
        super(Net, self).__init__()
        self.fc1 = torch.nn.Linear(num_input_neurons, num_hidden_neurons)
        self.fc2 = torch.nn.Linear(num_hidden_neurons, num_output_neurons)
        
    # forward pass step of the neural network
    def forward(self, input):
        output = torch.nn.functional.sigmoid(self.fc2(torch.nn.functional.relu(self.fc1(input))))
        return output
    
# get the neural net object
net = Net(int(train_input.shape[1]), 200, 1).to(device)
print(net)

# 18. Loss function

We will use Binary Cross-entropy loss as we are doing sentiment analysis

In [None]:
# loss function
criterion = torch.nn.BCELoss()

# 19. Gradient Descent

Next step is to define the optimizer we will be using for training the neural net. We will use gradient descent (full-batch) as out optimizer.

In [None]:
# optimizer
optimizer = torch.optim.SGD(net.parameters(), lr = learning_rate, momentum = momentum, weight_decay = weight_decay) 

# 20. Training phase

Now we will be training the neural network to get the optimal set of weights and biases required for this problem.

In [None]:
# training phase
for epoch in range(0, num_epochs):
    
    # forward step
    pred_output = net(train_input)
    
    # find loss
    loss = criterion(pred_output.squeeze(), train_output)
    
    # backpropagation step
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    if((epoch + 1)%100 == 0):
        print('Loss after iteration {}: {:.4f}' .format(epoch + 1, loss.item()))

# 21. Testing phase

Evaluating model on testing data

In [None]:
# testing phase
net.eval()
pred_output = net(test_input)
loss = criterion(pred_output.squeeze(), test_output)

# accuracy
correct = 0
for index in range(0, len(pred_output)):
    if(pred_output[index] > 0.5):
        pred_output[index] = 1
    else:
        pred_output[index] = 0
    
    if(pred_output[index] == test_output[index]):
        correct = correct + 1
print("Testing accuracy is: " + str(100.0 * float(float(correct) / len(pred_output))) + "%")

# 22. Results

This section contains all the hyper-parameters I tried and the corresponding accuracies.

1. learning_rate = 0.05, num_epochs = 4000, momentum = 0, weight_decay = 0, Testing Accuracy = 89.05%
2. learning_rate = 0.01, num_epochs = 4000, momentum = 0, weight_decay = 0, Testing Accuracy = 89.05%
3. learning_rate = 0.001, num_epochs = 4000, momentum = 0, weight_decay = 0, Testing Accuracy = 77.11%
4. learning_rate = 0.005, num_epochs = 4000, momentum = 0, weight_decay = 0, Testing Accuracy = 89.05%
5. learning_rate = 0.05, num_epochs = 4000, momentum = 0, weight_decay = 0.01, Testing Accuracy = 89.55%
6. learning_rate = 0.01, num_epochs = 4000, momentum = 0, weight_decay = 0.01, Testing Accuracy = 89.55%
7. learning_rate = 0.001, num_epochs = 4000, momentum = 0, weight_decay = 0.01, Testing Accuracy = 74.62%
8. learning_rate = 0.005, num_epochs = 4000, momentum = 0, weight_decay = 0.01, Testing Accuracy = 90.04%
9. learning_rate = 0.05, num_epochs = 4000, momentum = 0, weight_decay = 0.001, Testing Accuracy = 89.05%
10. learning_rate = 0.01, num_epochs = 4000, momentum = 0, weight_decay = 0.001, Testing Accuracy = 89.05%
11. learning_rate = 0.001, num_epochs = 4000, momentum = 0, weight_decay = 0.001, Testing Accuracy = 68.65%
12. learning_rate = 0.005, num_epochs = 4000, momentum = 0, weight_decay = 0.001, Testing Accuracy = 90.04%
13. learning_rate = 0.05, num_epochs = 4000, momentum = 0.9, weight_decay = 0.001, Testing Accuracy = 89.05%
14. learning_rate = 0.01, num_epochs = 4000, momentum = 0.9, weight_decay = 0.001, Testing Accuracy = 89.05%
15. learning_rate = 0.001, num_epochs = 4000, momentum = 0.9, weight_decay = 0.001, Testing Accuracy = 89.55%
16. learning_rate = 0.005, num_epochs = 4000, momentum = 0.9, weight_decay = 0.001, Testing Accuracy = 89.045%
17. learning_rate = 0.05, num_epochs = 4000, momentum = 0.9, weight_decay = 0.01, Testing Accuracy = 90.04%
18. learning_rate = 0.01, num_epochs = 4000, momentum = 0.9, weight_decay = 0.01, Testing Accuracy = 90.04%
19. learning_rate = 0.001, num_epochs = 4000, momentum = 0.9, weight_decay = 0.01, Testing Accuracy = 89.55%

# 23. Optimal Hyper-parameters obtained

The optimal hyper-parameters obtained were as follows: <br><br>
<b>learning_rate = 0.005, num_epochs = 4000, momentum = 0, weight_decay = 0.01, Testing Accuracy = 90.04%</b><br>
<b>learning_rate = 0.005, num_epochs = 4000, momentum = 0, weight_decay = 0.001, Testing Accuracy = 90.04%</b><br>
<b>learning_rate = 0.05, num_epochs = 4000, momentum = 0.9, weight_decay = 0.01, Testing Accuracy = 90.04%</b><br>
<b>learning_rate = 0.01, num_epochs = 4000, momentum = 0.9, weight_decay = 0.01, Testing Accuracy = 90.04%</b><br>
