<a href="https://colab.research.google.com/github/hevra01/A-Path-Finding-Algorithm/blob/main/multi_class_classification_CIFAR10.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import the required modules
import torch
torch.manual_seed(0)  # seed PyTorch's global RNG right away so later random operations start from a fixed state
import torch.nn as nn # more object oriented
import torch.nn.functional as F # more functional
# torchvision is used for image and video transformations. It also has its own datasets.
import torchvision.transforms as T
# NOTE(review): MNIST (below) and tqdm are not referenced by any cell visible in this notebook.
from torchvision.datasets import MNIST
from torch.utils.data import random_split, DataLoader
from tqdm import tqdm
import ssl
import torch.optim as optim
# Swap the factory `ssl` uses for default HTTPS contexts for the unverified one,
# i.e. HTTPS requests that go through it no longer check the server certificate.
# NOTE(review): presumably a workaround for a failing dataset download; it is a
# process-wide security trade-off, so prefer fixing the local CA bundle - confirm.
ssl._create_default_https_context = ssl._create_unverified_context

In [None]:
# CIFAR10 ships with torchvision, so the dataset class can be imported directly.
from torchvision.datasets import CIFAR10
import torchvision.transforms as T

# Preprocessing for the training images, applied in this order:
#   1. ToTensor  - convert the image to a PyTorch tensor
#   2. Grayscale - RGB to a single grayscale channel
#   3. Normalize - shift/scale with mean 0.5 and std 0.5
#                  (normalised inputs are meant to help convergence and accuracy)
# Further image transforms can be added to the list.
train_transform = T.Compose([
    T.ToTensor(),
    T.Grayscale(),
    T.Normalize(mean=(0.5,), std=(0.5,)),
])

# Preprocessing for the test images: the same three steps.
# `val_transform` is just a second name for this very object.
test_transform = T.Compose([
    T.ToTensor(),
    T.Grayscale(),
    T.Normalize(mean=(0.5,), std=(0.5,)),
])
val_transform = test_transform


# Download the train and test splits separately (into ./CIFAR10) and attach the
# matching preprocessing pipeline to each of them.
train_set = CIFAR10(
    root='CIFAR10',
    train=True,
    transform=train_transform,
    download=True,
)
test_set = CIFAR10(
    root='CIFAR10',
    train=False,
    transform=test_transform,
    download=True,
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to CIFAR10/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting CIFAR10/cifar-10-python.tar.gz to CIFAR10
Files already downloaded and verified


In [None]:
# Hold out part of the training data as a validation set. The validation set is
# used to tune the hyper-parameters, e.g. learning rate, number of hidden layers.
# The split is done on the dataset itself, before any batches are created.
# 80 percent stays in the training set, the remaining 20 percent becomes validation data.

# `train_set` is rebound at the end of this cell. When the cell is re-run,
# `train_set` is therefore already the Subset produced by the previous run;
# unwrap it so the split is always taken from the full training set instead of
# shrinking an earlier 80 % slice again.
full_train_set = train_set.dataset if isinstance(train_set, torch.utils.data.Subset) else train_set

train_set_size = int(len(full_train_set) * 0.8)
valid_set_size = len(full_train_set) - train_set_size
train_set, validation_set = torch.utils.data.random_split(full_train_set, [train_set_size, valid_set_size])


In [None]:
# Wrap every split in a DataLoader that serves mini-batches of `batchsize` samples.
# Only the training loader shuffles; test and validation data are read in order.
batchsize = 32
trainset = DataLoader(train_set, batch_size=batchsize, shuffle=True)
testset = DataLoader(test_set, batch_size=batchsize)
validationset = DataLoader(validation_set, batch_size=batchsize)

# Class names in CIFAR-10 label order, so classes[label] gives the name of an
# integer label (0 = airplane ... 9 = truck). This has to be an ordered sequence:
# a set literal has no defined order and cannot be indexed by label.
classes = ('airplane', 'automobile', 'bird', 'cat', 'deer', 'dog',
           'frog', 'horse', 'ship', 'truck')

In [None]:
# this is our neural network!!
class NeuralNet(nn.Module):
    """Fully connected classifier with a configurable stack of hidden layers.

    The network is a chain of ``nn.Linear`` layers stored in ``self.deep_nn``:
    ``num_hidden_layers`` hidden layers named ``ff0``, ``ff1``, ... followed by
    one output layer named ``classifier``. The activation applied after each
    hidden layer is chosen per call (see ``forward``); the output layer returns
    raw logits because ``nn.CrossEntropyLoss`` applies the softmax itself.

    Args:
        input_size: number of features of one flattened sample.
        output_size: number of classes (size of the logit vector).
        num_hidden_layers: how many hidden layers to create (0 is allowed and
            yields a single linear layer from input to output).
        hidden_layer_size: sequence with the width of each hidden layer; the
            first ``num_hidden_layers`` entries are used.
    """

    # Activation names accepted by `forward`. Lookup is case-insensitive so the
    # historical 'Tanh' spelling and the plain 'tanh' both work.
    _ACTIVATIONS = {
        'relu': F.relu,
        'sigmoid': torch.sigmoid,
        'tanh': torch.tanh,
    }

    def __init__(self, input_size, output_size, num_hidden_layers, hidden_layer_size):
        super().__init__()  # initialize the super class

        # remembered so that batches can be flattened to the right width later
        self.input_size = input_size
        self.num_hidden_layers = num_hidden_layers
        self.hidden_layer_size = hidden_layer_size
        self.deep_nn = nn.Sequential()

        # create the hidden layers; each layer's input width is the previous layer's output width
        in_features = input_size
        for i in range(num_hidden_layers):
            self.deep_nn.add_module(f'ff{i}', nn.Linear(in_features, hidden_layer_size[i]))
            in_features = hidden_layer_size[i]

        # Using the running width (instead of indexing hidden_layer_size again)
        # keeps this correct when there are no hidden layers at all.
        self.deep_nn.add_module('classifier', nn.Linear(in_features, output_size))

    def _flatten(self, X):
        """Reshape a batch to 2-D with ``input_size`` columns, one row per sample."""
        return X.view(-1, self.input_size)

    # passing of the data
    def forward(self, data, activation_func):
        """Run ``data`` through the network and return the output-layer logits.

        Args:
            data: 2-D tensor whose second dimension is ``input_size``.
            activation_func: name of the activation used after every hidden
                layer: 'relu', 'sigmoid' or 'tanh' (case-insensitive).

        Raises:
            ValueError: if ``activation_func`` is not one of the known names.
                Failing loudly avoids silently skipping the hidden layers.
        """
        try:
            activation = self._ACTIVATIONS[activation_func.lower()]
        except (KeyError, AttributeError):
            raise ValueError(
                f"unknown activation function {activation_func!r}; "
                f"expected one of {sorted(self._ACTIVATIONS)}"
            ) from None

        for i in range(self.num_hidden_layers):
            data = activation(self.deep_nn[i](data))

        # No softmax here: CrossEntropyLoss already applies it to the logits.
        return self.deep_nn[self.num_hidden_layers](data)

    # The loss function measures how well the model is doing; the optimizer
    # adjusts the weights and biases to minimize that loss.
    def training_the_model(self, learning_rate, activation_func, trainset, validationset, max_epochs=250):
        """Train with Adam until the validation loss stops improving.

        After every epoch the model is evaluated on ``validationset``. Training
        stops the first time the validation loss is not lower than the best
        value seen so far (early stopping without patience), or after
        ``max_epochs`` epochs.

        Args:
            learning_rate: learning rate passed to ``optim.Adam``.
            activation_func: activation name forwarded to ``forward``.
            trainset: iterable of ``(X, y)`` training batches.
            validationset: iterable of ``(X, y)`` validation batches.
            max_epochs: upper bound on the number of epochs.

        Returns:
            int: the number of epochs that were run, including the epoch whose
            validation loss failed to improve. Equals ``max_epochs`` when the
            loss improved in every epoch.
        """
        # All parameters of the module are handed to the optimizer.
        optimizer = optim.Adam(self.parameters(), learning_rate)
        loss_fn = nn.CrossEntropyLoss()

        # Lowest validation loss seen so far; infinity so the first epoch always counts as an improvement.
        best_loss = float('inf')

        for epoch in range(max_epochs):
            for X, y in trainset:
                output = self(self._flatten(X), activation_func)
                batch_loss = loss_fn(output, y)
                optimizer.zero_grad()
                batch_loss.backward()
                # update the parameters from the gradients stored by backward()
                optimizer.step()

            _, validation_loss = self.evaluatingModel(validationset, activation_func)
            if validation_loss < best_loss:
                # still improving on the validation data: keep training
                best_loss = validation_loss
            else:
                # validation loss stopped decreasing, which is taken as the onset of overfitting
                return epoch + 1

        return max_epochs

    def evaluatingModel(self, validationset, activation_funct):
        """Compute accuracy and mean batch loss on a dataset without updating weights.

        Args:
            validationset: iterable of ``(X, y)`` batches.
            activation_funct: activation name forwarded to ``forward``.

        Returns:
            tuple: ``(accuracy, loss)`` where ``accuracy`` is the fraction of
            correctly classified samples rounded to 3 decimals and ``loss`` is
            the cross-entropy averaged over the batches (a 0-dim tensor).

        Raises:
            ValueError: if ``validationset`` yields no samples.
        """
        correct = 0
        total = 0
        loss_total = 0
        num_batches = 0
        loss_fn = nn.CrossEntropyLoss()

        # no gradients are needed while evaluating
        with torch.no_grad():
            for X, y in validationset:
                # one forward pass per batch, shared by the accuracy and the loss
                output = self(self._flatten(X), activation_funct)
                correct += (output.argmax(dim=1) == y).sum().item()
                total += output.size(0)
                loss_total += loss_fn(output, y)
                num_batches += 1

        if total == 0:
            raise ValueError("cannot evaluate the model on an empty dataset")

        # average loss = summed batch losses / number of batches
        return round(correct / total, 3), loss_total / num_batches
            
        
        


In [None]:
# sanity check
# Measure the accuracy and the loss of an *untrained* network and compare them
# with the expected chance-level values: with 10 classes the accuracy should be
# close to 0.1 and the cross-entropy close to ln(10) ~= 2.30.
net = NeuralNet(32*32, 10, 2, [684, 344])

# Use the model's own evaluation routine rather than a hand-copied loop: it runs
# without gradients and returns the rounded accuracy together with the loss
# averaged over the batches.
sanity_accuracy, sanity_loss = net.evaluatingModel(trainset, 'relu')

print("Accuracy: ", sanity_accuracy)
print("Loss: ", sanity_loss)

Accuracy:  0.102
Loss:  tensor(2.3040)


In [None]:
# Grid search over the hyper-parameters: learning rate, activation function and
# hidden-layer layout (number of hidden layers and neurons per layer).
# grid_dictionary maps (epochs, number of hidden layers, activation, learning rate)
# to [validation accuracy, validation loss] of the network trained with that setting.
grid_dictionary = {}

# candidate values of every hyper-parameter; entry k of amount_of_neurons
# describes a network with k + 1 hidden layers
amount_of_neurons = [[512], [684, 344], [768, 512, 256], [819, 614, 409, 204]]
activation_functions = ['relu', 'sigmoid']
learning_rates = [0.1, 0.01, 0.001, 0.0001]

# visit every combination of the hyper-parameters
for learning_rate in learning_rates:
    for activation_func in activation_functions:
        for no_layer, layer_sizes in enumerate(amount_of_neurons):
            depth = no_layer + 1

            # train a fresh network for this combination, then score it on the validation data
            net = NeuralNet(32*32, 10, depth, layer_sizes)
            epoch = net.training_the_model(learning_rate, activation_func, trainset, validationset)
            validation_accuracy, validation_loss = net.evaluatingModel(validationset, activation_func)

            print("\n \n")
            print("epoch no: ", epoch, "  learning rate: ", learning_rate, "  activation_func: ", activation_func)
            print("number of layer: ", depth, "  validation accuracy: ", validation_accuracy, "  validation loss: ", validation_loss)

            grid_dictionary[epoch, depth, activation_func, learning_rate] = [validation_accuracy, validation_loss]
        


 

epoch no:  3   learning rate:  0.1   activation_func:  relu
number of layer:  1   validation accuracy:  0.176   validation loss:  tensor(147.7524)

 

epoch no:  3   learning rate:  0.1   activation_func:  relu
number of layer:  2   validation accuracy:  0.098   validation loss:  tensor(2.3258)

 

epoch no:  4   learning rate:  0.1   activation_func:  relu
number of layer:  3   validation accuracy:  0.102   validation loss:  tensor(2.3179)

 

epoch no:  2   learning rate:  0.1   activation_func:  relu
number of layer:  4   validation accuracy:  0.099   validation loss:  tensor(2.3111)





 

epoch no:  4   learning rate:  0.1   activation_func:  sigmoid
number of layer:  1   validation accuracy:  0.174   validation loss:  tensor(8.3740)

 

epoch no:  3   learning rate:  0.1   activation_func:  sigmoid
number of layer:  2   validation accuracy:  0.102   validation loss:  tensor(3.8583)

 

epoch no:  2   learning rate:  0.1   activation_func:  sigmoid
number of layer:  3   validation accuracy:  0.102   validation loss:  tensor(3.7556)

 

epoch no:  3   learning rate:  0.1   activation_func:  sigmoid
number of layer:  4   validation accuracy:  0.102   validation loss:  tensor(2.9972)

 

epoch no:  2   learning rate:  0.01   activation_func:  relu
number of layer:  1   validation accuracy:  0.262   validation loss:  tensor(3.1704)

 

epoch no:  2   learning rate:  0.01   activation_func:  relu
number of layer:  2   validation accuracy:  0.138   validation loss:  tensor(2.2119)

 

epoch no:  4   learning rate:  0.01   activation_func:  relu
number of layer:  3   valid

In [None]:
# Train the three configurations this notebook treats as the best ones and
# report validation loss, training loss and test accuracy for each of them.
# All three use the same learning rate and activation and differ only in their
# hidden-layer layout, so the layouts are listed once and handled by one loop
# instead of three pasted copies of the same statements.
best_learning_rate = 0.0001
best_activation = 'relu'
best_configurations = [
    ("For the best 1st model", [819, 614, 409, 204]),  # first best
    ("For the best 2nd model", [512]),                 # second best
    ("For the best 3rd model", [684, 344]),            # third best
]

for position, (heading, layer_sizes) in enumerate(best_configurations):
    net = NeuralNet(32*32, 10, len(layer_sizes), layer_sizes)
    epoch = net.training_the_model(best_learning_rate, best_activation, trainset, validationset)
    validation_accuracy, validation_loss = net.evaluatingModel(validationset, best_activation)
    training_accuracy, training_loss = net.evaluatingModel(trainset, best_activation)
    test_accuracy, test_loss = net.evaluatingModel(testset, best_activation)

    # blank separator between reports, but not before the first one
    if position > 0:
        print("\n \n")
    print(heading)
    print("Validation loss: ", validation_loss)
    print("Training loss: ", training_loss)
    print("Test accuracy: ", test_accuracy)


For the best 1st model
Validation loss:  tensor(1.6107)
Training loss:  tensor(1.2404)
Test accuracy:  0.45

 

For the best 2nd model
Validation loss:  tensor(1.6166)
Training loss:  tensor(1.2563)
Test accuracy:  0.449

 

For the best 3rd model
Validation loss:  tensor(1.6074)
Training loss:  tensor(1.2320)
Test accuracy:  0.454


In [None]:
import matplotlib.pyplot as plt

# FIXME(review): no cell visible in this notebook prepares an `x`/`y` data series
# for this plot; `x` is never assigned at all, so on a fresh kernel
# (Restart & Run All) this line raises NameError. Presumably a placeholder -
# decide what should be plotted and pass those values explicitly.
plt.plot(x, y)
plt.show()

In [None]:
# printing out the configurations
# One row per grid-search entry. The keys of grid_dictionary are
# (epochs, number of hidden layers, activation function, learning rate) and the
# values are [validation accuracy, validation loss].
print("Epoch   ||   # of hidden layers   ||   activation function   ||   learning rate   ||   validation accuracy   ||   validation loss")
for key, value in grid_dictionary.items():
    epochs, num_layers, activation_name, rate = key
    accuracy, loss_value = value

    # The loss is stored as a tensor; convert it to a float and format it for
    # the last column. It gets its own variable so it cannot be overwritten by
    # the learning rate, which is printed as-is in its own column.
    loss_text = "{:.2f}".format(float(loss_value))

    print(epochs, "%20.3f" % num_layers, "%25s" % activation_name, "%25s" % rate, "%27f" % accuracy, "%25s" % loss_text)

{(3, 1, 'relu', 0.1): [0.176, tensor(147.7524)], (3, 2, 'relu', 0.1): [0.098, tensor(2.3258)], (4, 3, 'relu', 0.1): [0.102, tensor(2.3179)], (2, 4, 'relu', 0.1): [0.099, tensor(2.3111)], (4, 1, 'sigmoid', 0.1): [0.174, tensor(8.3740)], (3, 2, 'sigmoid', 0.1): [0.102, tensor(3.8583)], (2, 3, 'sigmoid', 0.1): [0.102, tensor(3.7556)], (3, 4, 'sigmoid', 0.1): [0.102, tensor(2.9972)], (2, 1, 'relu', 0.01): [0.262, tensor(3.1704)], (2, 2, 'relu', 0.01): [0.138, tensor(2.2119)], (4, 3, 'relu', 0.01): [0.142, tensor(2.2635)], (2, 4, 'relu', 0.01): [0.166, tensor(2.1923)], (2, 1, 'sigmoid', 0.01): [0.241, tensor(2.3933)], (2, 2, 'sigmoid', 0.01): [0.229, tensor(2.1025)], (4, 3, 'sigmoid', 0.01): [0.149, tensor(2.2439)], (2, 4, 'sigmoid', 0.01): [0.1, tensor(2.3052)], (3, 1, 'relu', 0.001): [0.384, tensor(1.7973)], (4, 2, 'relu', 0.001): [0.426, tensor(1.6862)], (5, 3, 'relu', 0.001): [0.423, tensor(1.6812)], (5, 4, 'relu', 0.001): [0.419, tensor(1.6835)], (8, 1, 'sigmoid', 0.001): [0.4, tensor(