In [1]:
# Import the required modules
import torch
torch.manual_seed(0) 
import torch.nn as nn # more object oriented
import torch.nn.functional as F # more functional
# torchvision is used for image and video transformations. It also has its own datasets.
import torchvision.transforms as T
from torchvision.datasets import MNIST
from torch.utils.data import random_split, DataLoader
from tqdm import tqdm
import ssl
import torch.optim as optim
ssl._create_default_https_context = ssl._create_unverified_context

In [2]:
from torchvision.datasets import CIFAR10 # torchvision has its own datasets so we can import from there directly
import torchvision.transforms as T

# preprocessing (normalization) of the training data
train_transform = T. Compose ([
# can add additional transforms on images
T. ToTensor () , # convert images to PyTorch tensors, which are arrays
T. Grayscale () , # RGB to grayscale
T. Normalize ( mean =(0.5 ,) , std=(0.5 ,) ) # normalization
# speeds up the convergence
# and improves the accuracy
])

# preprocessing (normalization) of the testing data
val_transform = test_transform = T. Compose ([
T. ToTensor () ,
T. Grayscale () ,
T. Normalize ( mean =(0.5 ,) , std=(0.5 ,) )
])


# downloading our data separately for both the train and test sets and apply the transformation (preprocessing) on it.
train_set = CIFAR10 ( root = 'CIFAR10', train =True ,transform = train_transform , download = True )
test_set = CIFAR10 ( root = 'CIFAR10', train =False , transform = test_transform , download = True )


Files already downloaded and verified
Files already downloaded and verified


In [3]:
# separate the validation and training set. validation set is used to tune 
# the hyper-parameters, e.g learning-rate, number of hidden layers, etc.
# we do the separation before creating batches of the train and test set 
# 80 percent of the train_set will be kept as the train set and 20 percent will be used as the validation.
train_set_size = int(len(train_set) * 0.8)
valid_set_size = len(train_set) - train_set_size
trainset, validationset = torch.utils.data.random_split(train_set, [train_set_size, valid_set_size])


In [4]:
# divide our data into batches and shuffle them.
trainset = DataLoader(trainset, batch_size = 5, shuffle=True)
testset = DataLoader(test_set, batch_size = 5, shuffle=True)
validationset = DataLoader(validationset, batch_size = 5, shuffle=True) 

# specify the classes
classes = {'airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 
            'frog', 'horse', 'ship', 'truck'}

In [5]:
# count the number of varying objects
total = 0
counter_dict = {0:0, 1:0, 2:0, 3:0, 4:0, 5:0, 6:0, 7:0, 8:0, 9:0}

for data in trainset:
    xs, ys = data # input and its label
    for y in ys: 
        # count the number of individual objects inside the dataset
        counter_dict[int(y)] += 1 
        total+=1
        
# it is observed that they are all equally present
# so our data set is balanced.
print(counter_dict)


{0: 4014, 1: 4002, 2: 3995, 3: 3977, 4: 3982, 5: 4019, 6: 3975, 7: 4010, 8: 4010, 9: 4016}


In [6]:
# this is our neural network!!
class NeuralNet(nn.Module):
    def __init__(self, input_size, output_size, num_hidden_layers, hidden_layer_size):
        super(NeuralNet, self).__init__() # initialize the super class 
        
        self.num_hidden_layers = num_hidden_layers
        self.deep_nn = nn.Sequential()
        
        # creating the input and the hidden layers. the activation function used is relu.
        for i in range(num_hidden_layers + 1):
            self.deep_nn.add_module(f'ff{i}', nn.Linear(input_size, hidden_layer_size))
            # after creating the input layer, the value of input_size needs to change
            input_size = hidden_layer_size 
            
        self.deep_nn.add_module(f'classifier', nn.Linear(hidden_layer_size, output_size))
   
    # passing of the data
    def forward(self, data, activation_func):
        # the activation function for the input and the hidden layers is relu
        for i in range((self.num_hidden_layers) + 1):
            if activation_func == 'relu':
                data = F.relu(self.deep_nn[i](data))
            elif activation_func == 'sigmoid':
                data = F.sigmoid(self.deep_nn[i](data))
            elif activation_func == 'Tanh':
                data = F.tanh(self.deep_nn[i](data))
        # the activation function for the output layer is softmax but we aren't going to initialize that now
        # because the loss function crossEntropyLoss already applies softmax.
        return data
    
    # loss function is used to measure how well the model (neural network) is doing
    # On the other hand, the optimizer tries to adjust the weights and biases in such a way to minimize the loss function.
    def training_the_model(self, learning_rate, activation_func, trainset):
        # we chose Adam as our optimizer.
        # the first argument we passed in is the list of parameters that we want the optimizer to work on.
        # e.g if we want out optimizer to only adjust some weights and not the others, we can control that here.
        # the second argument specifies the learning rate.
        optimizer = optim.Adam(self.parameters(), learning_rate)

        # Epoch is a pass through the whole data
        EPOCHS = 3
        # choosing our loss function
        loss = nn.CrossEntropyLoss()
        for epoch in range(EPOCHS):
            # data is a batch of featuresets and labels
            for data in trainset: 
                # here we are separating the featureset and labels
                X, y = data
                # pass our input through the neural network
                # view(-1) flattens a tensor in PyTorch = brings all the rows one after another
                output = self.forward(X.view(-1, 32*32), activation_func)
                # calculate the loss by comparing the model's predicted guess and the actual label
                # the crossEntropyLoss was the loss function that we were required to use.
                l = loss(output, y)
                l.backward()
                # this will adjust the weights
                # step() makes the optimizer iterate over all parameters (tensors) it is supposed to update 
                # and use their internally stored grad to update their values.
                optimizer.step() 
            
    def evaluatingModel(self, validationset, activation_funct):
        correct = 0
        total = 0
        lossTotal = 0

        # choosing our loss function
        loss = nn.CrossEntropyLoss()

        with torch.no_grad():
            # data is a batch of featuresets and labels
            for data in validationset:
                X, y = data
                output = self.forward(X.view(-1, 32*32), activation_funct)
                # here we are evaluating our model. basically, comparing the models guess to the actual y value
                for idx, i in enumerate(output):
                    if torch.argmax(i) == y[idx]:
                        correct += 1 # increment correct if the model has guessed correctly
                    total += 1
                # find the loss for each batch and add it to the total loss.
                lossTotal += loss(self.forward(X.view(-1, 32*32), activation_funct), y)

        #print("Accuracy: ", round(correct/total, 3))
        # to get the average loss, divide the total loss by the number of batches
        loss = lossTotal / len(validationset)
        #print("Loss: ", loss)
        return round(correct/total, 3), loss
            
        
        
net = NeuralNet(32*32, 10, 1, 64)
print(net)

NeuralNet(
  (deep_nn): Sequential(
    (ff0): Linear(in_features=1024, out_features=64, bias=True)
    (ff1): Linear(in_features=64, out_features=64, bias=True)
    (classifier): Linear(in_features=64, out_features=10, bias=True)
  )
)


In [7]:
# sanity check
# This is used to see the value of the accuracy and loss on the untrained network and compare it to my guess.
correct = 0
total = 0
lossTotal = 0

# choosing our loss function
loss = nn.CrossEntropyLoss()

with torch.no_grad():
    # data is a batch of featuresets and labels
    for data in trainset:
        X, y = data
        output = net.forward(X.view(-1, 32*32), 'relu')
        # here we are evaluating our model. basically, comparing the models guess to the actual y value
        for idx, i in enumerate(output):
            if torch.argmax(i) == y[idx]:
                correct += 1 # increment correct if the model has guessed correctly
            total += 1
        # find the loss for each batch and add it to the total loss.
        lossTotal += loss(net.forward(X.view(-1, 32*32), 'relu'), y)
        
print("Accuracy: ", round(correct/total, 3))
# to get the average loss, divide the total loss by the number of batches
loss = lossTotal / len(trainset)
print("Loss: ", loss)

Accuracy:  0.001
Loss:  tensor(4.1862)


In [8]:
# in this cell, we will do grid search for the hyper-parameters
# in the neuralNet we can change the # of hidden layers, # of neurons
# i research about it and it was said that the # of neurons in the hidden layer 
# is approx. equal to (2/3) * # of neurons in the hidden layer, hence I chose the below values
# this dictionary will store the value of the validation_accuracy and validation_loss based on the hyper-parameters
grid_dictionary = {}
# initialization of the hyper-parameters
amount_of_neurons = [450, 550, 650, 750] 
activation_functions = ['relu', 'sigmoid', 'Tanh']
learning_rates = [0.01, 0.001, 0.0001]
# for loop to iterate over every combination of the hyper-parameters
for no_layer in range(5):
    for activation_func in activation_functions:
        for learning_rate in learning_rates:
            for no_neurons in amount_of_neurons:
                net = NeuralNet(32*32, 10, no_layer+1, no_neurons)
                net.training_the_model(learning_rate, activation_func, trainset)
                validation_accuracy, validation_loss = net.evaluatingModel(validationset, activation_func) 
                print(validation_accuracy, validation_loss)
                grid_dictionary[no_layer+1, activation_func, learning_rate, no_neurons] = [validation_accuracy, validation_loss]
        


0.099 tensor(6.1093)
0.099 tensor(6.3098)
0.099 tensor(6.4769)
0.099 tensor(6.6201)
0.099 tensor(6.1093)
0.099 tensor(6.3098)
0.099 tensor(6.4769)
0.099 tensor(6.6201)
0.099 tensor(6.1093)
0.208 tensor(121.3431)
0.099 tensor(6.4769)
0.099 tensor(6.6201)




0.099 tensor(5.4362)
0.099 tensor(5.5328)
0.1 tensor(5.7947)
0.099 tensor(5.8370)
0.099 tensor(5.1466)
0.099 tensor(5.3407)
0.099 tensor(5.5030)
0.099 tensor(5.6426)
0.099 tensor(5.1466)
0.099 tensor(5.3407)
0.099 tensor(5.5030)
0.099 tensor(5.6426)




0.126 tensor(5.7092)
0.14 tensor(5.9062)
0.118 tensor(6.0550)
0.11 tensor(6.4352)
0.139 tensor(5.4457)
0.128 tensor(5.6002)
0.129 tensor(5.8242)
0.128 tensor(5.9480)
0.128 tensor(5.2419)
0.132 tensor(5.4468)
0.127 tensor(5.5980)
0.142 tensor(5.7529)
0.099 tensor(6.1093)
0.099 tensor(6.3098)
0.099 tensor(6.4769)
0.099 tensor(6.6201)
0.099 tensor(6.1093)
0.099 tensor(6.3098)
0.099 tensor(6.4769)
0.099 tensor(6.6201)
0.102 tensor(4.3570)
0.099 tensor(5.0754)
0.102 tensor(4.1485)
0.098 tensor(5.0593)
0.099 tensor(5.2414)
0.099 tensor(5.6346)
0.101 tensor(5.8919)
0.099 tensor(5.9332)
0.099 tensor(5.1466)
0.099 tensor(5.3407)
0.099 tensor(5.5030)
0.099 tensor(5.6426)
0.099 tensor(5.1466)
0.099 tensor(5.3407)
0.099 tensor(5.5030)
0.099 tensor(5.6426)
0.13 tensor(5.8429)
0.129 tensor(5.9153)
0.112 tensor(6.1754)
0.131 tensor(6.3411)
0.136 tensor(5.5349)
0.13 tensor(5.6510)
0.131 tensor(5.8278)
0.132 tensor(6.0264)
0.135 tensor(5.2513)
0.117 tensor(5.4398)
0.143 tensor(5.7263)
0.137 tensor(5.79

In [10]:
print(grid_dictionary)

{(1, 'relu', 0.01, 450): [0.099, tensor(6.1093)], (1, 'relu', 0.01, 550): [0.099, tensor(6.3098)], (1, 'relu', 0.01, 650): [0.099, tensor(6.4769)], (1, 'relu', 0.01, 750): [0.099, tensor(6.6201)], (1, 'relu', 0.001, 450): [0.099, tensor(6.1093)], (1, 'relu', 0.001, 550): [0.099, tensor(6.3098)], (1, 'relu', 0.001, 650): [0.099, tensor(6.4769)], (1, 'relu', 0.001, 750): [0.099, tensor(6.6201)], (1, 'relu', 0.0001, 450): [0.099, tensor(6.1093)], (1, 'relu', 0.0001, 550): [0.208, tensor(121.3431)], (1, 'relu', 0.0001, 650): [0.099, tensor(6.4769)], (1, 'relu', 0.0001, 750): [0.099, tensor(6.6201)], (1, 'sigmoid', 0.01, 450): [0.099, tensor(5.4362)], (1, 'sigmoid', 0.01, 550): [0.099, tensor(5.5328)], (1, 'sigmoid', 0.01, 650): [0.1, tensor(5.7947)], (1, 'sigmoid', 0.01, 750): [0.099, tensor(5.8370)], (1, 'sigmoid', 0.001, 450): [0.099, tensor(5.1466)], (1, 'sigmoid', 0.001, 550): [0.099, tensor(5.3407)], (1, 'sigmoid', 0.001, 650): [0.099, tensor(5.5030)], (1, 'sigmoid', 0.001, 750): [0.0

In [2]:
! pip3 install matplotlib



You should consider upgrading via the 'C:\Users\hevra\AppData\Local\Programs\Python\Python39\python.exe -m pip install --upgrade pip' command.



