<a href="https://colab.research.google.com/github/aviz123/PyTorch-Assignment/blob/main/Session_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import Libraries and Modules

In [1]:
import torch
import torchvision # provide access to datasets, models, transforms, utils, etc
import torchvision.transforms as transforms
from torch.utils.data import Dataset, TensorDataset, DataLoader
import numpy as np
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchsummary import summary
from random import randint

torch.set_printoptions(linewidth=120)

# Setting up the device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Assuming that we are on a CUDA machine, this should print a CUDA device:
print("The device available is: ", device)

The device available is:  cpu


# Create Combined Dataset

In [2]:
# Create a class to combine MNIST dataset and random numbers between 0 and 9
class Combined_Dataset():

  # We pass the train variable to get train or test data, and batch_size
  def __init__(self, train, batch_size):

      self.batch_size = batch_size
      # Load the MNIST data into the data_loader object
      self.data_loader = torch.utils.data.DataLoader(
          torchvision.datasets.MNIST('/files/', train=train, download=True,
                                transform=torchvision.transforms.Compose([
                                  torchvision.transforms.ToTensor(),
                                  torchvision.transforms.Normalize(
                                    (0.1307,), (0.3081,))
                                ])),
          batch_size=self.batch_size, shuffle=True)

      # Number of samples in the dataaset
      self.dataset = self.data_loader.dataset            

  # getitem function creats batches of our dataset on the fly by calling next(iter())
  def __getitem__(self, index):
      # Extract one batch of the MNIST data_loader
      image, label = next(iter(self.data_loader))

      # Generate randoms numbers between 0 and 9 of size=batch_size. The datatype is float as this is the input required for the network
      random_numbers = torch.tensor([randint(0,9) for _ in range(self.batch_size)], dtype=torch.float32)

      # Combine inputs and outputs as a list after transfering the tensors to the GPU
      x = [image.to(device), random_numbers.to(device)]
      # y labels for addition of number is reshaped to [32,1] as MSE requires it in this format
      y = [label.to(device), (label+random_numbers).reshape([32,1]).to(device)]

      return x, y

  def __len__(self):
      return len(self.data_loader)

# Set the batch size
batch_size = 32

# Create the train and test dataset
train_data = Combined_Dataset(train=True, batch_size=batch_size)
test_data = Combined_Dataset(train=False, batch_size=batch_size)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to /files/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=9912422.0), HTML(value='')))


Extracting /files/MNIST/raw/train-images-idx3-ubyte.gz to /files/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to /files/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=28881.0), HTML(value='')))


Extracting /files/MNIST/raw/train-labels-idx1-ubyte.gz to /files/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to /files/MNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=1648877.0), HTML(value='')))


Extracting /files/MNIST/raw/t10k-images-idx3-ubyte.gz to /files/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to /files/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=4542.0), HTML(value='')))


Extracting /files/MNIST/raw/t10k-labels-idx1-ubyte.gz to /files/MNIST/raw

Processing...
Done!


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [3]:
print(f"Number of train batches: {len(train_data)}")
print(f"Number of test batches: {len(test_data)}")

print(f"Number of train samples: {len(train_data.dataset)}")
print(f"Number of test samples: {len(test_data.dataset)}")

x, y = next(iter(train_data))

print(f"Shape of input data is: [{x[0].shape}, {x[1].shape}]")
print(f"Shape of output data is: [{y[0].shape}, {y[1].shape}]")

Number of train batches: 1875
Number of test batches: 313
Number of train samples: 60000
Number of test samples: 10000
Shape of input data is: [torch.Size([32, 1, 28, 28]), torch.Size([32])]
Shape of output data is: [torch.Size([32]), torch.Size([32, 1])]


# Building the Network

In [4]:
# Build the classifier and addition network
class Network(nn.Module):
    def __init__(self):
        super().__init__()

        # Classifier Network
        self.input1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3) # output size = 26
        self.conv1 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3) # 24
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3) # 22
        self.pool = nn.MaxPool2d(2, 2) # 11

        # 1x1 convolution
        self.oneconv1 = nn.Conv2d(in_channels=64, out_channels=16, kernel_size=1) # 11
        
        self.conv3 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3) # 9
        self.conv4 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3) # 7
        self.conv5 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3) # 5
        self.conv6 = nn.Conv2d(in_channels=64, out_channels=10, kernel_size=5) # 1

        # Addition network using fully connected layers
        self.input2 = nn.Linear(in_features=2, out_features=5)
        self.layer1 = nn.Linear(in_features=5, out_features=5)
        self.out2 = nn.Linear(in_features=5, out_features=1)

    def forward(self, data1, data2):
        # Classifier Network forward prop
        # first block
        x = F.relu(self.input1(data1))
        x = F.relu(self.conv1(x))
        x = self.pool(F.relu(self.conv2(x)))
        x = F.relu(self.oneconv1(x))
        
        # second block
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = F.relu(self.conv5(x))

        # third block
        x = self.conv6(x)
        output1 = torch.flatten(x, start_dim=1) # flatten all dimensions except batch      

        # Addition Network
        # Collect the output of the classifier network and select the index with maximum value
        x = torch.argmax(output1, dim=1)
        # Use torch.stack to create pairs of network outputs and random numbers
        x  = torch.stack((x.float(), data2), dim=1)
        
        # Pass the data through the addition network. No activation function required as addition of two numbers is a linear function
        x = self.input2(x)
        x = self.layer1(x)
        output2 = self.out2(x)

        # Return outputs from both the classifier and addition network
        return output1, output2

In [5]:
# Create an object of the class Network and transfer it to the GPU
model = Network().to(device)
print(" The model layers are: ")
print(model)

print("\nShape of parameters in each layer is: ")
for name, param in model.named_parameters():
    print(name, '\t\t', param.shape)

# Selecting the loss function and optimizer  for the model
CE_loss = nn.CrossEntropyLoss()
MSE_loss = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

 The model layers are: 
Network(
  (input1): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1))
  (conv1): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (oneconv1): Conv2d(64, 16, kernel_size=(1, 1), stride=(1, 1))
  (conv3): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv4): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (conv5): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
  (conv6): Conv2d(64, 10, kernel_size=(5, 5), stride=(1, 1))
  (input2): Linear(in_features=2, out_features=5, bias=True)
  (layer1): Linear(in_features=5, out_features=5, bias=True)
  (out2): Linear(in_features=5, out_features=1, bias=True)
)

Shape of parameters in each layer is: 
input1.weight 		 torch.Size([16, 1, 3, 3])
input1.bias 		 torch.Size([16])
conv1.weight 		 torch.Size([32, 16, 3, 3])
conv1.bias 		 torch.Size([32])
conv2.weight 		 tor

# Training the Model

In [6]:
for epoch in range(10):  # Loop over the dataset multiple times

    total_loss = 0.0
    total_correct_1, total_correct_2 = 0, 0
    # Loop over the entire length of train data
    for i in range(len(train_data)):
        # Get the inputs and outputs
        # Input data x is a list of [images, random numbers], output data y is a list of [classes, sum of numbers]
        x, y = next(iter(train_data))

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward + Backward + Optimize
        output1, output2 = model(x[0], x[1])
        # Use the CE loss for classification and MSE loss for addition 
        loss = CE_loss(output1, y[0]) + MSE_loss(output2, y[1])
        loss.backward()
        optimizer.step()

        # Calculate statistics
        total_loss += loss.item()
        total_correct_1 += output1.argmax(dim=1).eq(y[0]).sum().item()
        total_correct_2 += (torch.round(output2) == torch.round(y[1])).sum().item()
        
    # Print statistics        
    print(f"Epoch: {epoch+1}, loss: {total_loss}, Classification Acc: {100 * (total_correct_1/(len(train_data.dataset)))}, Addition Acc: {100 * (total_correct_2/(len(train_data.dataset)))}")
    
print('Finished Training')

Epoch: 1, loss: 15730.5960277915, Classification Acc: 39.335, Addition Acc: 24.92
Epoch: 2, loss: 1750.585687068291, Classification Acc: 95.72166666666668, Addition Acc: 88.07166666666667
Epoch: 3, loss: 960.6274697096087, Classification Acc: 97.65, Addition Acc: 95.20166666666667
Epoch: 4, loss: 815.7211702149361, Classification Acc: 98.13499999999999, Addition Acc: 96.22500000000001
Epoch: 5, loss: 646.2290848720586, Classification Acc: 98.52666666666666, Addition Acc: 97.89333333333333
Epoch: 6, loss: 484.78688704257365, Classification Acc: 98.84166666666667, Addition Acc: 98.215
Epoch: 7, loss: 385.7182908653631, Classification Acc: 99.08166666666666, Addition Acc: 98.7
Epoch: 8, loss: 363.83727842749795, Classification Acc: 99.14500000000001, Addition Acc: 98.895
Epoch: 9, loss: 276.718324864225, Classification Acc: 99.33666666666666, Addition Acc: 99.28833333333333
Epoch: 10, loss: 273.10551575793943, Classification Acc: 99.31833333333333, Addition Acc: 99.27833333333334
Finished

# Evaluating the Model

In [8]:
correct_1, correct_2 = 0, 0
total_1, total_2 = 0, 0

# Since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    # Loop over the entire length of test data
    for i in range(len(test_data)):
        # Get the inputs and outputs
        # Input data x is a list of [images, random numbers], output data y is a list of [classes, sum of numbers]
        x, y = next(iter(test_data))

        # Calculate outputs by running data through the network 
        output1, output2 = model(x[0], x[1])

        # The class with the highest energy is what we choose as prediction
        _, predicted = torch.max(output1.data, 1)
        total_1 += y[0].size(0)
        # Calculate number of correction predictions for classifier
        correct_1 += (predicted == y[0]).sum().item()

        total_2 += y[1].to(device).size(0)
        # Calculate number of correction predictions for addition
        correct_2 += (torch.round(output2) == torch.round(y[1])).sum().item()

print('Accuracy of the network on the 10,000 test images: ', (100 * correct_1 / total_1))
print('Accuracy of the network on the 10,000 test images: ', (100 * correct_2 / total_2))

Accuracy of the network on the 10,000 test images:  98.44249201277955
Accuracy of the network on the 10,000 test images:  98.44249201277955


# Sample Prediction on a batch

In [10]:
print("Print Prediction for a sample batch\n")
x, y = next(iter(test_data))

# Calculate outputs by running data through the network 
output1, output2 = model(x[0], x[1])

# The class with the highest energy is what we choose as prediction
_, predicted = torch.max(output1.data, 1)

print("Predicted vs. Actual for classifer\n")
print(torch.stack((predicted, y[0]), dim=1)[:10])

Print Prediction for a sample batch

Predicted vs. Actual for classifer

tensor([[7, 7],
        [2, 2],
        [4, 4],
        [9, 9],
        [1, 1],
        [8, 8],
        [2, 2],
        [5, 5],
        [3, 3],
        [3, 3]])


In [11]:
print("Predicted vs. Predicted Rounded off vs. Actual for addition\n")

print(torch.stack((output2, torch.round(output2), y[1]), dim=1)[:5])

Predicted vs. Predicted Rounded off vs. Actual for addition

tensor([[[14.0593],
         [14.0000],
         [14.0000]],

        [[ 6.0757],
         [ 6.0000],
         [ 6.0000]],

        [[13.1248],
         [13.0000],
         [13.0000]],

        [[ 9.9472],
         [10.0000],
         [10.0000]],

        [[ 4.0731],
         [ 4.0000],
         [ 4.0000]]], grad_fn=<SliceBackward>)
