In [1]:
# we import print_function from __future__. This will help us to use newer print utility.

from __future__ import print_function 
import torch 
import torch.nn as nn 
import torch.nn.functional as F 
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms

In [2]:
# defining network architecture

class Net(nn.Module): 
    def __init__(self): 
        super(Net, self).__init__() 

        #Below layers are for extracting image features
        self.conv1 = nn.Conv2d(1, 32, 3, padding=1) 
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.conv4 = nn.Conv2d(128, 256, 3, padding=1)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.conv5 = nn.Conv2d(256, 512, 3)
        self.conv6 = nn.Conv2d(512, 1024, 3)
        self.conv7 = nn.Conv2d(1024, 10, 3)
        
        #Below are fully connected layers for summation part
        self.fc1 = nn.Linear(10, 10)
        self.fc2 = nn.Linear(20, 19)
        
    #forward pass
    def forward(self, x, x1):
        #converting image and summation to float32
        x = x.type(torch.float32)
        x1 = x1.type(torch.float32)

        #performing forward pass on image via convulation layers
        x = self.pool1(F.relu(self.conv2(F.relu(self.conv1(x)))))
        x = self.pool2(F.relu(self.conv4(F.relu(self.conv3(x)))))
        x = F.relu(self.conv6(F.relu(self.conv5(x))))
        x = self.conv7(x)
        x = x.view(-1, 10)
        
        #first FC layer taking x1 random number as one_hot
        x1 = F.relu(self.fc1(x1))
        #concatinating output from FC1 and tensor from conv layers
        x1 = torch.cat([x,x1], dim=1)
        x1 = F.relu(self.fc2(x1))
        
        #returning two outputs : log_softmax of image prediction x and summation prediction x1
        return F.log_softmax(x), F.log_softmax(x1)

In [3]:
#setting up cuda devices for GPU training

use_cuda = torch.cuda.is_available() 
device = torch.device("cuda" if use_cuda else "cpu")
model = Net().to(device)

In [4]:
#Extracting MNIST dataset 
data_mnist_train = torchvision.datasets.mnist.MNIST(root = '/data', train=True, download=True, 
                                               transform=transforms.Compose([transforms.ToTensor()]))

data_mnist_test = torchvision.datasets.mnist.MNIST(root = '/data', train=False, download=True, 
                                               transform=transforms.Compose([transforms.ToTensor()]))

torch.manual_seed(1)
import random

#custom Dataset module to include random numbers with MNIST dataset
class cus_mnist_data():
    
    # setup dataset for training and testing based on train flag
    def __init__(self, data_mnist):
        self.data_mnist = data_mnist
    
    #Function to generate random number and one_hot encode it 
    def generate_rand_num(label):
        num = random.randint(0,9)
        num_hot = torch.nn.functional.one_hot(torch.tensor(num), num_classes=10)
        output = label + num
        #print(f"summation output {output} and num is {num} \n")
        output_hot = torch.nn.functional.one_hot(torch.tensor(output), num_classes=19)
        label_hot = torch.nn.functional.one_hot(torch.tensor(label), num_classes=10)
        #concat_output = torch.cat([label_hot,output_hot]) 
        #return num_hot, concat_output
        return num_hot, output_hot, label_hot
    
    # getitem with random numbers
    def __getitem__(self, indx):
        image, label = self.data_mnist[indx]
        #num_hot, concat_output = cus_mnist_data.generate_rand_num(label)
        #return (image, num_hot, concat_output)
        num_hot, output_hot, label_hot = cus_mnist_data.generate_rand_num(label)
        #print(f"label is {label}")
        return (image,label,num_hot,output_hot)
    
    def __len__(self):
        return len(self.data_mnist)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to /data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting /data/MNIST/raw/train-images-idx3-ubyte.gz to /data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to /data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting /data/MNIST/raw/train-labels-idx1-ubyte.gz to /data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to /data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting /data/MNIST/raw/t10k-images-idx3-ubyte.gz to /data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to /data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting /data/MNIST/raw/t10k-labels-idx1-ubyte.gz to /data/MNIST/raw



In [5]:
#testing our custom Dataset module
my_data_train = cus_mnist_data(data_mnist_train)
my_data_test = cus_mnist_data(data_mnist_test)
image, label, one_hot, one_hot_label = my_data_train[1234]
image.shape, label ,one_hot.shape,one_hot_label.shape

print(f"label is {label}, random no. is - {one_hot.argmax().item()} and summation is {one_hot_label.argmax().item()}")
print(f"one_hot of random no. - {one_hot} & summation - {one_hot_label}")
print(f"size of train dataset {len(my_data_train)} and test dataset {len(my_data_test)}")


label is 3, random no. is - 6 and summation is 9
one_hot of random no. - tensor([0, 0, 0, 0, 0, 0, 1, 0, 0, 0]) & summation - tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0])
size of train dataset 60000 and test dataset 10000


In [6]:
#setup data loaders for training and evaluting with batch size 16
torch.manual_seed(1)
batch_size = 128
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

from torch.utils.data import DataLoader
train_loader = DataLoader(my_data_train, batch_size = batch_size, shuffle=True, **kwargs)
test_loader = DataLoader(my_data_test, batch_size = batch_size, shuffle=True, **kwargs)

In [7]:
from tqdm import tqdm # a python library utility libray which provide progress-bar for iterative function

#defining training loop
def train(model, device, train_loader, optimizer, epoch):
    model.train() 
    pbar = tqdm(train_loader) 
    for batch_idx, batch_data in enumerate(pbar): 
        data, target, one_hot, one_hot_label = batch_data
        one_hot_label = one_hot_label.type(torch.float32)
        
        data, target, one_hot, one_hot_label = data.to(device), target.to(device), one_hot.to(device), one_hot_label.to(device) # set computer device for data and target tensor
        optimizer.zero_grad() # initilize gradients of optimizer to zero
        
        #forward pass giving two outputs `image->output` and `random no.->hot_output`
        output, hot_output = model(data, one_hot) 
        
        #calculating loss1 which is cross-entropy loss on image prediction and label
        loss1 = F.cross_entropy(output, target)
        
        #loss2 which is cross-entropy loss on random no. summation prediction and sum label.
        loss2 = F.cross_entropy(hot_output, one_hot_label)
        
        #combining both losses 
        loss = loss1 + loss2
        
        loss.backward() # now we perform backward pass to calculate gradients 
        optimizer.step() # this will update the value of parameters/weights
        pbar.set_description(desc= f'loss1={loss1.item()} loss2={loss2.item()} batch_id={batch_idx}') # display loss of each iteration.

#defining evaluation loop
def test(model, device, test_loader):
    model.eval() # will set model into evaluate state i.e. setting training flag to False
    test_loss1_sum = 0
    test_loss2_sum = 0
    correct_img_sum = 0
    correct_rand_sum = 0
    with torch.no_grad(): # switch off gradient computation
        for indx, batch_data in enumerate(test_loader): # iterate through test_loader datasets
            data,target,one_hot,one_hot_label = batch_data
            one_hot_label = one_hot_label.type(torch.float32)
            data, target, one_hot, one_hot_label = data.to(device), target.to(device), one_hot.to(device), one_hot_label.to(device)
            output, hot_output = model(data, one_hot) # perfrom prediction on test data
            test_loss1 = F.nll_loss(output, target, reduction='sum').item()  # calcuate and sum up batch loss
            test_loss2 = F.cross_entropy(hot_output, one_hot_label)

            #accumulate both losses, loss1 for image prediction and loss2 for summation
            test_loss1_sum += test_loss1
            test_loss2_sum += test_loss2
    
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            pred_hot = hot_output.argmax(dim=1, keepdim=True)
            one_hot_label = one_hot_label.argmax(dim=1)
            
            correct = pred.eq(target.view_as(pred)).sum().item() # count number of correct predection using target/ground-truth and predicted value.
            correct_hot = pred_hot.eq(one_hot_label.view_as(pred_hot)).sum().item()
            
            #accumulate correct prediction for image and random summation
            correct_img_sum += correct
            correct_rand_sum += correct_hot

            #print information for every batch
            print(f"losses for image and summation is - loss1-{test_loss1}, loss2-{test_loss2} \n correctly predicted for this batch is img-{correct}, sum-{correct_hot}")
            

    test_loss1_sum /= len(test_loader.dataset) # calculate avearge loss across no. of data_mnist_test
    test_loss2_sum /= len(test_loader.dataset)

    print('\nTest set: Average loss for image prediction: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss1_sum, correct, len(test_loader.dataset),
        100. * correct_img_sum / len(test_loader.dataset)))  # print loss and accuracy on the terminal
    print('\nTest set: Average loss for summation prediction: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss2_sum, correct, len(test_loader.dataset),
        100. * correct_rand_sum / len(test_loader.dataset)))
     

In [8]:
model = Net().to(device) # initilize instance of Net() and then set which device to use
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9) # set optimizer for training

for epoch in range(1, 2):
    train(model, device, train_loader, optimizer, epoch) # call train() function for training
    test(model, device, test_loader) # call test() function for evaluating model on test dataset
     

  return F.log_softmax(x), F.log_softmax(x1)
loss1=0.08650920540094376 loss2=2.5102756023406982 batch_id=468: 100%|██████████| 469/469 [00:24<00:00, 19.09it/s]


losses for image and summation is - loss1-9.805788040161133, loss2-2.575316905975342 
 correctly predicted for this batch is img-126, sum-20
losses for image and summation is - loss1-14.42479419708252, loss2-2.5599513053894043 
 correctly predicted for this batch is img-124, sum-16
losses for image and summation is - loss1-13.498323440551758, loss2-2.620546340942383 
 correctly predicted for this batch is img-124, sum-12
losses for image and summation is - loss1-11.002345085144043, loss2-2.5966908931732178 
 correctly predicted for this batch is img-125, sum-19
losses for image and summation is - loss1-7.437155246734619, loss2-2.5997414588928223 
 correctly predicted for this batch is img-127, sum-16
losses for image and summation is - loss1-22.626998901367188, loss2-2.5979952812194824 
 correctly predicted for this batch is img-122, sum-15
losses for image and summation is - loss1-18.54871368408203, loss2-2.653792381286621 
 correctly predicted for this batch is img-123, sum-11
losses

## Conclusion
1. First we created a custom module from `nn.module` to define our architecture. We used 7 convolutaion layers and 2 pooling layers to extract features from MNIST image and 2 Fully Connected layers for summation part. 
<br>
First we pass image tensor `x` from conv layers and then we also pass random number `x1` from FC1. Then we concat `x` and `x1` and then pass it to FC2 and return softmax of x and x1 as output 
<br>
<br>
2. Then we created a custom dataset where we define function `generate_rand_num()`to generate random number and we merged this with existing MNIST dataset. Each index in dataset contains a tuple with four values - (image, image label, random number in onehot, random+label in onehot)
<br>
<br>
3. Finally we create functions for training and testing which will iterate through batches.