<a href="https://colab.research.google.com/github/askmuhsin/weights_heist_eva7/blob/main/S2.5/mnist_plus_random_num_nn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
! pip install torchsummary



In [2]:
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchsummary import summary

from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader

from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import numpy as np

# Setup

In [3]:
use_cuda = torch.cuda.is_available()  ## on which hardware the model should be trained on
device = torch.device("cuda" if use_cuda else "cpu")  ## on which hardware the model should be trained on
device

device(type='cuda')

# Dataset

In [4]:
class MnistPlusNum(Dataset):
    
    def __init__(self, train=True):
        self.train = train
        self.mnist = datasets.MNIST(
            root='./datasets/mnist_data/',
            train=self.train,
            download=True,
            transform=transforms.Compose([
              transforms.ToTensor(),
              transforms.Normalize((0.1307,), (0.3081,)),
            ])
        )
        print(self)
        
    def __len__(self):
        return len(self.mnist)
    
    def __getitem__(self, idx):
        img_data = self.mnist[idx][0]
        img_label = self.mnist[idx][1]
        
        rand_num = torch.randint(low=0, high=10, size=(1,))
        num_data = torch.zeros(1, 10)
        num_data[0, rand_num] = 1
        
        num_label = rand_num + img_label
        
        return img_data, img_label, num_data, num_label
    
    def __repr__(self):
        return 'Loaded MNIST "{}" dataset with random number added to it. Data size - {}'.format(
            'train' if self.train else 'test', self.__len__()
        )


In [5]:
dataset = MnistPlusNum(train=False)
loader = DataLoader(dataset=dataset, batch_size=1, shuffle=False)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./datasets/mnist_data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./datasets/mnist_data/MNIST/raw/train-images-idx3-ubyte.gz to ./datasets/mnist_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./datasets/mnist_data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./datasets/mnist_data/MNIST/raw/train-labels-idx1-ubyte.gz to ./datasets/mnist_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./datasets/mnist_data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./datasets/mnist_data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./datasets/mnist_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./datasets/mnist_data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./datasets/mnist_data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./datasets/mnist_data/MNIST/raw

Loaded MNIST "test" dataset with random number added to it. Data size - 10000


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [6]:
for batch_num, (img_data, img_label, num_data, out_label) in enumerate(loader):
    print('img_data : ', img_data.shape)
    print('img_label : ', img_label)
    print('num_data : \n\tvalue - {}\n\tshape - {}\n\ttensor - {}'.format(
      num_data.argmax(dim=-1), num_data.shape, num_data)
    )
    print('out_label : \n\tvalue - {}\n\tshape - {}\n\ttensor - {}'.format(
      out_label.item(), out_label.shape, out_label)
    )
    break

img_data :  torch.Size([1, 1, 28, 28])
img_label :  tensor([7])
num_data : 
	value - tensor([[4]])
	shape - torch.Size([1, 1, 10])
	tensor - tensor([[[0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]]])
out_label : 
	value - 11
	shape - torch.Size([1, 1])
	tensor - tensor([[11]])


# Model

In [7]:
class Net(nn.Module): ## this is the standard way of creating
    def __init__(self): ## the fn gets called at the time of class creation
        super(Net, self).__init__() ## provide access of parent class methods nn.Module.__init__(self, 'Net')
        self.conv1 = nn.Conv2d(1, 32, 3, padding=1)     #input 28X28X1    | Output  28X28X32    | RF 3X3
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)    #input 28X28X32   | Output  28X28X64    | RF 5X5
        self.pool1 = nn.MaxPool2d(2, 2)                 #input 28X28X64   | Output  14X14X64    | RF 7X7
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)   #input 14X14X64   | Output  14X14X128   | RF 14X14
        self.conv4 = nn.Conv2d(128, 256, 3, padding=1)  #input 14X14X128  | Output  14X14X256   | RF 16X16
        self.pool2 = nn.MaxPool2d(2, 2)                 #input 14X14X256  | Output  7X7X256     | RF 18X18
        self.conv5 = nn.Conv2d(256, 512, 3)             #input 7X7X256    | Output  5X5X512     | RF 36X36
        self.conv6 = nn.Conv2d(512, 1024, 3)            #input 5X5X512    | Output  3X3X1024    | RF 40X40
        self.conv7 = nn.Conv2d(1024, 10, 3)             #input 3X3X1024   | Output  1X1X10      | RF 42X42
        
        self.fc1 = nn.Linear(20, 30)
        self.fc2 = nn.Linear(30, 30)
        self.fc3 = nn.Linear(30, 19)

    def forward(self, x1, x2):
        x1 = self.pool1(F.relu(self.conv2(F.relu(self.conv1(x1))))) ## 1 layer of operation
        x1 = self.pool2(F.relu(self.conv4(F.relu(self.conv3(x1))))) ## 2nd layer of ops
        x1 = F.relu(self.conv6(F.relu(self.conv5(x1)))) ## 4th layer of operation
        x1 = self.conv7(x1)   ## final layer of conv operation
        x1 = x1.view(-1, 10)  ## changing the shape of the array
        out_1 = F.log_softmax(x1, dim=0) ## out of image

        x3 = torch.cat([x1, x2.view(-1, 10)], dim=1) ## this layer combines out of CNN with the random number
        x3 = F.relu(self.fc1(x3)) ## fully connected layer to learn summing
        x3 = F.relu(self.fc2(x3)) ## fully connected layer to learn summing
        x3 = self.fc3(x3)
        x3 = x3.view(-1, 19)
        out_2 = F.log_softmax(x3, dim=0) ## out of sum'mer
        return out_1, out_2

model = Net().to(device)

In [8]:
## debug Model
summary(model, input_size=[(1, 28, 28), (1, 1, 10)])  ## view model summary

print('\n\n')
print("input 1 (image) : ", img_data.shape)
print("input 2 (random number) : ", num_data.shape)

out = model(img_data.to(device), num_data.to(device))
print("output : ", out[0].shape, out[1].shape)
# print(out, out.argmax())

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 28, 28]             320
            Conv2d-2           [-1, 64, 28, 28]          18,496
         MaxPool2d-3           [-1, 64, 14, 14]               0
            Conv2d-4          [-1, 128, 14, 14]          73,856
            Conv2d-5          [-1, 256, 14, 14]         295,168
         MaxPool2d-6            [-1, 256, 7, 7]               0
            Conv2d-7            [-1, 512, 5, 5]       1,180,160
            Conv2d-8           [-1, 1024, 3, 3]       4,719,616
            Conv2d-9             [-1, 10, 1, 1]          92,170
           Linear-10                   [-1, 30]             630
           Linear-11                   [-1, 30]             930
           Linear-12                   [-1, 19]             589
Total params: 6,381,935
Trainable params: 6,381,935
Non-trainable params: 0
---------------------------

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


# Train Model

In [9]:
## test loop
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct_1, correct_2 = 0, 0
    with torch.no_grad():
      for batch_num, (img_data, img_label, num_data, out_label) in enumerate(test_loader):
        img_data, img_label, num_data, out_label = (
            img_data.to(device),
            img_label.to(device),
            num_data.to(device), 
            out_label.squeeze().to(device),
        )
        output = model(img_data, num_data)

        out_1, out_2 = model(img_data, num_data)
        loss_1 = criterion(out_1, img_label)
        loss_2 = criterion(out_2, out_label)
        loss = loss_1 + loss_2
        test_loss += loss.item()

        pred_1 = out_1.argmax(dim=1, keepdim=True)
        pred_2 = out_2.argmax(dim=1, keepdim=True)
        
        correct_1 += pred_1.eq(img_label.view_as(pred_1)).sum().item()
        correct_2 += pred_2.eq(out_label.view_as(pred_2)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Acc task 1 : {}/{} ({:.0f}%), Acc task 2 : {}/{} ({:.0f}%)\n'.format(
      test_loss, 
      correct_1, len(test_loader.dataset),
      100. * correct_1 / len(test_loader.dataset),
      correct_2, len(test_loader.dataset),
      100. * correct_2 / len(test_loader.dataset),
      )
    )


def train(model, device, train_loader, optimizer): ## train fn
    model.train() ## set model to training, params will be updated
    pbar = tqdm(train_loader) ## progress
    for batch_idx, (img_data, img_label, num_data, out_label) in enumerate(train_loader):
        img_data, img_label, num_data, out_label = (
            img_data.to(device),
            img_label.to(device),
            num_data.to(device), 
            out_label.squeeze().to(device),
        )
        optimizer.zero_grad() ## clean the gradients

        out_1, out_2 = model(img_data, num_data)
        loss_1 = criterion(out_1, img_label)
        loss_2 = criterion(out_2, out_label)
        loss = loss_1 + loss_2
        
        loss.backward() ## take the loss and compute gradient for all parameters
        optimizer.step()  ## update parameters
        pbar.set_description(desc= f'loss={loss.item()} batch_id={batch_idx}')  ## visualize progress

In [10]:
torch.manual_seed(1)  ## fixed start point
batch_size = 128  ## number of images in a batch
epochs = 10

criterion = nn.CrossEntropyLoss()

test_dataset = MnistPlusNum(train=False)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

train_dataset = MnistPlusNum(train=True)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

# optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
optimizer = optim.Adam(model.parameters())

Loaded MNIST "test" dataset with random number added to it. Data size - 10000
Loaded MNIST "train" dataset with random number added to it. Data size - 60000


In [11]:
for epoch in range(epochs):
  print(f"EPOCH [ {epoch} / {epochs} ]")
  train(model, device, train_loader, optimizer)
  test(model, device, test_loader)

EPOCH [ 0 / 10 ]


  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0141, Acc task 1 : 9910/10000 (99%), Acc task 2 : 3053/10000 (31%)

EPOCH [ 1 / 10 ]


  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0077, Acc task 1 : 9926/10000 (99%), Acc task 2 : 7559/10000 (76%)

EPOCH [ 2 / 10 ]


  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0037, Acc task 1 : 9927/10000 (99%), Acc task 2 : 9284/10000 (93%)

EPOCH [ 3 / 10 ]


  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0019, Acc task 1 : 9943/10000 (99%), Acc task 2 : 9687/10000 (97%)

EPOCH [ 4 / 10 ]


  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0013, Acc task 1 : 9944/10000 (99%), Acc task 2 : 9768/10000 (98%)

EPOCH [ 5 / 10 ]


  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0009, Acc task 1 : 9948/10000 (99%), Acc task 2 : 9822/10000 (98%)

EPOCH [ 6 / 10 ]


  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0011, Acc task 1 : 9922/10000 (99%), Acc task 2 : 9774/10000 (98%)

EPOCH [ 7 / 10 ]


  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0007, Acc task 1 : 9941/10000 (99%), Acc task 2 : 9842/10000 (98%)

EPOCH [ 8 / 10 ]


  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0007, Acc task 1 : 9947/10000 (99%), Acc task 2 : 9855/10000 (99%)

EPOCH [ 9 / 10 ]


  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0007, Acc task 1 : 9945/10000 (99%), Acc task 2 : 9839/10000 (98%)

