Downloading the MNIST data and keeping only the digits 0 and 1 for training

In [0]:
from random import randrange
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import numpy as np
from numba import jitclass,jit,njit, cuda 

batch_size = 200
criterion = nn.NLLLoss()
use_cuda = True

# Convert each MNIST image to a tensor, then normalize it with mean 0.1307
# and standard deviation 0.3081.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])


def _keep_digits(dataset, digits=(0, 1)):
    """Return the samples of `dataset` whose label is one of `digits`.

    Each kept sample is stored as a two-element list ``[image, label]`` so
    that later cells can replace the image in place.
    """
    return [list(sample) for sample in dataset if sample[1] in digits]


def _trim_to_full_batches(samples, size):
    """Drop trailing samples so that ``len(result)`` is a multiple of `size`.

    The end index is computed explicitly: slicing with ``[0:-(n % size)]``
    would return an EMPTY list whenever ``n`` is already a multiple of
    `size`, because ``-0 == 0``.
    """
    return samples[:len(samples) - len(samples) % size]


# Download the training split and apply the transformations above.
mnist1 = datasets.MNIST('../data', train=True, download=True, transform=transform)
# The test split is read from the same directory.
mnist = datasets.MNIST('../data', train=False, transform=transform)

# Keep only the images labelled '0' or '1', then cut both sets down to a
# whole number of batches.
filtered_train = _trim_to_full_batches(_keep_digits(mnist1), batch_size)
filtered_test = _trim_to_full_batches(_keep_digits(mnist), batch_size)

# filtered_train = mnist1
# filtered_test = mnist

# Wrap the filtered data in DataLoaders, which handle batching and shuffling.
train_loader = torch.utils.data.DataLoader(
    filtered_train,
    batch_size=batch_size, shuffle=True)

test_loader = torch.utils.data.DataLoader(
    filtered_test,
    batch_size=batch_size, shuffle=True)


# Creating the fully connected sub network

Once trained, this network takes a 28X28 input image and produces its prediction through a 10-node output layer, one node per digit.

In [0]:
def create_subnet(learning_rate=0.01, epochs=10,
              log_interval=10):
    """Build and train the fully connected digit classifier.

    The network is trained on the module-level `train_loader` with the
    module-level `criterion`, using SGD with momentum 0.9.

    Args:
        learning_rate: step size passed to the SGD optimizer.
        epochs: number of full passes over `train_loader`.
        log_interval: a progress line is printed every `log_interval` batches.

    Returns:
        The trained `SubNet` module (moved to the GPU when `use_cuda` is set
        and CUDA is available).
    """
    class SubNet(nn.Module):
        """Three linear layers: 784 -> 200 -> 200 -> 10."""

        def __init__(self):
            super(SubNet, self).__init__()
            self.fc1 = nn.Linear(28 * 28, 200)
            self.fc2 = nn.Linear(200, 200)
            self.fc3 = nn.Linear(200, 10)

        def forward(self, x):
            # ReLU after each hidden layer; log-softmax over the class axis
            # so that the output pairs with NLLLoss.
            x = F.relu(self.fc1(x))
            x = F.relu(self.fc2(x))
            x = self.fc3(x)
            # dim is given explicitly: the implicit-dim form is deprecated.
            return F.log_softmax(x, dim=1)

    subnet = SubNet()
    print(subnet)

    # Evaluate the device choice once instead of on every batch.
    on_gpu = use_cuda and torch.cuda.is_available()
    if on_gpu:
        subnet.cuda()

    optimizer = optim.SGD(subnet.parameters(), lr=learning_rate, momentum=0.9)

    for epoch in range(epochs):
        for batch_idx, (data, target) in enumerate(train_loader):
            # Flatten every image to a 784-vector, because the first layer
            # is fully connected.
            data = data.view(-1, 28 * 28)
            if on_gpu:
                data = data.cuda()
                target = target.cuda()

            # Clear gradients left over from the previous step.
            optimizer.zero_grad()
            subnet_out = subnet(data)

            loss = criterion(subnet_out, target)
            loss.backward()
            optimizer.step()

            if batch_idx % log_interval == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * len(data), len(train_loader.dataset),
                           100. * batch_idx / len(train_loader), loss.item()))
    return subnet


if __name__ == "__main__":
    # Train the classifier once and keep it in the module-level `subnet`,
    # which the evaluation and custom-convolution code reads.
    subnet = create_subnet()
    # test_nn()

Use this block to test the accuracy of the subnet.

In [0]:
def test_nn():
    """Evaluate the module-level `subnet` on `test_loader` and print a summary.

    Prints the average per-sample loss and the classification accuracy.
    Returns nothing.
    """
    on_gpu = use_cuda and torch.cuda.is_available()
    total_loss = 0.0
    correct = 0
    # `volatile=True` is no longer honoured by PyTorch; no_grad() is the
    # supported way to skip autograd bookkeeping during evaluation.
    with torch.no_grad():
        for data, target in test_loader:
            data = data.view(-1, 28 * 28)
            if on_gpu:
                data = data.cuda()
                target = target.cuda()
            subnet_out = subnet(data)
            # `criterion` (nn.NLLLoss() with default settings) returns the
            # batch MEAN, so weight it by the batch size before dividing by
            # the number of samples below.
            total_loss += criterion(subnet_out, target).item() * len(data)
            pred = subnet_out.argmax(dim=1)  # index of the max log-probability
            correct += pred.eq(target).sum().item()

    sample_count = len(test_loader.dataset)
    test_loss = total_loss / sample_count
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, sample_count,
        100. * correct / sample_count))

test_nn()

# The composite network

This network reuses the trained subnet: it slides the subnet over a 56X56 input image and trains the remaining layers on the subnet's outputs, in order to recognise a 28X28 digit placed anywhere in the larger image.

This basically will be a convolution mechanism in which instead of a regular filter, we shall use the subnet. i.e., instead of associating weights with a regular 28X28 sized filter, as is the case in convolution, here we are mapping the 28X28 filter with the subnet.

And as we get the output value from a normal filter by taking the sum of all the activation values multiplied by their corresponding weights, here the output is the result of whatever value is given by the subnet.

The output channel is again input into the next hidden layer of the network.

## Data Augmentation
Another aspect of the following network is the data augmentation step, in which we create the 56X56 input image.
For this we take the 28X28 image and add it at a random position into a 56X56 array of zeros. For details refer to the `shuffle` function.

In [0]:
import matplotlib.pyplot as plt
import time
from numba import jit,njit, cuda 
import tensorflow as tf
from google.colab import files

# rndx and rndy determine the position of the 28X28 'image' in the 56X56 grid(here stored in 'a')
def shuffle(rndx,rndy,image):
    """Place a 28X28 image on an otherwise empty 56X56 canvas.

    Args:
        rndx: row offset of the image's top-left corner, 0..28 inclusive.
        rndy: column offset of the image's top-left corner, 0..28 inclusive.
        image: tensor with 28*28 elements (any shape reshapeable to 28X28).

    Returns:
        A float32 tensor of shape (56, 56) on the same device as `image`,
        zero everywhere except the 28X28 window starting at (rndx, rndy).

    Raises:
        ValueError: if an offset would push the image outside the canvas.
    """
    canvas_size, patch_size = 56, 28
    max_offset = canvas_size - patch_size
    if not (0 <= rndx <= max_offset and 0 <= rndy <= max_offset):
        raise ValueError(
            'offsets must lie in [0, {}], got ({}, {})'.format(max_offset, rndx, rndy))

    # Work in torch throughout: no float64 numpy round-trip, and the function
    # also accepts tensors that live on the GPU.
    patch = torch.as_tensor(image).reshape(-1, patch_size).float()
    canvas = torch.zeros((canvas_size, canvas_size),
                         dtype=torch.float, device=patch.device)
    canvas[rndx:rndx + patch_size, rndy:rndy + patch_size] += patch
    return canvas

# this method performs the custom convolution using the 'subnet' over the input batch 'img_batch' of 56X56 size images
def custom_conv(img_batch):
    """Slide the module-level `subnet` over every 56X56 image of a batch.

    For each window position (x, y) with 0 <= x, y < 28, the 28X28 window
    ``img[x:x+28, y:y+28]`` is flattened and classified by `subnet`; the
    index of the largest output becomes the value at (x, y) of the result.

    Args:
        img_batch: tensor of 56X56 images, shaped (B, 56, 56) or anything
            reshapeable to that (e.g. (B, 1, 56, 56)).

    Returns:
        A float tensor of shape (B, 1, 28, 28) on the CPU.
    """
    patch = 28
    # Use the real batch size rather than the global `batch_size`, so that
    # partial batches work as well.
    images = img_batch.reshape(-1, 2 * patch, 2 * patch)
    on_gpu = use_cuda and torch.cuda.is_available()
    ret = torch.zeros([images.shape[0], 1, patch, patch], dtype=torch.float)

    # Only the argmax is kept, so no gradients are needed.
    with torch.no_grad():
        for b, img in enumerate(images):
            # windows[x, y] == img[x:x+28, y:y+28]. unfold yields 29 positions
            # per axis; keep the first 28 to match the 28X28 output.
            windows = img.unfold(0, patch, 1).unfold(1, patch, 1)[:patch, :patch]
            # One row per window, ordered x-major then y: a single reshape
            # replaces 784 incremental torch.cat calls.
            input_to_subnet = windows.reshape(patch * patch, patch * patch).float()
            if on_gpu:
                input_to_subnet = input_to_subnet.cuda()
            subnet_result = subnet(input_to_subnet)
            ret[b, 0] = torch.argmax(subnet_result, dim=1).view(patch, patch)
    return ret



In this section we use the two utility functions above to transform the data and store the results in files, which are later used to train the second network and the composite network.

In [0]:
def _place_randomly(samples):
    """Return copies of `samples` with each image placed on a 56X56 canvas.

    Each sample is an ``[image, label]`` pair; the image is moved to a random
    offset (0..28 on both axes) via `shuffle`, and the label is kept.
    """
    placed = []
    for image, label in samples:
        rndx = randrange(29)
        rndy = randrange(29)
        placed.append([shuffle(rndx, rndy, image), label])
    return placed


def _subnet_features(loader, tag):
    """Run `custom_conv` over every batch of `loader`.

    Args:
        loader: DataLoader yielding (images, labels) batches of 56X56 images.
        tag: word used in the progress line ('Train' or 'Test').

    Returns:
        (features, labels): features has shape (N, 1, 28, 28); labels is a
        float tensor of shape (N,). Both are on the GPU when it is in use.
    """
    on_gpu = use_cuda and torch.cuda.is_available()
    device = torch.device('cuda' if on_gpu else 'cpu')
    # Start from empty tensors so that an empty loader still gives
    # well-shaped results, then concatenate ONCE at the end instead of
    # re-copying the growing tensor on every batch.
    feature_chunks = [torch.zeros([0, 1, 28, 28], dtype=torch.float, device=device)]
    label_chunks = [torch.zeros([0], device=device)]

    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(loader):
            data = data.to(device)
            target = target.to(device)

            feature_chunks.append(custom_conv(data).to(device))
            label_chunks.append(target.float())

            if batch_idx % 10 == 0:
                print('{} Epoch: [{}/{} ({:.0f}%)]'.format(
                    tag, batch_idx * len(data), len(loader.dataset),
                    100. * batch_idx / len(loader)))

    return torch.cat(feature_chunks, 0), torch.cat(label_chunks, 0)


# Data augmentation: every 28X28 digit is placed at a random position on a
# 56X56 canvas.
filtered_train_shuffled = _place_randomly(filtered_train)
filtered_test_shuffled = _place_randomly(filtered_test)

train_loader_shuffled = torch.utils.data.DataLoader(
    filtered_train_shuffled,
    batch_size=batch_size, shuffle=False)

test_loader_shuffled = torch.utils.data.DataLoader(
    filtered_test_shuffled,
    batch_size=batch_size, shuffle=False)

# These tensors go into file storage and are used to train the next network.
final_out, final_out_targets = _subnet_features(train_loader_shuffled, 'Train')
torch.save(final_out, 'file2.pt')
torch.save(final_out_targets, 'file_label.pt')
# files.download('file.pt')

# The test accumulators are built independently of the training ones (the
# previous version initialised them from `final_out` / `final_out_targets`).
final_test, final_test_tagerts = _subnet_features(test_loader_shuffled, 'Test')
torch.save(final_test, 'test.pt')
torch.save(final_test_tagerts, 'test_label.pt')


This section loads from the files and merges the training datasets with their corresponding labels

In [0]:
# Read back the subnet feature maps and their labels written by the
# previous section.
final_out = torch.load('file2.pt')
final_out_targets = torch.load('file_label.pt')
final_test = torch.load('test.pt')
final_test_tagerts = torch.load('test_label.pt')

# Pair every feature map with its integer label: [features, label].
train_data = [
    [features, int(final_out_targets[position])]
    for position, features in enumerate(final_out)
]
test_data = [
    [features, int(final_test_tagerts[position])]
    for position, features in enumerate(final_test)
]

# Batched, shuffled access to both sets.
train_data_loader = torch.utils.data.DataLoader(
    train_data, batch_size=batch_size, shuffle=True)

test_data_loader = torch.utils.data.DataLoader(
    test_data, batch_size=batch_size, shuffle=True)


In [0]:

# The composite network which has one custom convolution layer using the subnet and 2 subsequent fully connected layers of 200 nodes each followed by output layer of 10 nodes
def create_nn(learning_rate=0.01, epochs=10,
              log_interval=10):
    """Build and train the network that consumes the subnet feature maps.

    The network is trained on the module-level `train_data_loader` with the
    module-level `criterion`, using SGD with momentum 0.9.

    Args:
        learning_rate: step size passed to the SGD optimizer.
        epochs: number of full passes over `train_data_loader`.
        log_interval: a progress line is printed every `log_interval` batches.

    Returns:
        The trained `Net` module (moved to the GPU when `use_cuda` is set and
        CUDA is available).
    """
    class Net(nn.Module):
        """Two conv/ReLU/max-pool stages followed by three linear layers.

        For a (B, 1, 28, 28) input: conv (1->5) + pool gives (B, 5, 14, 14),
        conv (5->10) + pool gives (B, 10, 7, 7), which is flattened to 490
        features and passed through 490 -> 200 -> 200 -> 10.
        """

        def __init__(self):
            super(Net, self).__init__()
            self.layer1 = nn.Sequential(
                nn.Conv2d(1, 5, kernel_size=5, stride=1, padding=2),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2, stride=2),
            )
            self.layer2 = nn.Sequential(
                nn.Conv2d(5, 10, kernel_size=5, stride=1, padding=2),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2, stride=2),
            )
            self.fc1 = nn.Linear(490, 200)
            self.fc2 = nn.Linear(200, 200)
            self.f7 = nn.Linear(200, 10)

        def forward(self, x):
            # The subnet-based convolution is NOT applied here; this network
            # expects its output as input.
            # with torch.no_grad():
            #     x = custom_conv(x)
            # Move the input to wherever the weights live, so callers may
            # pass CPU tensors even when the model is on the GPU.
            x = x.to(self.fc1.weight.device)
            x = self.layer1(x)
            x = self.layer2(x)
            # Flatten once to (B, 490); the second identical reshape in the
            # earlier version had no effect.
            x = x.reshape(x.size(0), -1)
            x = F.relu(self.fc1(x))
            x = F.relu(self.fc2(x))
            x = self.f7(x)
            # dim is given explicitly: the implicit-dim form is deprecated.
            return F.log_softmax(x, dim=1)

    net = Net()
    on_gpu = use_cuda and torch.cuda.is_available()
    if on_gpu:
        net.cuda()
    print(net)

    optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9)

    for epoch in range(epochs):
        for batch_idx, (data1, target) in enumerate(train_data_loader):
            if on_gpu:
                data1 = data1.cuda()
                target = target.cuda()

            # Clear gradients left over from the previous step.
            optimizer.zero_grad()
            net_out = net(data1)

            loss = criterion(net_out, target)
            loss.backward()
            optimizer.step()

            if batch_idx % log_interval == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * len(data1), len(train_data_loader.dataset),
                           100. * batch_idx / len(train_data_loader), loss.item()))
    return net

# run a test loop
def test_nn():
    test_loss = 0
    correct = 0
    for data, target in test_data_loader:
        data, target = Variable(data, volatile=True), Variable(target)

        if use_cuda and torch.cuda.is_available():
            data = data.cuda()
            target = target.cuda()
        net_out = net(data)
        # sum up batch loss
        test_loss += criterion(net_out, target).data
        pred = net_out.data.max(1)[1]  # get the index of the max log-probability
        correct += pred.eq(target.data).sum()

    test_loss /= len(test_data_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_data_loader.dataset),
        100. * correct / len(test_data_loader.dataset)))


if __name__ == "__main__":
    # run_opt selects what this cell does; only option 2 (train the network
    # on the subnet feature maps, then evaluate it) is implemented here.
    run_opt = 2
    if run_opt == 1:
        # This branch used to call simple_gradient(), which is not defined
        # anywhere in this notebook; fail with an explicit message instead
        # of a NameError.
        raise NotImplementedError(
            'run_opt == 1 requires simple_gradient(), which is not defined')
    elif run_opt == 2:
        net = create_nn()
        test_nn()

In [0]:
class Net3(nn.Module):
    """Composite model: `custom_conv` followed by the module-level `net`.

    It owns no parameters; both stages are looked up as globals at call time.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        # Move the input to the GPU first when the GPU is in use.
        on_gpu = use_cuda and torch.cuda.is_available()
        digit_map = custom_conv(x.cuda() if on_gpu else x)
        return net(digit_map)


# Final testing Section




In [0]:
# Keep only the test images labelled '0' or '1'.
filtered_test1 = [list(sample) for sample in mnist if sample[1] in (0, 1)]

# Trim to a whole number of batches. The end index is computed explicitly,
# because slicing with [0:-(n % batch_size)] returns an EMPTY list when n is
# already a multiple of batch_size.
filtered_test1 = filtered_test1[:len(filtered_test1) - len(filtered_test1) % batch_size]

# The DataLoader handles batching and shuffling.
test_loader1 = torch.utils.data.DataLoader(
    filtered_test1,
    batch_size=batch_size, shuffle=True)

test_loss = 0.0
correct = 0
net3 = Net3()
# `volatile=True` is no longer honoured by PyTorch; no_grad() is the
# supported way to skip autograd bookkeeping during evaluation.
with torch.no_grad():
    for data, target in test_loader1:
        # Place every 28X28 digit at a random position on a 56X56 canvas.
        # The buffer is sized from the actual batch rather than a literal 200.
        temp = torch.zeros((len(data), 1, 28 * 2, 28 * 2))
        for i, image in enumerate(data):
            rndx = randrange(29)
            rndy = randrange(29)
            temp[i, 0] = shuffle(rndx, rndy, image)
        data = temp

        if use_cuda and torch.cuda.is_available():
            data = data.cuda()
            target = target.cuda()
        data = data.view(-1, 56, 56)

        net_out = net3(data)

        # `criterion` (nn.NLLLoss() with default settings) returns the batch
        # MEAN, so weight it by the batch size before dividing by the number
        # of samples below.
        test_loss += criterion(net_out, target).item() * len(data)
        pred = net_out.argmax(dim=1)  # index of the max log-probability
        correct += pred.eq(target).sum().item()

test_loss /= len(test_loader1.dataset)
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
    test_loss, correct, len(test_loader1.dataset),
    100. * correct / len(test_loader1.dataset)))

# torch.save(model.state_dict(), MODEL_STORE_PATH + 'conv_net_model.ckpt')
