# Interpretabble image classifier using deep learning

In [21]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchsummary import summary

In [22]:
x = torch.rand(100, 100, 3)
q = 10
x.shape

torch.Size([100, 100, 3])

In [25]:
def img_to_patch(img, patch_size):
    num_patches = int(img.shape[1] / patch_size)
    img1 = torch.stack(torch.split(img, num_patches, dim=2))
    img2 = torch.stack(torch.split(img1, num_patches, dim=2))
    return img2
    
img = torch.randn(3, 200, 200)
img_to_patch(img, 10).shape

torch.Size([10, 10, 3, 20, 20])

In [41]:
class AdhocNet(nn.Module):
    def img_to_patch(self, img, patch_size):
        '''
        Args
            img - (3, W, H)
            patch_size - (Q)
        Returns
            Patches (W/Q, H/Q, 3, Q, Q)
        '''
        num_patches = int(img.shape[1] / patch_size)
        img1 = torch.stack(torch.split(img, num_patches, dim=2))
        img2 = torch.stack(torch.split(img1, num_patches, dim=2))
        return img2
    
    def patch_to_representation(self, patch):
        '''
        Args
            Patch - (3, Q, Q)
        Returns
            Representation - (V)
        '''
        
    def patches_to_representations(self, patches):
        '''
        Args
            patches - (W/Q, H/Q, 3, Q, Q)
        Returns
            representations - (W/Q, H/Q, V)
        '''
        representations = []
        for i in range(patches.shape[0]):
            row = [self.patch_to_representation(patches[i,j]) for j in range(patches.shape[1])]
            representations.append(torch) 
        

    def __init__(self, patch_size):
        super(AdhocNet, self).__init__()
        self.patch_size = patch_size
    def forward(self, x):
        patches = torch.stack([self.img_to_patch(img, self.patch_size) for img in x])
        representations = torch.stack([ self.patches_to_representations(patch) for patch in patches])
        return x
        

model = AdhocNet(10)
model(torch.randn(1, 3, 200, 200))
'asd'

torch.Size([1, 10, 10, 3, 20, 20])


'asd'

In [30]:
class BagNet9(nn.Module):
    def get_resblock(self, conv_parameters):
        res_layers = [ nn.Conv2d(*conv_param) for conv_param in conv_parameters]
        res_block = nn.Sequential(*res_layers)
        return res_block
        
    def __init__(self):
        super(BagNet9, self).__init__()
        self.c1 = nn.Conv2d(3, 64, 3)
        
        self.res1 = self.get_resblock([(64, 64, 1, 2), (64, 64, 3), (64, 64, 1)])
        self.res2 = self.get_resblock([(64, 64, 1), (64, 64, 1), (64, 64, 1)])
        self.res3 = self.get_resblock([(64, 64, 1), (64, 64, 1), (64, 64, 1)])
        
        self.res4 = self.get_resblock([(64, 128, 1, 2), (128, 128, 3), (128, 128, 1)])
        self.res5 = self.get_resblock([(128, 128, 1), (128, 128, 1), (128, 128, 1)])
        self.res6 = self.get_resblock([(128, 128, 1), (128, 128, 1), (128, 128, 1)])
        
        self.res7 = self.get_resblock([(128, 256, 1, 2), (256, 256, 1), (256, 256, 1)])
        self.res8 = self.get_resblock([(256, 256, 1), (256, 256, 1), (256, 256, 1)])
        self.res9 = self.get_resblock([(256, 256, 1), (256, 256, 1), (256, 256, 1)])
        
        self.res10 = self.get_resblock([(256, 512, 1, 2), (512, 512, 1), (512, 512, 1)])
        self.res11 = self.get_resblock([(512, 512, 1), (512, 512, 1), (512, 512, 1)])
        self.res12 = self.get_resblock([(512, 512, 1), (512, 512, 1), (512, 512, 1)])
        self.fc1 = nn.Linear(512 * 14 * 14, 1000)
        
        
        
    def forward(self, x):
        h1 = self.c1(x)
        h2 = self.res1(h1)
        h3 = self.res2(h2)
        h4 = self.res3(h3)
        
        h5 = self.res4(h4)
        h6 = self.res5(h5)
        h7 = self.res6(h6)
        
        h8 = self.res7(h7)
        h9 = self.res8(h8)
        h10 = self.res9(h9)
        
        h11 = self.res10(h10)
        h12 = self.res11(h11)
        h13 = self.res12(h12)
        predictions = self.fc1(h13.view(-1, 512 * 14 * 14))
        return predictions



model = BagNet9()
summary(model, input_size=(3, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 222, 222]           1,792
            Conv2d-2         [-1, 64, 111, 111]           4,160
            Conv2d-3         [-1, 64, 109, 109]          36,928
            Conv2d-4         [-1, 64, 109, 109]           4,160
            Conv2d-5         [-1, 64, 109, 109]           4,160
            Conv2d-6         [-1, 64, 109, 109]           4,160
            Conv2d-7         [-1, 64, 109, 109]           4,160
            Conv2d-8         [-1, 64, 109, 109]           4,160
            Conv2d-9         [-1, 64, 109, 109]           4,160
           Conv2d-10         [-1, 64, 109, 109]           4,160
           Conv2d-11          [-1, 128, 55, 55]           8,320
           Conv2d-12          [-1, 128, 53, 53]         147,584
           Conv2d-13          [-1, 128, 53, 53]          16,512
           Conv2d-14          [-1, 128,