In [1]:
import torch.nn as nn
import torch
import numpy as np
import matplotlib.pyplot as plt 
import random
from torchsummary import summary

# Seed Python's `random` module, which drives the random choices made in
# NASModuleConv.widen (layer / factor) and prepareConvBlock (kernel size).
# NOTE(review): torch's own RNG is not seeded here, so layer weight
# initialisation is presumably not reproducible across runs -- consider
# adding torch.manual_seed(...) as well.
random.seed('Mudit')

In [2]:
class NASModuleConv(nn.Module):
    """Growable stack of Conv2d -> BatchNorm2d -> ReLU blocks with one Linear head.

    ``self.layers`` is a plain Python list holding the blocks in order, three
    modules per block, so the conv layers sit at indices 0, 3, 6, ...  The
    list is wrapped into ``self.nns`` (an ``nn.Sequential``) by
    ``updateModel``, which has to be called after every structural change.
    """

    def __init__(self, input, output):
        """Build the initial single-block network.

        input  : indexable (channels, height, width) of one sample.
        output : number of output features of the final Linear layer.
        """
        super(NASModuleConv, self).__init__()

        self.input = input
        self.output = output

        c1 = nn.Conv2d(input[0], 32, kernel_size=3)
        b1 = nn.BatchNorm2d(32)
        r1 = nn.ReLU()

        self.layers = [c1, b1, r1]
        # False until the first Linear head has been created by updateModel.
        self.flag = False

        # Builds self.nns, self.convert and self.fc.
        self.updateModel()

    def f_forward(self, x):
        """Run only the convolutional stack and return its feature map."""
        return self.nns(x)

    def forward(self, x):
        """Run the conv stack, flatten per sample and apply the Linear head."""
        out = self.f_forward(x)
        # Flatten everything but the batch dimension; a feature map whose size
        # does not match self.convert then fails loudly in self.fc.
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out

    def updateModel(self):
        """Rebuild ``self.nns`` from ``self.layers`` and resize the Linear head.

        The flattened feature size is measured by pushing one random sample of
        shape ``(1,) + input`` through the conv stack.  When a head already
        exists, its bias and the overlapping weight columns are carried over
        into the new head.
        """
        was_training = self.training
        self.nns = nn.Sequential(*self.layers)

        probe = torch.randn((1, self.input[0], self.input[1], self.input[2]))
        # Probe in eval mode without autograd: the random sample must not
        # update the BatchNorm running statistics, and train-mode BatchNorm
        # rejects a single sample whose feature map is 1x1.
        self.nns.eval()
        try:
            with torch.no_grad():
                shape = self.f_forward(probe).shape
        finally:
            self.nns.train(was_training)

        self.convert = shape[1] * shape[2] * shape[3]

        final = nn.Linear(self.convert, self.output)

        if self.flag:
            mi = min(self.convert, self.fc.weight.data.shape[1])
            final.weight.data[:, :mi] = self.fc.weight.data[:, :mi]
            # The number of outputs never changes, so the bias carries over whole.
            final.bias.data.copy_(self.fc.bias.data)

        final.train(was_training)
        self.fc = final
        self.flag = True

    def widen(self):
        """Widen one randomly chosen conv block by a random factor of 2 or 4."""
        # Floor division keeps this an int on Python 3 as well.
        last_block = len(self.layers) // 3 - 1
        layer = random.randint(0, last_block) * 3

        factor = 2
        if random.random() > 0.5:
            factor = 4

        self.widen_conv(layer, factor)

    def widen_conv(self, layer, factor):
        """Multiply the output channels of the conv at index ``layer`` by ``factor``.

        Affects channels only, not the kernel size.  The conv, its BatchNorm
        and (when present) the next block's conv are replaced by resized
        copies.  Existing weights, biases and BatchNorm running statistics are
        copied into the leading channels; the added channels keep their fresh
        initialisation.

        layer  : index into ``self.layers`` of a Conv2d (0, 3, 6, ...).
        factor : integer multiplier for the number of output channels.
        """
        if layer < 0:
            layer += len(self.layers)
        has_next = layer + 3 < len(self.layers)

        old_conv = self.layers[layer]
        old_bn = self.layers[layer + 1]
        orig_channels = old_conv.out_channels

        new_conv = nn.Conv2d(old_conv.in_channels,
                             orig_channels * factor,
                             kernel_size=old_conv.kernel_size,
                             stride=old_conv.stride,
                             padding=old_conv.padding,
                             bias=old_conv.bias is not None)
        new_conv.weight.data[:orig_channels] = old_conv.weight.data
        if old_conv.bias is not None:
            new_conv.bias.data[:orig_channels] = old_conv.bias.data

        if has_next:
            # The following conv needs more input channels to accept the wider output.
            old_next = self.layers[layer + 3]
            new_next = nn.Conv2d(new_conv.out_channels,
                                 old_next.out_channels,
                                 kernel_size=old_next.kernel_size,
                                 stride=old_next.stride,
                                 padding=old_next.padding,
                                 bias=old_next.bias is not None)
            new_next.weight.data[:, :orig_channels] = old_next.weight.data
            if old_next.bias is not None:
                new_next.bias.data.copy_(old_next.bias.data)

        new_bn = nn.BatchNorm2d(new_conv.out_channels)
        new_bn.weight.data[:orig_channels] = old_bn.weight.data
        new_bn.bias.data[:orig_channels] = old_bn.bias.data
        if old_bn.running_mean is not None:
            new_bn.running_mean[:orig_channels] = old_bn.running_mean
            new_bn.running_var[:orig_channels] = old_bn.running_var
        if getattr(old_bn, 'num_batches_tracked', None) is not None:
            new_bn.num_batches_tracked.copy_(old_bn.num_batches_tracked)

        self.layers[layer] = new_conv
        self.layers[layer + 1] = new_bn
        if has_next:
            self.layers[layer + 3] = new_next

        self.updateModel()

    def prepareConvBlock(self, channels_prev):
        """Create a new (Conv2d, BatchNorm2d, ReLU) triple that keeps the channel count.

        channels_prev : channels of the closest preceding conv.
        The kernel size is 3 or 5, picked at random.
        """
        kernel = 3
        if random.random() > 0.5:
            kernel = 5

        c = nn.Conv2d(channels_prev, channels_prev, kernel_size=kernel)
        b = nn.BatchNorm2d(channels_prev)
        r = nn.ReLU()

        return c, b, r

    def addLayer(self, position):
        """Insert a new conv block right after the block whose conv is at ``position``.

        position : index into ``self.layers`` of a Conv2d (0, 3, 6, ...).
        If the network cannot be rebuilt with the new block (for example its
        kernel no longer fits the feature map), the insertion is undone and
        the original exception is re-raised.
        """
        if position < 0:
            position += len(self.layers)

        current_layer = self.layers[position]
        block = self.prepareConvBlock(current_layer.out_channels)

        insert_at = position + 3
        self.layers[insert_at:insert_at] = block

        try:
            self.updateModel()
        except Exception:
            # Restore the previous, working architecture before propagating.
            del self.layers[insert_at:insert_at + 3]
            self.updateModel()
            raise

In [3]:
# a = [1,2,3,4,5,6,7,8,9]
# position = 4
# a.insert(position+1,10)
# a.insert(position+2,20)
# a.insert(position+3,30)

# position = 9
# a.insert(position+1,40)
# a.insert(position+2,50)
# a.insert(position+3,60)

# print a

In [4]:
# input = (channels, height, width) of one sample, here (1, 28, 28); 5 output features.
model = NASModuleConv((1,28,28),5)

In [5]:
model

NASModuleConv(
  (nns): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (fc): Linear(in_features=21632, out_features=5, bias=True)
)

In [6]:
# Widen one randomly chosen conv block; the factor (2 or 4) is also picked at random.
model.widen()

In [7]:
model

NASModuleConv(
  (nns): Sequential(
    (0): Conv2d(1, 128, kernel_size=(3, 3), stride=(1, 1))
    (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (fc): Linear(in_features=86528, out_features=5, bias=True)
)

In [8]:
# Sanity-check the forward pass, then insert two conv blocks and show how
# model.layers grows. The print calls are written so that they produce the
# same text on Python 2 and Python 3.
print('Model Output: %s' % (model(torch.randn(10,1,28,28)).shape,))
print(model)
print('-'*20)
print(model.layers)
print('-'*20)
model.addLayer(0)
print(model.layers)
print('-'*20)
model.addLayer(3)
print(model.layers)
print('-'*20)
print(model)

Model Output: torch.Size([10, 5])
NASModuleConv(
  (nns): Sequential(
    (0): Conv2d(1, 128, kernel_size=(3, 3), stride=(1, 1))
    (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (fc): Linear(in_features=86528, out_features=5, bias=True)
)
--------------------
[Conv2d(1, 128, kernel_size=(3, 3), stride=(1, 1)), BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True), ReLU()]
--------------------
[Conv2d(1, 128, kernel_size=(3, 3), stride=(1, 1)), BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True), ReLU(), Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1)), BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True), ReLU()]
--------------------
[Conv2d(1, 128, kernel_size=(3, 3), stride=(1, 1)), BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True), ReLU(), Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1)), BatchNorm2d(1

In [9]:
# Widen again: picks one of the existing conv blocks at random.
model.widen()

In [10]:
# Inspect the scale (weight) vector of the first block's BatchNorm2d after widening.
model.layers[1].weight.data

tensor([0.9744, 0.3039, 0.0735, 0.9002, 0.4251, 0.9258, 0.8461, 0.2636, 0.1351,
        0.2947, 0.5732, 0.2671, 0.6787, 0.0910, 0.1207, 0.8773, 0.1132, 0.3930,
        0.5699, 0.6902, 0.7684, 0.4977, 0.2437, 0.0654, 0.2657, 0.1693, 0.6511,
        0.0983, 0.9934, 0.3837, 0.3323, 0.0646, 0.0853, 0.9672, 0.0455, 0.4368,
        0.8746, 0.8337, 0.6089, 0.6594, 0.4513, 0.8022, 0.7127, 0.1325, 0.0099,
        0.3402, 0.7322, 0.1100, 0.4493, 0.5946, 0.7925, 0.1680, 0.2141, 0.8052,
        0.4475, 0.0757, 0.4888, 0.3264, 0.0525, 0.8132, 0.8985, 0.2353, 0.2911,
        0.6189, 0.5303, 0.0211, 0.4687, 0.3855, 0.6118, 0.5997, 0.0178, 0.6757,
        0.1807, 0.5961, 0.1106, 0.4579, 0.5325, 0.4014, 0.7732, 0.2008, 0.8049,
        0.3666, 0.5971, 0.1736, 0.6626, 0.8407, 0.6090, 0.7426, 0.8039, 0.0661,
        0.9682, 0.3602, 0.4471, 0.1682, 0.9046, 0.2718, 0.8901, 0.6059, 0.4456,
        0.2044, 0.3003, 0.6540, 0.1662, 0.9437, 0.1218, 0.0242, 0.7199, 0.3299,
        0.6854, 0.3582, 0.7174, 0.0570, 

In [11]:
# Fresh model on a smaller 19x19 single-channel input.
model = NASModuleConv((1,19,19),5)
print(model)

NASModuleConv(
  (nns): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (fc): Linear(in_features=9248, out_features=5, bias=True)
)


In [12]:
# Insert a new conv block directly after the first block.
model.addLayer(0)
print(model)

NASModuleConv(
  (nns): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Conv2d(32, 32, kernel_size=(5, 5), stride=(1, 1))
    (4): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
  )
  (fc): Linear(in_features=5408, out_features=5, bias=True)
)


In [13]:
# Insert another conv block directly after the first block.
model.addLayer(0)
print(model)

NASModuleConv(
  (nns): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Conv2d(32, 32, kernel_size=(5, 5), stride=(1, 1))
    (4): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
    (6): Conv2d(32, 32, kernel_size=(5, 5), stride=(1, 1))
    (7): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): ReLU()
  )
  (fc): Linear(in_features=2592, out_features=5, bias=True)
)


In [14]:
# Insert another conv block directly after the first block.
model.addLayer(0)
print(model)

NASModuleConv(
  (nns): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1))
    (4): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
    (6): Conv2d(32, 32, kernel_size=(5, 5), stride=(1, 1))
    (7): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): ReLU()
    (9): Conv2d(32, 32, kernel_size=(5, 5), stride=(1, 1))
    (10): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): ReLU()
  )
  (fc): Linear(in_features=1568, out_features=5, bias=True)
)


In [15]:
# Insert another conv block directly after the first block.
model.addLayer(0)
print(model)

NASModuleConv(
  (nns): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Conv2d(32, 32, kernel_size=(5, 5), stride=(1, 1))
    (4): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
    (6): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1))
    (7): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): ReLU()
    (9): Conv2d(32, 32, kernel_size=(5, 5), stride=(1, 1))
    (10): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): ReLU()
    (12): Conv2d(32, 32, kernel_size=(5, 5), stride=(1, 1))
    (13): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (14): ReLU()
  )
  (fc): Linear(in_features=288, out_features=5, bias=True)
)


In [16]:
# Widen one randomly chosen conv block of the deeper model; when the chosen
# block is not the last one, the following conv is rebuilt to accept the
# larger number of input channels.
model.widen()

In [17]:
print(model)

NASModuleConv(
  (nns): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
    (6): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1))
    (7): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): ReLU()
    (9): Conv2d(32, 32, kernel_size=(5, 5), stride=(1, 1))
    (10): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): ReLU()
    (12): Conv2d(32, 32, kernel_size=(5, 5), stride=(1, 1))
    (13): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (14): ReLU()
  )
  (fc): Linear(in_features=288, out_features=5, bias=True)
)


## Skip Connections TODO :

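Find pairs of conv blocks where the input channels of the first conv equal the output channels of the second conv.

Then simply perform `torch.cat((x, model(x)), 1)`. Note that `torch.cat` along dim 1 also requires the spatial dimensions (height, width) of `x` and `model(x)` to match, so the convs in between must preserve the spatial size (e.g. via padding).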

https://medium.com/@14prakash/understanding-and-implementing-architectures-of-resnet-and-resnext-for-state-of-the-art-image-cc5d0adf648e

https://stackoverflow.com/questions/51773208/pytorch-skip-connection-in-a-sequential-model

In [19]:
# torch.cat?

In [48]:
# Two ways of reducing a 28x28 input to a 12x12 feature map:
#   one : a single unpadded 5x5 conv followed by 2x2 max-pooling
#   two : four stacked unpadded 5x5 convs
# All layers are defined before first use so the cell runs on a fresh kernel.
x = torch.randn((10,1,28,28))

maxp = nn.MaxPool2d(kernel_size=2)

p = nn.Conv2d(1,32,kernel_size=5)
p1 = nn.Conv2d(32,32,kernel_size=5)
p2 = nn.Conv2d(32,32,kernel_size=5)
p3 = nn.Conv2d(32,32,kernel_size=5)
p4 = nn.Conv2d(32,32,kernel_size=5)
p5 = nn.Conv2d(32,32,kernel_size=5)

one = maxp(p(x))
print(one.shape)

two = p3(p2(p1(p(x))))
print(two.shape)

torch.Size([10, 32, 12, 12])
torch.Size([10, 32, 12, 12])


In [22]:
p

Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1))

In [25]:
x.shape

torch.Size([10, 1, 28, 28])

In [26]:
p(x).shape

torch.Size([10, 32, 24, 24])

In [23]:
# Demonstrates why a naive concatenation skip connection fails here: x is
# 28x28 while p(x) is 24x24 (p is an unpadded 5x5 conv), and torch.cat along
# dim 1 requires all other dimensions to match, so this raises RuntimeError.
torch.cat([x, p(x)], 1)

RuntimeError: invalid argument 0: Sizes of tensors must match except in dimension 1. Got 28 and 24 in dimension 2 at /Users/soumith/code/builder/wheel/pytorch-src/aten/src/TH/generic/THTensorMath.cpp:3616