<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#INCEPTION" data-toc-modified-id="INCEPTION-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>INCEPTION</a></span></li></ul></div>

In [1]:
import mxnet
from mxnet import gluon,npx,np,autograd
from mxnet.gluon import nn
npx.set_np()

## INCEPTION
<img src="../images/inception.jpg" />

In [2]:
class Inception_block(nn.Block):
    def __init__(self,c1,c2,c3,c4,**kwargs):
        super().__init__(**kwargs)
        # Path 1 is a single 1 x 1 convolutional layer
        self.p1_1=nn.Conv2D(c1,kernel_size=1,activation='relu')
        # Path 2 is a 1 x 1 convolutional layer followed by a 3 x 3
        # convolutional layer
        self.p2_1=nn.Conv2D(c2[0],kernel_size=1,activation='relu')
        self.p2_2=nn.Conv2D(c2[1],kernel_size=3,padding=1,activation='relu')
        # Path 2 is a 1 x 1 convolutional layer followed by a 5 x 5
        # convolutional layer
        self.p3_1=nn.Conv2D(c3[0],kernel_size=1,activation='relu')
        self.p3_2=nn.Conv2D(c3[1],kernel_size=5,padding=2,activation='relu')
        # Path 4 is a 3 x 3 maximum pooling layer followed by a 1 x 1
        # convolutional layer
        self.p4_1=nn.MaxPool2D(pool_size=3,padding=1,strides=1)
        self.p4_2=nn.Conv2D(c4,kernel_size=1,activation='relu')
    def forward(self,x):
        p1=self.p1_1(x)
        p2=self.p2_2(self.p2_1(x))
        p3=self.p3_2(self.p3_1(x))
        p4=self.p4_2(self.p4_1(x))
        return np.concatenate((p1,p2,p3,p4),axis=1)

GoogLeNet uses a stack of a total of 9 inception blocks and global average pooling to generate its estimates. Maximum pooling between inception blocks reduced the
dimensionality. The first part is identical to AlexNet and LeNet, the stack of blocks is inherited
from VGG and the global average pooling avoids a stack of fully-connected layers at the end. The
architecture is depicted below
<img src="../images/inception1.jpg" />
<img src="../images/inception2.jpg" />

In [3]:
inception=nn.Sequential()
inception.add(nn.Conv2D(64,kernel_size=7,strides=2,padding=1,activation='relu'),
              nn.MaxPool2D(pool_size=3,padding=1,strides=2),
              
              nn.Conv2D(64,kernel_size=1,activation='relu'),
              nn.Conv2D(192,kernel_size=3,padding=1,activation='relu'),
              nn.MaxPool2D(pool_size=3,padding=1,strides=2),
              # inception(3a)
              Inception_block(c1=64,c2=(96,128),c3=(16,32),c4=32),
              # inception(3b)
              Inception_block(c1=128,c2=(128,192),c3=(32,96),c4=64),
              nn.MaxPool2D(pool_size=3,strides=2,padding=1),
              # inception(4a)
              Inception_block(c1=192,c2=(96,208),c3=(16,48),c4=64),
              # inception(4b)
              Inception_block(c1=160,c2=(112,224),c3=(24,64),c4=64),
              # inception(4c)
              Inception_block(c1=128,c2=(128,256),c3=(24,64),c4=64),
              # inception(4d)
              Inception_block(112,(144,288),(32,64),64),
              # inception(4e)
              Inception_block(256,(160,320),(32,128),128),
              nn.MaxPool2D(pool_size=3,strides=2,padding=1),
              # inception(5a)
              Inception_block(256, (160, 320), (32, 128), 128),
              # inception(5b)
              Inception_block(384, (192, 384), (48, 128), 128),
              nn.GlobalAvgPool2D(),
              nn.Dense(10)
              )

In [4]:
X = np.random.uniform(size=(1, 1, 96, 96))
inception.initialize()
for layer in inception:
    X = layer(X)
    print(layer.name, 'output shape:\t', X.shape)

conv0 output shape:	 (1, 64, 46, 46)
pool0 output shape:	 (1, 64, 23, 23)
conv1 output shape:	 (1, 64, 23, 23)
conv2 output shape:	 (1, 192, 23, 23)
pool1 output shape:	 (1, 192, 12, 12)
inception_block0 output shape:	 (1, 256, 12, 12)
inception_block1 output shape:	 (1, 480, 12, 12)
pool4 output shape:	 (1, 480, 6, 6)
inception_block2 output shape:	 (1, 512, 6, 6)
inception_block3 output shape:	 (1, 512, 6, 6)
inception_block4 output shape:	 (1, 512, 6, 6)
inception_block5 output shape:	 (1, 528, 6, 6)
inception_block6 output shape:	 (1, 832, 6, 6)
pool10 output shape:	 (1, 832, 3, 3)
inception_block7 output shape:	 (1, 832, 3, 3)
inception_block8 output shape:	 (1, 1024, 3, 3)
pool13 output shape:	 (1, 1024, 1, 1)
dense0 output shape:	 (1, 10)
