In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers,models

## LENET NETWORK
<img src='../images/lenet.svg'>
<img src='../images/lenet.jpg'>


 (source: Dive into Deep Learning by Aston Zhang, Zachary C. Lipton, Mu Li, and Alexander J. Smola page 252-3)
  

To read more on LENET visit :

<a href='http://yann.lecun.com/exdb/publis/pdf/lecun-98.pdf'>GradientBased Learning Applied to Document Recognition</a>

In [None]:
lenet=keras.models.Sequential([
    layers.Conv2D(6,kernel_size=5,padding='same',activation='sigmoid',input_shape=(28,28,1,)),
    layers.AvgPool2D(pool_size=2,strides=2),
    layers.Conv2D(16,kernel_size=5,activation='sigmoid'),
    layers.AvgPool2D(pool_size=2,strides=2),
    layers.Flatten(),
    layers.Dense(120,activation='sigmoid'),
    layers.Dense(84,activation='sigmoid'),
    layers.Dense(10,activation='sigmoid'),
    
])

As compared to the original network, we took the liberty of replacing the Gaussian activation in
the last layer by a regular dense layer, which tends to be significantly more convenient to train.
Other than that, this network matches the historical definition of LeNet5.
Next, let us take a look of an example. As shown in Fig. 6.6.2, we feed a single-channel example
of size 28 × 28 into the network and perform a forward computation layer by layer printing the
output shape at each layer to make sure we understand what is happening here

in keras the first convolution layer takes a feature map of size <b>(batch_size , height , width , depth axis)</b >which represent the height, width and the depth axis

In [None]:
X=tf.random.uniform(shape=(1,28,28,1))
for layer in lenet.layers:
    X=layer(X)
    print(layer.__class__.__name__,'output shape: \t', X.shape)

## ALEXNET
<img src='../images/alexnet.jpg'>
 (source: Dive into Deep Learning by Aston Zhang, Zachary C. Lipton, Mu Li, and Alexander J. Smola page 261)
  
To read more on Alexnet visit :

<a href='https://papers.nips.cc/paper/2012/file/c399862d3b9d6b76c8436e924a68c45b-Paper.pdf'>ImageNet Classification with Deep Convolutional
Neural Networks
</a>

In [None]:
def alexnet():
    return tf.keras.models.Sequential([
        layers.Conv2D(filters=96,kernel_size=11,strides=4,activation='relu'),
        layers.MaxPool2D(pool_size=3,strides=2),
        layers.Conv2D(filters=256,kernel_size=5,padding='same',activation='relu'),
        layers.MaxPool2D(pool_size=3,strides=2),
        layers.Conv2D(filters=384,kernel_size=3,padding='same',activation='relu'),
        layers.Conv2D(filters=384,kernel_size=3,padding='same',activation='relu'),
        layers.Conv2D(filters=384,kernel_size=3,padding='same',activation='relu'),
        layers.MaxPool2D(pool_size=3,strides=2),
        layers.Flatten(),
        layers.Dense(4096,activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(4096,activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(10)
    ])

We construct a single-channel data instance with both height and width of 224 to observe the output shape of each layer. It matches our diagram above

In [None]:
X = tf.random.uniform(shape=(1, 224, 224,1))
for layer in alexnet().layers:
    X = layer(X)
    print(layer.__class__.__name__, 'output shape:\t', X.shape)

## VGG Blocks
<img src='../images/vvg.jpg'>
The function takes two arguments corresponding to the number of convolutional layers num_convs and the number of output channels num_channels


To read more on VGG visit :

<a href='https://arxiv.org/pdf/1409.1556.pdf'>VERY DEEP CONVOLUTIONAL NETWORKS FOR LARGE-SCALE IMAGE RECOGNITION</a>

<img src='../images/vgg.png'>
The original VGG network had 5 convolutional blocks, among which the first two have one convolutional layer each and the latter three contain two convolutional layers each but we will implement the vgg diagram above of which the first two have two convolutional layer each and the latter three contain three convolutional layers each.

The first block has 64 output channels and each subsequent block doubles the number of output channels, until that number reaches 512. Since this network uses 8 convolutional layers and 3 fully-connected layers.

In [None]:
def vgg_1():
    return tf.keras.models.Sequential([
        layers.Conv2D(filters=64,kernel_size=3,padding='same',activation='relu'),
        layers.Conv2D(filters=64,kernel_size=3,padding='same',activation='relu'),
        layers.MaxPool2D(pool_size=2,strides=2),
        
        layers.Conv2D(128,kernel_size=3,padding='same',activation='relu'),
        layers.Conv2D(128,kernel_size=3,padding='same',activation='relu'),
        layers.MaxPool2D(pool_size=2,strides=2),
        
        layers.Conv2D(256,kernel_size=3,padding='same',activation='relu'),
        layers.Conv2D(256,kernel_size=3,padding='same',activation='relu'),
        layers.Conv2D(256,kernel_size=3,padding='same',activation='relu'),
        layers.MaxPool2D(pool_size=2,strides=2),
        
        layers.Conv2D(512,kernel_size=3,padding='same',activation='relu'),
        layers.Conv2D(512,kernel_size=3,padding='same',activation='relu'),
        layers.Conv2D(512,kernel_size=3,padding='same',activation='relu'),
        layers.MaxPool2D(pool_size=2,strides=2),
        
        layers.Conv2D(512,kernel_size=3,padding='same',activation='relu'),
        layers.Conv2D(512,kernel_size=3,padding='same',activation='relu'),
        layers.Conv2D(512,kernel_size=3,padding='same',activation='relu'),
        layers.MaxPool2D(pool_size=2,strides=2),
        
        layers.Flatten(),
        layers.Dense(4096,activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(4096,activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(10)
        
    ])

# Or implemented as

In [None]:
def vgg_block(num_convs,filters):
    blk=tf.keras.models.Sequential()
    for _ in range(num_convs):
        blk.add(layers.Conv2D(filters,kernel_size=3,padding='same',activation='relu'))
    blk.add(layers.MaxPool2D(pool_size=2,strides=2))
    return blk

conv_arch = ((2, 64), (2, 128), (3, 256), (3, 512), (3, 512))

In [None]:
def vgg(conv_arch):
    blk=tf.keras.models.Sequential()
    for (num_convs,filters) in conv_arch:
        blk.add(vgg_block(num_convs,filters))
    blk.add(layers.Flatten())
    blk.add(layers.Dense(4096,activation='relu'))
    blk.add(layers.Dropout(0.5))
    blk.add(layers.Dense(4096,activation='relu'))
    blk.add(layers.Dropout(0.5))
    blk.add(layers.Dense(10))
    return blk

vgg_2 = vgg(conv_arch)

Next, we will construct a single-channel data example with a height and width of 224 to observe
the output shape of each layer.

In [None]:
X = tf.random.uniform(shape=(1, 224, 224,1))
for layer in vgg_1().layers:
    X = layer(X)
    print(layer.__class__.__name__, 'output shape:\t', X.shape)

In [None]:
X = tf.random.uniform(shape=(1, 224, 224,1))
for layer in vgg_2.layers:
    X = layer(X)
    print(layer.__class__.__name__, 'output shape:\t', X.shape)

##  Network in Network (NiN) BLOCK
<img src='../images/nin.jpg'/>
The NiN block consists of one convolutional layer followed by two 1 × 1 convolutional layers that
act as per-pixel fully-connected layers with ReLU activations. The convolution width of the first
layer is typically set by the user. The subsequent widths are fixed to 1 × 1

In [None]:
def nin_block(filters, kernel_size, strides,padding):
    return tf.keras.models.Sequential([
        layers.Conv2D(filters,kernel_size,strides,padding,activation='relu'),
        layers.Conv2D(filters,kernel_size=1,activation='relu'),
         layers.Conv2D(filters,kernel_size=1,activation='relu')    
    ])

In [None]:

def nin():
    return tf.keras.models.Sequential([
        nin_block(filters=96,kernel_size=11,strides=4,padding='valid'),
        layers.MaxPool2D(pool_size=3,strides=2),
        
        nin_block(filters=256,kernel_size=5,strides=1,padding='same'),
        layers.MaxPool2D(pool_size=3,strides=2),
        
        nin_block(filters=384,kernel_size=5,strides=1,padding='same'),
        layers.MaxPool2D(pool_size=3, strides=2),
        layers.Dropout(0.5),
        
        nin_block(filters=10,kernel_size=3,strides=1,padding='same'),
        layers.GlobalAvgPool2D(),
        tf.keras.layers.Reshape((1, 1, 10)),
        layers.Flatten()
        
    ])

In [None]:
X = tf.random.uniform(shape=(1, 224, 224,1))
for layer in nin().layers:
    X = layer(X)
    print(layer.__class__.__name__, 'output shape:\t', X.shape)

## INCEPTION
<img src="../images/inception.jpg" />

GoogLeNet uses a stack of a total of 9 inception blocks and global average pooling to generate its estimates. Maximum pooling between inception blocks reduced the
dimensionality. The first part is identical to AlexNet and LeNet, the stack of blocks is inherited
from VGG and the global average pooling avoids a stack of fully-connected layers at the end. The
architecture is depicted below
<img src="../images/inception1.jpg" />
<img src="../images/inception2.jpg" />

In [24]:
class Inception_block(tf.keras.Model):
    def __init__(self,c1,c2,c3,c4,**kwargs):
        super().__init__(**kwargs)
        # Path 1 is a single 1 x 1 convolutional layer
        self.p1_1=layers.Conv2D(filters=c1,kernel_size=1,activation='relu')
        # Path 2 is a 1 x 1 convolutional layer followed by a 3 x 3
        # convolutional layer
        self.p2_1=layers.Conv2D(filters=c2[0],kernel_size=1,activation='relu')
        self.p2_2=layers.Conv2D(filters=c2[1],kernel_size=3,padding='same',activation='relu')
         # Path 2 is a 1 x 1 convolutional layer followed by a 5 x 5
        # convolutional layer
        self.p3_1=layers.Conv2D(filters=c3[0],kernel_size=1,activation='relu')
        self.p3_2=layers.Conv2D(filters=c3[1],kernel_size=5,padding='same',activation='relu')
         # Path 4 is a 3 x 3 maximum pooling layer followed by a 1 x 1
        # convolutional layer
        self.p4_1=layers.MaxPool2D(pool_size=3,padding='same',strides=1)
        self.p4_2=layers.Conv2D(filters=c4,kernel_size=1,activation='relu')
    def call(self,x):
        p1=self.p1_1(x)
        p2=self.p2_2(self.p2_1(x))
        p3=self.p3_2(self.p3_1(x))
        p4=self.p4_2(self.p4_1(x))
        return layers.Concatenate()([p1, p2, p3, p4])

In [25]:
def inception():
    return tf.keras.models.Sequential([
        layers.Conv2D(filters=64,kernel_size=7,strides=2,padding='same',activation='relu'),
        layers.MaxPool2D(pool_size=3,padding='same',strides=2),
        
        layers.Conv2D(64,kernel_size=1,activation='relu'),
        layers.Conv2D(192,kernel_size=3,padding='same',activation='relu'),
        layers.MaxPool2D(pool_size=3,padding='same',strides=2),
        # inception(3a)
        Inception_block(c1=64,c2=(96,128),c3=(16,32),c4=32),
               # inception(3b)
        Inception_block(c1=128,c2=(128,192),c3=(32,96),c4=64),
        layers.MaxPool2D(pool_size=3,padding='same',strides=2),
        # inception(4a)
        Inception_block(c1=192,c2=(96,208),c3=(16,48),c4=64),
              # inception(4b)
        Inception_block(c1=160,c2=(112,224),c3=(24,64),c4=64),
        # inception(4c)
        Inception_block(c1=128,c2=(128,256),c3=(24,64),c4=64),
        # inception(4d)
        Inception_block(112,(144,288),(32,64),64),
        # inception(4e)
        Inception_block(256,(160,320),(32,128),128),
        layers.MaxPool2D(pool_size=3,padding='same',strides=2),
         # inception(5a)
        Inception_block(256, (160, 320), (32, 128), 128),
              # inception(5b)
        Inception_block(384, (192, 384), (48, 128), 128),
        layers.MaxPool2D(pool_size=3,padding='same',strides=2),
        layers.GlobalAvgPool2D(),
        layers.Flatten(),
        layers.Dropout(0.4),
        layers.Dense(10,activation='softmax')
    ])

In [26]:
X = tf.random.uniform(shape=(1, 1, 96, 96))
for layer in inception().layers:
    X = layer(X)
    print(layer.__class__.__name__, 'output shape:\t', X.shape)

Conv2D output shape:	 (1, 1, 48, 64)
MaxPooling2D output shape:	 (1, 1, 24, 64)
Conv2D output shape:	 (1, 1, 24, 64)
Conv2D output shape:	 (1, 1, 24, 192)
MaxPooling2D output shape:	 (1, 1, 12, 192)
Inception_block output shape:	 (1, 1, 12, 256)
Inception_block output shape:	 (1, 1, 12, 480)
MaxPooling2D output shape:	 (1, 1, 6, 480)
Inception_block output shape:	 (1, 1, 6, 512)
Inception_block output shape:	 (1, 1, 6, 512)
Inception_block output shape:	 (1, 1, 6, 512)
Inception_block output shape:	 (1, 1, 6, 528)
Inception_block output shape:	 (1, 1, 6, 832)
MaxPooling2D output shape:	 (1, 1, 3, 832)
Inception_block output shape:	 (1, 1, 3, 832)
Inception_block output shape:	 (1, 1, 3, 1024)
MaxPooling2D output shape:	 (1, 1, 2, 1024)
GlobalAveragePooling2D output shape:	 (1, 1024)
Flatten output shape:	 (1, 1024)
Dropout output shape:	 (1, 1024)
Dense output shape:	 (1, 10)


# RESNET
<p>To understand how resnet work read </p>
<a href='https://arxiv.org/pdf/1512.03385.pdf'>Deep Residual Learning for Image Recognition<a/>
    
<a href='https://arxiv.org/pdf/1603.05027.pdf'>Identity Mappings in Deep Residual Networks</a>
<img src="../images/resnet.jpg"  width='1000px'>
Source:  <a href='https://arxiv.org/pdf/1512.03385.pdf'>Deep Residual Learning for Image Recognition<a/>
  

In [27]:
class Residual(keras.models.Model):
    def __init__(self,num_filters,stride=1,downsample=False,**kwargs):
        super().__init__(**kwargs)
        self.cov1=layers.Conv2D(num_filters,kernel_size=3,strides=stride,padding='same')
        self.cov2=layers.Conv2D(num_filters,kernel_size=3,padding='same')
        if downsample:
            self.downsample=layers.Conv2D(num_filters,kernel_size=1,strides=stride)
        else:
            self.downsample=None
        self.bn1=layers.BatchNormalization()
        self.bn2=layers.BatchNormalization()
        self.relu=layers.ReLU()
    def call(self,x):
        hx=self.relu(self.bn1(self.cov1(x)))
        hx=self.bn2(self.cov2(hx))
        if self.downsample:
            x=self.downsample(x)
        return self.relu(x+hx)

In [28]:
def resnet_block(num_filters,num_residuals,first_block=False):
    blk=models.Sequential()
    for residual in range(num_residuals):
        if residual==0 and not first_block:
            blk.add(Residual(num_filters=num_filters,downsample=True,stride=2))
        else:
            blk.add(Residual(num_filters=num_filters))
    return blk
            

In [29]:
resnet34=models.Sequential([
          layers.Conv2D(64,kernel_size=7,strides=2,padding='same'),
          layers.BatchNormalization(),
          layers.Activation('relu'),
          layers.MaxPool2D(pool_size=3,strides=2,padding='same'),
          resnet_block(64,3,first_block=True),
          resnet_block(128,4),
          resnet_block(256,6),
          resnet_block(512,3),
          layers.GlobalAvgPool2D(),
          layers.Flatten(),
          layers.Dense(10)
         ])

In [30]:
X = tf.random.uniform(shape=(1, 224, 224,1))
for layer in resnet34.layers:
    X = layer(X)
    print(layer.__class__.__name__, 'output shape:\t', X.shape)

Conv2D output shape:	 (1, 112, 112, 64)
BatchNormalization output shape:	 (1, 112, 112, 64)
Activation output shape:	 (1, 112, 112, 64)
MaxPooling2D output shape:	 (1, 56, 56, 64)
Sequential output shape:	 (1, 56, 56, 64)
Sequential output shape:	 (1, 28, 28, 128)
Sequential output shape:	 (1, 14, 14, 256)
Sequential output shape:	 (1, 7, 7, 512)
GlobalAveragePooling2D output shape:	 (1, 512)
Flatten output shape:	 (1, 512)
Dense output shape:	 (1, 10)
