<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#VGG-Blocks" data-toc-modified-id="VGG-Blocks-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>VGG Blocks</a></span></li></ul></div>

In [1]:
import torch
from torchsummary import summary
import torch.nn.functional as F
from torch import nn

## VGG Blocks
<img src='../images/vvg.jpg'>
The `vgg_block` function defined below takes three arguments: the number of convolutional layers `num_convs`, the number of input channels `in_channels`, and the number of output channels `out_channels`.


To read more about VGG, see:

<a href='https://arxiv.org/pdf/1409.1556.pdf'>VERY DEEP CONVOLUTIONAL NETWORKS FOR LARGE-SCALE IMAGE RECOGNITION</a>

<img src='../images/vgg.png'>

In [2]:
# VGG-style network spelled out layer by layer as a single flat nn.Sequential.
# Every convolution is 3x3 with padding 1; every pooling layer is a 2x2
# max-pool with stride 2.
vgg_1 = nn.Sequential(
    # Block 1: 1 -> 64 channels, two convolutions, then pooling
    nn.Conv2d(1, 64, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.Conv2d(64, 64, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),

    # Block 2: 64 -> 128 channels, two convolutions, then pooling
    nn.Conv2d(64, 128, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.Conv2d(128, 128, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),

    # Block 3: 128 -> 256 channels, three convolutions, then pooling
    nn.Conv2d(128, 256, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.Conv2d(256, 256, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.Conv2d(256, 256, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),

    # Block 4: 256 -> 512 channels, three convolutions, then pooling
    nn.Conv2d(256, 512, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.Conv2d(512, 512, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.Conv2d(512, 512, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),

    # Block 5: 512 -> 512 channels, three convolutions, then pooling
    nn.Conv2d(512, 512, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.Conv2d(512, 512, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.Conv2d(512, 512, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),

    # Classifier head: flatten the 512 x 7 x 7 feature maps, then three
    # fully-connected layers with dropout in between, ending in 10 outputs
    nn.Flatten(),
    nn.Linear(512 * 7 * 7, 4096),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(4096, 4096),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(4096, 10),
)

In [3]:
def vgg_block(num_convs, in_channels, out_channels):
    """Build one VGG block as an ``nn.Sequential``.

    The block is ``num_convs`` repetitions of a 3x3 convolution (padding 1)
    followed by a ReLU, and then a single 2x2 max-pooling layer with stride 2.

    Args:
        num_convs: Number of convolution + ReLU pairs in the block.
        in_channels: Channel count fed into the first convolution.
        out_channels: Channel count produced by every convolution in the block.

    Returns:
        An ``nn.Sequential`` holding the convolutions, activations and the
        final pooling layer, in that order.
    """
    stages = []
    channels = in_channels
    for _ in range(num_convs):
        stages += [
            nn.Conv2d(channels, out_channels, kernel_size=3, padding=1),
            nn.ReLU(),
        ]
        # Only the first convolution sees the block's input channel count.
        channels = out_channels
    stages.append(nn.MaxPool2d(kernel_size=2, stride=2))
    return nn.Sequential(*stages)

The original VGG network had 5 convolutional blocks, of which the first two have one convolutional layer each and the last three have two convolutional layers each. Here we instead implement the VGG architecture shown in the diagram above, in which the first two blocks have two convolutional layers each and the last three have three convolutional layers each.

The first block has 64 output channels and each subsequent block doubles the number of output channels, until that number reaches 512. Since this network uses 13 convolutional layers (2 + 2 + 3 + 3 + 3) and 3 fully-connected layers, it is often called VGG-16.

In [4]:
# One (num_convs, out_channels) pair per convolutional block, in order.
conv_arch = (
    (2, 64),
    (2, 128),
    (3, 256),
    (3, 512),
    (3, 512),
)

In [5]:
def vgg(conv_arch, in_channels=1, num_classes=10):
    """Assemble a VGG-style network from a block-wise specification.

    Args:
        conv_arch: Iterable of ``(num_convs, out_channels)`` pairs, one per
            convolutional block, in order. Must contain at least one pair.
        in_channels: Channel count of the input image. Defaults to 1
            (single-channel input), the value that was previously hard-coded.
        num_classes: Size of the final output layer. Defaults to 10, the
            value that was previously hard-coded.

    Returns:
        An ``nn.Sequential`` made of the convolutional blocks, a flatten
        layer, and three fully-connected layers with ReLU and dropout between
        them.

    Raises:
        ValueError: If ``conv_arch`` is empty. Without a block there is no
            channel count to size the first fully-connected layer from.
    """
    conv_arch = tuple(conv_arch)
    if not conv_arch:
        raise ValueError("conv_arch must contain at least one (num_convs, out_channels) pair")

    # The convolutional part
    conv_blks = []
    for num_convs, out_channels in conv_arch:
        conv_blks.append(vgg_block(num_convs, in_channels, out_channels))
        # The next block consumes what this block produced.
        in_channels = out_channels

    # After the loop `in_channels` holds the last block's output channels.
    # NOTE: the 7 * 7 factor assumes the final feature map is 7x7, e.g. a
    # 224x224 input halved by five blocks; other input sizes or block counts
    # need a different value here.
    return nn.Sequential(
        *conv_blks,
        nn.Flatten(),
        # The fully-connected part
        nn.Linear(in_channels * 7 * 7, 4096), nn.ReLU(), nn.Dropout(0.5),
        nn.Linear(4096, 4096), nn.ReLU(), nn.Dropout(0.5),
        nn.Linear(4096, num_classes))

Next, we will construct a single-channel data example with a height and width of 224 to observe
the output shape of each layer.

In [6]:
# Instantiate the network described by conv_arch using the vgg() builder.
vgg_2 = vgg(conv_arch)

In [7]:
# Random single-channel 224x224 input (batch of one), used only to trace
# the output shape of each layer.
X = torch.randn(size=(1, 1, 224, 224))
# summary() prints the table itself; the trailing ';' keeps Jupyter from also
# echoing the returned object, which shows the same table a second time.
summary(vgg_2, X);

Layer (type:depth-idx)                   Output Shape              Param #
├─Sequential: 1-1                        [-1, 64, 112, 112]        --
|    └─Conv2d: 2-1                       [-1, 64, 224, 224]        640
|    └─ReLU: 2-2                         [-1, 64, 224, 224]        --
|    └─Conv2d: 2-3                       [-1, 64, 224, 224]        36,928
|    └─ReLU: 2-4                         [-1, 64, 224, 224]        --
|    └─MaxPool2d: 2-5                    [-1, 64, 112, 112]        --
├─Sequential: 1-2                        [-1, 128, 56, 56]         --
|    └─Conv2d: 2-6                       [-1, 128, 112, 112]       73,856
|    └─ReLU: 2-7                         [-1, 128, 112, 112]       --
|    └─Conv2d: 2-8                       [-1, 128, 112, 112]       147,584
|    └─ReLU: 2-9                         [-1, 128, 112, 112]       --
|    └─MaxPool2d: 2-10                   [-1, 128, 56, 56]         --
├─Sequential: 1-3                        [-1, 256, 28, 28]         --
|

Layer (type:depth-idx)                   Output Shape              Param #
├─Sequential: 1-1                        [-1, 64, 112, 112]        --
|    └─Conv2d: 2-1                       [-1, 64, 224, 224]        640
|    └─ReLU: 2-2                         [-1, 64, 224, 224]        --
|    └─Conv2d: 2-3                       [-1, 64, 224, 224]        36,928
|    └─ReLU: 2-4                         [-1, 64, 224, 224]        --
|    └─MaxPool2d: 2-5                    [-1, 64, 112, 112]        --
├─Sequential: 1-2                        [-1, 128, 56, 56]         --
|    └─Conv2d: 2-6                       [-1, 128, 112, 112]       73,856
|    └─ReLU: 2-7                         [-1, 128, 112, 112]       --
|    └─Conv2d: 2-8                       [-1, 128, 112, 112]       147,584
|    └─ReLU: 2-9                         [-1, 128, 112, 112]       --
|    └─MaxPool2d: 2-10                   [-1, 128, 56, 56]         --
├─Sequential: 1-3                        [-1, 256, 28, 28]         --
|

In [8]:
# Random single-channel 224x224 input (batch of one), used only to trace
# the output shape of each layer of the hand-written network.
X = torch.randn(size=(1, 1, 224, 224))
# summary() prints the table itself; the trailing ';' keeps Jupyter from also
# echoing the returned object, which shows the same table a second time.
summary(vgg_1, X);

Layer (type:depth-idx)                   Output Shape              Param #
├─Conv2d: 1-1                            [-1, 64, 224, 224]        640
├─ReLU: 1-2                              [-1, 64, 224, 224]        --
├─Conv2d: 1-3                            [-1, 64, 224, 224]        36,928
├─ReLU: 1-4                              [-1, 64, 224, 224]        --
├─MaxPool2d: 1-5                         [-1, 64, 112, 112]        --
├─Conv2d: 1-6                            [-1, 128, 112, 112]       73,856
├─ReLU: 1-7                              [-1, 128, 112, 112]       --
├─Conv2d: 1-8                            [-1, 128, 112, 112]       147,584
├─ReLU: 1-9                              [-1, 128, 112, 112]       --
├─MaxPool2d: 1-10                        [-1, 128, 56, 56]         --
├─Conv2d: 1-11                           [-1, 256, 56, 56]         295,168
├─ReLU: 1-12                             [-1, 256, 56, 56]         --
├─Conv2d: 1-13                           [-1, 256, 56, 56]        

Layer (type:depth-idx)                   Output Shape              Param #
├─Conv2d: 1-1                            [-1, 64, 224, 224]        640
├─ReLU: 1-2                              [-1, 64, 224, 224]        --
├─Conv2d: 1-3                            [-1, 64, 224, 224]        36,928
├─ReLU: 1-4                              [-1, 64, 224, 224]        --
├─MaxPool2d: 1-5                         [-1, 64, 112, 112]        --
├─Conv2d: 1-6                            [-1, 128, 112, 112]       73,856
├─ReLU: 1-7                              [-1, 128, 112, 112]       --
├─Conv2d: 1-8                            [-1, 128, 112, 112]       147,584
├─ReLU: 1-9                              [-1, 128, 112, 112]       --
├─MaxPool2d: 1-10                        [-1, 128, 56, 56]         --
├─Conv2d: 1-11                           [-1, 256, 56, 56]         295,168
├─ReLU: 1-12                             [-1, 256, 56, 56]         --
├─Conv2d: 1-13                           [-1, 256, 56, 56]        