<center><h1>VGG</h1></center>

<center><p><a href="http://arxiv.org/abs/1409.1556">Very Deep Convolutional Networks for Large-Scale Image Recognition</a></p></center>

<img src="https://neurohive.io/wp-content/uploads/2018/11/vgg16-1-e1542731207177.png" width="1000"/>

In [1]:
import torch
from torch import nn

# VGG Blocks

In [2]:
def make_layers(cfg: list, in_channels: int = 3, batch_norm: bool = False) -> nn.Sequential:
    """Build the convolutional feature extractor described by ``cfg``.

    Args:
        cfg: Layer specification, read left to right. An integer ``n`` adds a
            3x3 convolution (padding 1) with ``n`` output channels followed by
            an in-place ReLU; the string ``"M"`` adds a 2x2 max-pool with
            stride 2.
        in_channels: Number of channels of the tensor fed to the first
            convolution. Defaults to 3, the value that was previously
            hard-coded.
        batch_norm: When True, an ``nn.BatchNorm2d`` is inserted between each
            convolution and its ReLU. Defaults to False (plain conv + ReLU).

    Returns:
        An ``nn.Sequential`` holding the layers in the order given by ``cfg``.
    """
    layers = []
    for spec in cfg:
        if spec == "M":
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            continue
        layers.append(nn.Conv2d(in_channels, spec, kernel_size=3, padding=1))
        if batch_norm:
            layers.append(nn.BatchNorm2d(spec))
        layers.append(nn.ReLU(inplace=True))
        # The next convolution consumes what this one produced.
        in_channels = spec
    return nn.Sequential(*layers)


# Layer specifications consumed by make_layers: an integer is the number of
# output channels of a 3x3 convolution, 'M' marks a 2x2 max-pool. Each entry
# is written one stage per line (a stage ends with its 'M').
cfgs = {
    'vgg11': [
        64, 'M',
        128, 'M',
        256, 256, 'M',
        512, 512, 'M',
        512, 512, 'M',
    ],
    'vgg13': [
        64, 64, 'M',
        128, 128, 'M',
        256, 256, 'M',
        512, 512, 'M',
        512, 512, 'M',
    ],
    'vgg16': [
        64, 64, 'M',
        128, 128, 'M',
        256, 256, 256, 'M',
        512, 512, 512, 'M',
        512, 512, 512, 'M',
    ],
    'vgg19': [
        64, 64, 'M',
        128, 128, 'M',
        256, 256, 256, 256, 'M',
        512, 512, 512, 512, 'M',
        512, 512, 512, 512, 'M',
    ],
}

# VGG Network

In [3]:
class VGG(nn.Module):
    """VGG classifier: a convolutional feature extractor plus a 3-layer MLP head.

    Args:
        features: Module producing a feature map with 512 channels (the first
            ``Linear`` layer is sized for ``512 * 7 * 7`` inputs).
        num_classes: Size of the output logits vector.
        init_weights: When True, re-initialise all conv/linear layers via
            ``_initialize_weights`` after construction.
        dropout: Drop probability of the two ``Dropout`` layers in the head.
    """

    def __init__(
        self,
        features: nn.Module,
        num_classes: int = 1000,
        init_weights: bool = True,
        dropout: float = 0.5,
    ) -> None:
        super().__init__()
        self.features = features  # 224 -> 112 -> 56 -> 28 -> 14 -> 7
        # Force a 7x7 spatial size so the head also accepts inputs that are
        # not 224x224. When the feature map is already 7x7 every pooling
        # window covers a single element, so the values pass through as-is.
        # The layer has no parameters, so state_dict keys are unaffected.
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout),
            nn.Linear(4096, num_classes),
        )
        if init_weights:
            self._initialize_weights()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run ``features``, pool to 7x7, then apply the classifier head."""
        x = self.features(x)
        x = self.avgpool(x)
        x = self.classifier(x)
        return x

    def _initialize_weights(self) -> None:
        """Initialise conv weights (Kaiming normal) and linear weights (Xavier uniform).

        Biases, where present, are set to zero.
        """
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight)
                # Mirror the Conv2d branch: a bias-free layer has nothing to reset.
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

# Summary

## Data

In [4]:
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Random batch of 32 three-channel 224x224 inputs, moved to the chosen device.
data = torch.randn((32, 3, 224, 224))
data = data.to(device)

## VGG11

In [5]:
from torchkeras import summary

# Build the VGG11 variant with a 1000-way head and place it on `device`.
net = VGG(features=make_layers(cfg=cfgs['vgg11']), num_classes=1000)
net = net.to(device)

# Print the per-layer summary for the sample batch.
summary(net, input_data=data)

# Drop the reference so the model can be freed before the next one is built.
del net

--------------------------------------------------------------------------
Layer (type)                            Output Shape              Param #
Conv2d-1                          [-1, 64, 224, 224]                1,792
ReLU-2                            [-1, 64, 224, 224]                    0
MaxPool2d-3                       [-1, 64, 112, 112]                    0
Conv2d-4                         [-1, 128, 112, 112]               73,856
ReLU-5                           [-1, 128, 112, 112]                    0
MaxPool2d-6                        [-1, 128, 56, 56]                    0
Conv2d-7                           [-1, 256, 56, 56]              295,168
ReLU-8                             [-1, 256, 56, 56]                    0
Conv2d-9                           [-1, 256, 56, 56]              590,080
ReLU-10                            [-1, 256, 56, 56]                    0
MaxPool2d-11                       [-1, 256, 28, 28]                    0
Conv2d-12                          [-

## VGG16

In [6]:
# Build the VGG16 variant with a 1000-way head and place it on `device`.
net = VGG(features=make_layers(cfg=cfgs['vgg16']), num_classes=1000)
net = net.to(device)

# Print the per-layer summary for the sample batch.
summary(net, input_data=data)

# Drop the reference so the model can be freed afterwards.
del net

--------------------------------------------------------------------------
Layer (type)                            Output Shape              Param #
Conv2d-1                          [-1, 64, 224, 224]                1,792
ReLU-2                            [-1, 64, 224, 224]                    0
Conv2d-3                          [-1, 64, 224, 224]               36,928
ReLU-4                            [-1, 64, 224, 224]                    0
MaxPool2d-5                       [-1, 64, 112, 112]                    0
Conv2d-6                         [-1, 128, 112, 112]               73,856
ReLU-7                           [-1, 128, 112, 112]                    0
Conv2d-8                         [-1, 128, 112, 112]              147,584
ReLU-9                           [-1, 128, 112, 112]                    0
MaxPool2d-10                       [-1, 128, 56, 56]                    0
Conv2d-11                          [-1, 256, 56, 56]              295,168
ReLU-12                            [-