## VGG-16 (11 to 19 available)
### Reference
Karen Simonyan, Andrew Zisserman, Very Deep Convolutional Networks for Large-scale Image Recognition, ICLR, 2015.
[link](https://arxiv.org/pdf/1409.1556.pdf)
### Contents
* One of the classic convolutional networks
* Classifies images for the ILSVRC classification task (1000 classes)
### Keys
1. input is a fixed-size 224*224 RGB(3 channels) image
2. subtracting the mean RGB value, computed on the training set, from each pixel
3. 3*3 filters with stride 1, padding 1 (same)
4. 2*2 max-pooling with stride 2
5. 3 fully-connected layers with channels of 4096, 4096, 1000
6. using ReLU for non-linearity
7. dropout (p = 0.5) on the first two FC layers
8. mini-batch gradient descent with momentum (batch size 256, momentum 0.9)
9. L2 Reg lambda 5e-4
10. initial learning rate 1e-2, decreased by a factor of 10 three times over a total of 74 epochs

<img src='../etc/images/VGG16_model-summary.png' width='500'>

In [3]:
import torch
import torchvision
from torchinfo import summary

import numpy as np


In [111]:
# Seed PyTorch's global random number generator with a fixed value.
torch.manual_seed(42)

def make_conv2d(in_channels, out_channels):
    """Build a 3x3 convolution that preserves the spatial size of its input.

    Args:
        in_channels: Number of channels of the incoming feature map.
        out_channels: Number of filters, i.e. channels of the result.

    Returns:
        A ``torch.nn.Conv2d`` with kernel size 3, stride 1 and padding 1.
    """
    # Kernel 3 with stride 1 and one pixel of padding keeps height and width
    # unchanged ("same" convolution).
    same_3x3 = dict(kernel_size=3, stride=1, padding=1)
    return torch.nn.Conv2d(in_channels, out_channels, **same_3x3)

def make_maxpool2d():
    """Build the 2x2, stride-2 max-pooling layer.

    Returns:
        A ``torch.nn.MaxPool2d`` with kernel size 2 and stride 2.
    """
    window = 2
    # Stride equal to the window size gives non-overlapping pooling windows.
    return torch.nn.MaxPool2d(window, stride=window)

class VGG16(torch.nn.Module):
    """VGG-16 classifier: 13 conv layers followed by 3 fully-connected layers.

    The network is exposed as three sub-modules:

    * ``features``   - five stages of 3x3 conv + ReLU pairs, each stage
      closed by a 2x2 max-pool.
    * ``avgpool``    - flattens the feature map (no pooling is performed).
    * ``classifier`` - three linear layers, with ReLU and dropout after the
      first two.

    Args:
        num_classes: Number of output units of the last linear layer.
            Defaults to 1000, the value that used to be hard-coded.
        dropout: Drop probability of the two dropout layers. Defaults to
            0.5, the value that used to be hard-coded.
    """

    def __init__(self, num_classes: int = 1000, dropout: float = 0.5) -> None:
        super().__init__()

        # Output channels of every conv layer, in order; "M" marks a max-pool.
        layout = (
            64, 64, "M",
            128, 128, "M",
            256, 256, 256, "M",
            512, 512, 512, "M",
            512, 512, 512, "M",
        )
        layers = []
        in_channels = 3  # RGB input
        for entry in layout:
            if entry == "M":
                layers.append(make_maxpool2d())
                continue
            layers.append(make_conv2d(in_channels=in_channels, out_channels=entry))
            layers.append(torch.nn.ReLU(inplace=True))
            in_channels = entry
        # Layers are created in the same order as before, so the Sequential
        # indices (and therefore the state-dict keys) are unchanged.
        self.features = torch.nn.Sequential(*layers)

        # No AdaptiveAvgPool2d((7, 7)) here: it is not found in the reference
        # paper. Only the flatten step remains; the attribute name is kept so
        # existing code that accesses ``avgpool`` keeps working.
        self.avgpool = torch.nn.Sequential(
            torch.nn.Flatten(),
        )

        # The first linear layer expects a flattened 512-channel 7x7 map,
        # which is what a 224x224 input becomes after five 2x2 poolings.
        self.classifier = torch.nn.Sequential(
            torch.nn.Linear(in_features=7 * 7 * 512, out_features=4096),
            torch.nn.ReLU(inplace=True),
            torch.nn.Dropout(p=dropout, inplace=False),
            torch.nn.Linear(in_features=4096, out_features=4096),
            torch.nn.ReLU(inplace=True),
            torch.nn.Dropout(p=dropout, inplace=False),
            torch.nn.Linear(in_features=4096, out_features=num_classes),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Compute the class scores for a batch of images.

        Args:
            x: Image batch. Because the classifier input size is fixed to
                7*7*512, the batch is expected to be shaped
                ``(N, 3, 224, 224)``.

        Returns:
            The output of the last linear layer (no softmax is applied),
            one row per image and ``num_classes`` columns.
        """
        x = self.features(x)
        x = self.avgpool(x)
        return self.classifier(x)

In [112]:
# Re-seed right before construction so the freshly initialised weights of the
# hand-written model are the same on every run of this cell.
torch.manual_seed(42)
# Instantiate the VGG16 class defined in this notebook.
test = VGG16()
# Bare expression as the last statement: the notebook displays the module's repr.
test

VGG16(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation

In [113]:
# Same seed as used for the hand-written model.
torch.manual_seed(42)
# Reference implementation from torchvision.
# NOTE(review): no `weights` argument is passed, so despite the variable name
# this presumably builds a randomly initialised network rather than loading
# ImageNet weights - confirm against the torchvision documentation.
pre_trained = torchvision.models.vgg16()
# Bare expression as the last statement: the notebook displays the module's repr.
pre_trained

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [114]:
# Seed first so the random input batch drawn below is repeatable.
torch.manual_seed(42)

# Put both networks in evaluation mode before comparing their outputs.
test.eval()
pre_trained.eval()

with torch.inference_mode():
    # A batch of two 3x224x224 standard-normal samples, scaled by 100.
    inputx = 100 * torch.randn(2, 3, 224, 224)
    output_test = test(inputx)
    output_pre_trained = pre_trained(inputx)

# Show the hand-written model's output first, then the torchvision model's.
print(output_test, output_pre_trained, sep="\n")

tensor([[-0.0048,  0.0112,  0.0148,  ...,  0.0008, -0.0056,  0.0096],
        [-0.0048,  0.0112,  0.0148,  ...,  0.0008, -0.0056,  0.0096]])
tensor([[ -2.6868,   7.8514,   2.3464,  ..., -11.8453,  -2.2057,   2.6994],
        [ -2.5410,   8.1047,   0.3298,  ..., -12.0852,  -1.8896,   3.3896]])


In [115]:
# Print the first parameter tensor of each model (the first item yielded by
# `parameters()`), before any weights are copied between them.
print(next(test.parameters()))
print(next(pre_trained.parameters()))

Parameter containing:
tensor([[[[ 0.1471,  0.1597, -0.0451],
          [ 0.1768, -0.0422,  0.0388],
          [-0.0937,  0.1130,  0.1697]],

         [[-0.1412,  0.1673,  0.0360],
          [ 0.1422,  0.0261,  0.0928],
          [-0.0272,  0.1484,  0.0284]],

         [[-0.0898,  0.0491, -0.0887],
          [-0.0226, -0.0782,  0.1277],
          [-0.1519, -0.0887, -0.0543]]],


        [[[-0.1157,  0.0182, -0.1901],
          [ 0.1738, -0.1635,  0.1486],
          [ 0.0320, -0.0625,  0.1189]],

         [[ 0.0300,  0.1555,  0.0210],
          [-0.0607,  0.0517, -0.0522],
          [ 0.0810,  0.1718,  0.1112]],

         [[-0.0841,  0.1111,  0.0344],
          [ 0.0977, -0.1173, -0.1905],
          [-0.0744, -0.1476,  0.1579]]],


        [[[ 0.0554,  0.0797,  0.0609],
          [-0.0033,  0.1506, -0.1367],
          [ 0.0121, -0.1314,  0.0593]],

         [[-0.0663,  0.0590, -0.0401],
          [ 0.1596, -0.1141, -0.1148],
          [-0.1148,  0.1731,  0.0641]],

         [[ 0.1852, -0

In [116]:
# Copy the torchvision model's state dict into the hand-written model.
# The recorded output of this cell reports that all keys matched.
test.load_state_dict(pre_trained.state_dict())

<All keys matched successfully>

In [117]:
# Print the first parameter tensor of each model again, now that the state
# dict has been loaded into `test`, to compare them by eye.
print(next(test.parameters()))
print(next(pre_trained.parameters()))

Parameter containing:
tensor([[[[-6.6805e-02, -4.5083e-02, -2.7569e-02],
          [-1.1783e-01,  7.5383e-02, -1.1022e-03],
          [-7.4973e-02, -1.1999e-01, -2.6937e-02]],

         [[ 4.5549e-02, -2.9937e-02, -1.4615e-02],
          [ 6.5884e-03, -5.9312e-02, -1.5833e-02],
          [ 5.1877e-02,  8.2847e-02,  6.3982e-02]],

         [[-2.4044e-02,  9.8194e-02, -8.6355e-02],
          [ 1.0757e-01, -8.6304e-02,  1.1728e-02],
          [-6.1874e-02,  6.2070e-02,  1.9360e-02]]],


        [[[ 1.1409e-01, -1.6961e-02,  2.7671e-02],
          [ 3.7187e-03, -3.2966e-02,  8.5590e-02],
          [ 1.8437e-04, -2.8864e-02, -5.4187e-02]],

         [[ 2.4765e-02, -6.0196e-02,  5.3115e-05],
          [ 5.5539e-03,  4.4751e-02, -2.2867e-02],
          [-1.3001e-02,  2.5194e-02, -2.8777e-03]],

         [[-2.4727e-02, -2.6616e-02,  2.0157e-02],
          [-5.3808e-02, -6.5477e-03,  3.6809e-02],
          [-1.7112e-03, -3.2555e-02,  7.2222e-02]]],


        [[[-1.2698e-03,  3.7711e-02, -1.6679

In [118]:
# Put both networks in evaluation mode before comparing their outputs.
test.eval()
pre_trained.eval()

with torch.inference_mode():
    # A fresh batch of two 3x224x224 standard-normal samples, scaled by 100.
    # No seed is set in this cell, so the batch depends on the current RNG state.
    inputx = 100 * torch.randn(2, 3, 224, 224)
    output_test = test(inputx)
    output_pre_trained = pre_trained(inputx)

# Show the hand-written model's output first, then the torchvision model's.
print(output_test, output_pre_trained, sep="\n")

tensor([[ -1.9673,   8.6262,   1.3732,  ..., -11.3348,  -2.6166,   3.2239],
        [ -2.5145,   7.2472,   0.4924,  ..., -11.8535,  -1.3859,   3.3113]])
tensor([[ -1.9673,   8.6262,   1.3732,  ..., -11.3348,  -2.6166,   3.2239],
        [ -2.5145,   7.2472,   0.4924,  ..., -11.8535,  -1.3859,   3.3113]])
