# VGG Network from Scratch in PyTorch

VGG stands for the Visual Geometry Group at the University of Oxford. The VGG net is a deep learning model based on Convolutional Neural Networks (CNNs), introduced in the paper "Very Deep Convolutional Networks for Large-Scale Image Recognition" (https://arxiv.org/abs/1409.1556).

In the paper, the authors introduced not one but six different network configurations for the VGG neural network models. Each of them has a different neural network architecture. The VGG-16 has 13 convolutional and 3 fully-connected layers, carrying with them the ReLU tradition from AlexNet. This network stacks more layers than AlexNet and uses smaller filters ($3 \times 3$ convolutions together with $2 \times 2$ max-pooling windows). It has about 138M parameters and takes up about 500MB of storage space. There is also a deeper variant, VGG-19, and the simplest of all the configurations, VGG-11.

In [1]:
import time
import math
import numpy as np
import matplotlib.pyplot as plt
import inspect
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim.lr_scheduler import ReduceLROnPlateau

In [2]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [3]:
def count_parameters(model) -> None:
    """Print the number of trainable parameters of ``model``.

    Only parameters whose ``requires_grad`` flag is set are counted.
    The total is printed, not returned.
    """
    trainable_sizes = [param.numel() for param in model.parameters() if param.requires_grad]
    n_trainable = sum(trainable_sizes)
    print(f'Number of parameters: {n_trainable}')

In [17]:
# Each VGG variant maps to a tuple of three parallel lists, one entry per conv layer:
#   (output channels, BatchNorm presence, MaxPooling presence) -- 1 for yes, 0 for no
model_parameters = {
    'vgg11': (
        [64, 128, 256, 256, 512, 512, 512, 512],
        [1] * 8,
        [1, 1, 0, 1, 0, 1, 0, 1],
    ),
    'vgg13': (
        [64, 64, 128, 128, 256, 256, 512, 512, 512, 512],
        [1] * 10,
        [0, 1, 0, 1, 0, 1, 0, 1, 0, 1],
    ),
    'vgg16': (
        [64, 64, 128, 128, 256, 256, 256, 512, 512, 512, 512, 512, 512],
        [1] * 13,
        [0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1],
    ),
    'vgg19': (
        [64, 64, 128, 128, 256, 256, 256, 256, 512, 512, 512, 512, 512, 512, 512, 512],
        [1] * 16,
        [0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1],
    ),
}

In [13]:
class ConvLayer(nn.Module):
    """
    A single configurable convolutional stage.
    Pipeline: Input -> Conv2d (3x3) -> [BatchNorm2d] -> activation -> [MaxPool2d] -> Output
    The bracketed steps are only present when ``norm`` / ``pool`` are truthy.
    """

    def __init__(self, in_channels, out_channels, norm=True, activation=None, pool=True) -> None:
        super().__init__()
        # 3x3 kernel with stride 1 and padding 1 keeps the spatial size unchanged
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1)

        # Batch normalization post-dates the original VGG models, hence optional
        if norm:
            self.norm = nn.BatchNorm2d(out_channels)
        else:
            self.norm = None

        # Fall back to ReLU (the VGG choice) when no activation module is supplied
        if activation is None:
            activation = nn.ReLU(inplace=True)
        self.activation = activation

        # 2x2 max pooling with stride 2 halves the height and width of the feature maps
        if pool:
            self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        else:
            self.pool = None

    def forward(self, x):
        out = self.conv(x)
        if self.norm is not None:
            out = self.norm(out)
        out = self.activation(out)
        return out if self.pool is None else self.pool(out)


In [14]:
class MLPLayer(nn.Module):
    """
    A single configurable fully-connected stage.
    Pipeline: Input -> [Dropout] -> Linear -> [activation] -> Output
    Dropout (when enabled) is applied before the linear layer.
    """

    def __init__(self, in_dim, out_dim, activation=None, dropout=0.0) -> None:
        super().__init__()
        # Dropout is only instantiated for a strictly positive probability
        if dropout > 0.0:
            self.dropout = nn.Dropout(p=dropout)
        else:
            self.dropout = None
        # Fully connected (FC) layer
        self.fc = nn.Linear(in_dim, out_dim)
        # Optional activation; with None the linear output is returned as-is
        self.activation = activation

    def forward(self, x):
        out = x if self.dropout is None else self.dropout(x)
        out = self.fc(out)
        return out if self.activation is None else self.activation(out)


In [18]:
class VGG(nn.Module):
    """
    Builds a VGG network from a variant configuration.

    Args:
        vgg_type: three equal-length sequences, one entry per conv layer:
            (output channels, BatchNorm2d presence, MaxPool2d presence).
        in_channels: number of channels of the input images.
        num_classes: size of the output vector produced by the last FC layer.
        activation: activation module shared by every conv layer and the two
            hidden FC layers; ``nn.ReLU(inplace=True)`` is used when None.
        dropout: dropout probability applied before each of the two hidden FC layers.

    Raises:
        ValueError: if the configuration is empty or its three sequences
            do not have the same length.
    """

    def __init__(self, vgg_type, in_channels, num_classes, activation=None, dropout=0.1) -> None:
        super().__init__()
        # Channels along with BatchNorm2d and MaxPool2d presence for each ConvLayer
        channels, norms, maxpools = vgg_type[0], vgg_type[1], vgg_type[2]
        # Fail early with a clear message instead of an IndexError (too few flags)
        # or silently ignoring trailing flags (too many)
        if not channels or not (len(channels) == len(norms) == len(maxpools)):
            raise ValueError(
                'vgg_type must hold three non-empty sequences of equal length '
                '(channels, norms, maxpools); got lengths '
                f'{len(channels)}, {len(norms)}, {len(maxpools)}'
            )
        # Define the activation function -- ReLU is default in VGG
        activation = nn.ReLU(inplace=True) if activation is None else activation

        # The convolutional feature extractor: every layer consumes the channels
        # produced by the previous one, the first consumes the image channels
        layer_inputs = [in_channels, *channels[:-1]]
        self.conv_layers = nn.ModuleList([
            ConvLayer(c_in, c_out, norm, activation, pool)
            for c_in, c_out, norm, pool in zip(layer_inputs, channels, norms, maxpools)
        ])
        # Bring the feature maps to a fixed 7x7 grid so the classification head also
        # accepts inputs other than 224x224; on maps that are already 7x7 this is a no-op
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        # Flatten the 2D feature maps into 1D feature vectors
        self.flatten = nn.Flatten(start_dim=1)
        # The classification head -- FC linear layers; its input width follows the
        # last conv layer instead of assuming 512 channels
        self.fc = MLPLayer(7 * 7 * channels[-1], 4096, activation, dropout)
        self.fc1 = MLPLayer(4096, 4096, activation, dropout)
        # Final layer: no activation and no dropout
        self.fc2 = MLPLayer(4096, num_classes, activation=None, dropout=0.0)

    def forward(self, x):
        # Processes the input through the convolutional layers
        for layer in self.conv_layers:
            x = layer(x)
        x = self.flatten(self.avgpool(x))
        # Runs the feature vector through the classification head to generate predictions
        return self.fc2(self.fc1(self.fc(x)))


In [19]:
# Smoke test: build VGG-16 and push one random 224x224 3-channel image through it
img = torch.randn((1, 3, 224, 224))
img = img.to(device)
model = VGG(
    vgg_type=model_parameters['vgg16'],
    in_channels=3,
    num_classes=1000,
    dropout=0.1,
)
model = model.to(device)
count_parameters(model)
print(model(img).shape)

# Bare expression so the notebook renders the module tree as the cell output
model

Number of parameters: 138365992
torch.Size([1, 1000])


VGG(
  (conv_layers): ModuleList(
    (0): ConvLayer(
      (conv): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (activation): ReLU(inplace=True)
    )
    (1): ConvLayer(
      (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (activation): ReLU(inplace=True)
      (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (2): ConvLayer(
      (conv): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (activation): ReLU(inplace=True)
    )
    (3): ConvLayer(
      (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, a

In [None]:
# https://medium.com/@ilaslanduzgun/create-vgg-from-scratch-in-pytorch-aa194c269b55
# https://www.digitalocean.com/community/tutorials/vgg-from-scratch-pytorch
# https://debuggercafe.com/implementing-vgg11-from-scratch-using-pytorch/