# VGGNet PyTorch Implementation


First we import the necessary libraries that we will use.

In [None]:
import torch
import torchvision

import tqdm
import matplotlib.pyplot as plt
import numpy as np

## VGGNet Implementation in PyTorch


In [None]:
class VGGNet11(torch.nn.Module):
    """
    The VGGNet-11 module.

    Eight 3x3 convolutions arranged in five max-pooled blocks, followed by
    three fully connected layers.

    Args:
        num_classes: Size of the final fully connected layer.

    The shape comments below assume an Nx3x224x224 input. Any input of at
    least 32x32 pixels is accepted, because the feature map is adaptively
    pooled to 7x7 before it reaches the classifier.

    ``forward`` returns raw, unnormalized class scores (logits). This is what
    ``torch.nn.CrossEntropyLoss`` expects; apply ``torch.softmax(..., dim=1)``
    to the output when probabilities are needed.
    """

    def __init__(self, num_classes=1000):

        # Mandatory call to super class module.
        super().__init__()

        # Defining the feature extraction layers. Every block halves the
        # spatial resolution through its trailing max-pool.
        self.feature_extractor = torch.nn.Sequential(
            self._make_block(3, 64, num_convs=1),     # Nx3x224x224   -> Nx64x112x112
            self._make_block(64, 128, num_convs=1),   # Nx64x112x112  -> Nx128x56x56
            self._make_block(128, 256, num_convs=2),  # Nx128x56x56   -> Nx256x28x28
            self._make_block(256, 512, num_convs=2),  # Nx256x28x28   -> Nx512x14x14
            self._make_block(512, 512, num_convs=2),  # Nx512x14x14   -> Nx512x7x7
        )

        # Pins the feature map to 7x7 so the first Linear layer always sees
        # 512*7*7 features. It is the identity when the map is already 7x7.
        self.avgpool = torch.nn.AdaptiveAvgPool2d((7, 7))

        # Defining the classification layers.
        self.classifier = torch.nn.Sequential(
            # Fully Connected Layer - Nx25088 -> Nx4096
            torch.nn.Linear(in_features=512 * 7 * 7, out_features=4096),
            torch.nn.ReLU(inplace=True),

            # Fully Connected Layer - Nx4096 -> Nx4096
            torch.nn.Linear(in_features=4096, out_features=4096),
            torch.nn.ReLU(inplace=True),

            # Fully Connected Layer - Nx4096 -> NxC (logits; no softmax here,
            # since CrossEntropyLoss applies log-softmax itself).
            torch.nn.Linear(in_features=4096, out_features=num_classes),
        )

    @staticmethod
    def _make_block(in_channels, out_channels, num_convs):
        """Build ``num_convs`` x (3x3 conv + ReLU) followed by a 2x2 max-pool."""
        layers = []
        for _ in range(num_convs):
            # padding=1 keeps the spatial size unchanged for a 3x3 kernel.
            layers.append(torch.nn.Conv2d(in_channels=in_channels,
                                          out_channels=out_channels,
                                          kernel_size=3, padding=1))
            layers.append(torch.nn.ReLU(inplace=True))
            in_channels = out_channels
        layers.append(torch.nn.MaxPool2d(kernel_size=2, stride=2))
        return torch.nn.Sequential(*layers)

    def forward(self, x):
        """Return class logits of shape Nxnum_classes for an Nx3xHxW batch."""

        # Forward pass through the feature extractor - Nx3x224x224 -> Nx512x7x7
        x = self.feature_extractor(x)

        # Normalizing the spatial size for inputs other than 224x224.
        x = self.avgpool(x)

        # Flattening the feature map - Nx512x7x7 -> Nx25088
        x = torch.flatten(x, 1)

        # Forward pass through the classifier - Nx25088 -> Nxnum_classes
        return self.classifier(x)



## Loading the CIFAR-100 dataset



In [None]:
# Defining a transform for the images: upscale, convert to a tensor and
# normalize every channel with mean 0.5 and std 0.5.
# NOTE(review): VGG is usually fed 224x224 images; 244 may be a typo. Confirm
# the model accepts the new size before changing it.
transform = torchvision.transforms.Compose([
    torchvision.transforms.Resize((244, 244)),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Loading the training data.
train_set = torchvision.datasets.CIFAR100(root='./cifar-100', train=True, download=True, transform=transform)
train_loader = torch.utils.data.DataLoader(train_set, batch_size=4, shuffle=True, num_workers=2)

# Loading the testing data.
test_set = torchvision.datasets.CIFAR100(root='./cifar-100', train=False, download=True, transform=transform)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=4, shuffle=False, num_workers=2)

# NOTE(review): validation reuses the test split, so the validation loss is
# not independent of the final test metrics. Consider holding out part of
# train_set instead.
val_loader = torch.utils.data.DataLoader(test_set, batch_size=4, shuffle=False, num_workers=2)

# Defining the classes. The names come from the dataset itself so that
# classes[label] always matches the integer labels the loaders yield; a
# hand-written list in a different order would mislabel every sample.
classes = tuple(train_set.classes)

### Showing Sample Images

To confirm that the data was loaded correctly, we design a function below to show some sample images from the dataset.

In [None]:
def show_image(image):
    """Display an image tensor with matplotlib.

    The tensor is rescaled with ``x / 2 + 0.5`` (the inverse of a
    mean-0.5 / std-0.5 normalization) and its axes are reordered from
    (0, 1, 2) to (1, 2, 0) before plotting.
    """
    unnormalized = image / 2 + 0.5
    pixels = unnormalized.numpy()

    # Presumably channels-first in, channels-last out for imshow.
    channels_last = np.transpose(pixels, axes=(1, 2, 0))
    plt.imshow(channels_last)
    plt.show()

def show_sample_images():
    """Show one batch of training images and print their class names.

    Reads the module-level ``train_loader`` and ``classes``.
    """

    # Get one batch of training images. The builtin next() is used because
    # DataLoader iterators no longer provide a .next() method.
    images, labels = next(iter(train_loader))

    # Showing the image(s).
    show_image(torchvision.utils.make_grid(images))

    # Printing the labels; iterating the labels keeps this correct for any
    # batch size, including a short final batch.
    print(' '.join('%5s' % classes[label] for label in labels.tolist()))

# Sanity check: display one training batch together with its labels.
show_sample_images()

## Using the model

Before we proceed, we check whether the machine has a GPU available. If so, we use the GPU for training; otherwise, we use the CPU.

In [None]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

In [None]:
# Build the network with 100 output classes and move it to the chosen device.
model = VGGNet11(100)
model = model.to(device)

# Cross-entropy loss for classification.
criterion = torch.nn.CrossEntropyLoss()

# Plain SGD with momentum over all model parameters.
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=0.001,
    momentum=0.9,
)

## Training the model

We devise a function below to train the model. We use Stochastic Gradient Descent with momentum and cross-entropy loss for training.

In [None]:
def train(model, train_loader, val_loader, optimizer, epochs, criterion=None, device=None):
    """Train ``model`` and report the mean train/validation loss per epoch.

    Args:
        model: The network to optimize.
        train_loader: Iterable of batches whose first two items are
            (inputs, labels); used for the optimization phase.
        val_loader: Iterable of batches in the same format; used for the
            evaluation phase of every epoch.
        optimizer: Optimizer already bound to ``model``'s parameters.
        epochs: Number of passes over ``train_loader``.
        criterion: Loss called as ``criterion(outputs, labels)``. Defaults to
            ``torch.nn.CrossEntropyLoss()``.
        device: Device the batches are moved to. Defaults to the device of the
            model's first parameter (CPU for a parameter-free model).
    """
    if criterion is None:
        criterion = torch.nn.CrossEntropyLoss()
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    # Iterate for several epochs
    for epoch in tqdm.trange(epochs):
        running_loss = {'train': 0.0, 'val': 0.0}

        for phase, loader in (('train', train_loader), ('val', val_loader)):
            is_training = phase == 'train'

            # Switches layers such as dropout/batch-norm between modes.
            model.train(is_training)

            # Autograd is only needed while training; disabling it during
            # validation avoids building graphs that are never used.
            with torch.set_grad_enabled(is_training):

                # Iterate for each batch in the current split
                for batch in loader:

                    # Get the sample input data.
                    inputs, labels = batch[0].to(device), batch[1].to(device)

                    # Perform forward pass.
                    outputs = model(inputs)

                    # Calculate current model loss.
                    loss = criterion(outputs, labels)

                    if is_training:
                        # Reset the gradients, then perform backward pass.
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                    running_loss[phase] += loss.item()

        # max(..., 1) guards against division by zero for an empty loader.
        train_loss = running_loss['train'] / max(len(train_loader), 1)
        val_loss = running_loss['val'] / max(len(val_loader), 1)
        print('Epoch %d - Train Loss: %.3f Validation Loss %.3f' % (epoch + 1, train_loss, val_loss))

    print('Finished Training')

# Train for 5 epochs, reporting train and validation loss after each epoch.
train(model, train_loader, val_loader, optimizer, 5)

## Evaluating the Model

We evaluate the model on a sample batch of test data to see how well it performs.

In [None]:
def test(model, test_loader):
    """Show one test batch with its ground-truth and predicted class names.

    Reads the module-level ``classes``. The model is run in evaluation mode
    without gradient tracking, and its previous mode is restored afterwards.

    Args:
        model: The trained network.
        test_loader: Iterable yielding (images, labels) batches.
    """

    # The builtin next() is used because DataLoader iterators no longer
    # provide a .next() method.
    images, labels = next(iter(test_loader))

    # Showing the test images
    show_image(torchvision.utils.make_grid(images))
    print('GroundTruth: ', ' '.join('%5s' % classes[label] for label in labels.tolist()))

    # Run the batch on whatever device the model lives on.
    first_param = next(model.parameters(), None)
    target = first_param.device if first_param is not None else torch.device('cpu')

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            outputs = model(images.to(target))
    finally:
        model.train(was_training)

    # Index of the highest score per sample.
    predicted = outputs.argmax(dim=1)

    print('Predicted: ', ' '.join('%5s' % classes[label] for label in predicted.tolist()))

# Show ground truth and predictions for one batch from the test loader.
test(model, test_loader)

### Evaluating on the entire dataset

We further evaluate the model's performance on the entire dataset.

In [None]:
def test_full(model, test_loader):

    correct = 0
    total = 0
    with torch.no_grad():
        for data in test_loader:
            images, labels = data[0].to(device), data[1].to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print('Accuracy of the network on the 10000 test images: %d %%' % (
        100 * correct / total))

# Report the overall accuracy over every batch in the test loader.
test_full(model, test_loader)

### Evaluating class by class

We further evaluate the class-by-class accuracy of the model.

In [None]:
def test_class(model, test_loader):

    class_correct = list(0. for i in range(10))
    class_total = list(0. for i in range(10))
    with torch.no_grad():
        for data in test_loader:
            images, labels = data[0].to(device), data[1].to(device)
            outputs = model(images.to(device))
            _, predicted = torch.max(outputs, 1)
            c = (predicted == labels).squeeze()
            for i in range(4):
                label = labels[i]
                class_correct[label] += c[i].item()
                class_total[label] += 1


    for i in range(10):
        print('Accuracy of %5s : %2d %%' % (
            classes[i], 100 * class_correct[i] / class_total[i]))

# Report the accuracy separately for each class over the test loader.
test_class(model, test_loader)