# VGG16 in CIFAR-10 with GPU on Colab

**使用 Pytorch 完成 VGG16 在 CIFAR-10 数据集上的分类**

VGG16 网络结构如下所示：

<img src="../Image/VGG16.png" width=40%>

各层参数，需要内存计算如下：

<img src="../Image/VGG16_params.png" width=100%>

# Preparation

In [1]:
from google.colab import drive
drive.mount('/content/drive')

ModuleNotFoundError: No module named 'google.colab'

In [2]:
!pip3 install torch



In [3]:
!pip3 install torchvision



In [9]:
!pwd
!ls

/content/drive
'My Drive'


In [18]:
%cd '/content/drive/My Drive/Pytorch Learning/Neural Network'

/content/drive/My Drive/Pytorch Learning


In [68]:
!ls
!pwd

CNN_CIFAR10.ipynb  CNN.ipynb	LeNet5.ipynb
cnn.ckpt	   LeNet5.ckpt	VGG16_CIFAR10.ipynb
/content/drive/My Drive/Pytorch Learning/Neural Network


# Code

In [0]:
import torch
import torchvision
import torch.nn as nn
import torchvision.transforms as transforms
import torch.nn.functional as F
import torch.optim as optim

import numpy as np

In [34]:
# Device configuaration
device =  torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [67]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [0]:
# Hyper-parameters
learning_rate = 0.001
batch_size = 10
num_epochs = 5

In [72]:
# Transform
transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

train_dataset = torchvision.datasets.CIFAR10(root="../data",
                                            download=True,
                                            train=True,
                                            transform=transform)

test_dataset = torchvision.datasets.CIFAR10(root="../data",
                                           download=True,
                                           train=False,
                                           transform=transform)

train_loader = torch.utils.data.DataLoader(train_dataset, shuffle=True, batch_size=batch_size, num_workers=8)

test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, num_workers=8)

classes = ('plane', 'car', 'bird','cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Files already downloaded and verified
Files already downloaded and verified


In [0]:
class VGG16(nn.Module):
    """
    Input - 224x224x3
    Conv1 - 224x224@64
    Conv2 - 224x224@64
    MaxPool1 - 112x112@64
    Conv3 - 112x112@128
    Conv4 - 112x112x128
    MaxPool2 - 56x56x128
    Conv5 - 56x56x256
    Conv6 - 56x56x256
    Conv7 - 56x56x256
    MaxPool3 - 28x28x256
    Conv8 - 28x28x512
    Conv9 - 28x28@512
    Conv10 - 28x28x512
    MaxPool4 - 14x14@512
    Conv11 - 14x14@512
    Conv12 - 14x14@512
    Conv13 - 14x14@512
    MaxPool5 - 7x7@512
    FC1 - 1x1@4096
    FC2 - 1x1@4096
    FC# - 1x1@1000
    """
    def __init__(self, num_classes=10):
        super(VGG16, self).__init__()
        
        self.convnet = nn.Sequential(
            # Conv1
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, padding=1),
            # Batch Normalization, to avoid gradient disapperance
            nn.BatchNorm2d(64),
            nn.ReLU(),
            
            # Conv2
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            
            # MaxPool
            nn.MaxPool2d(kernel_size=2, stride=2),
            
            # Conv3
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            
            # Conv4
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            
            # MaxPool
            nn.MaxPool2d(kernel_size=2, stride=2),
            
            # Conv5
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            
            # Conv6
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            
            # Conv7
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            
            # MaxPool
            nn.MaxPool2d(kernel_size=2, stride=2),
            
            # Conv8
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            
            # Conv9
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            
            # Conv10
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            
            # MaxPool
            nn.MaxPool2d(kernel_size=2, stride=2),
            
            # Conv11
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            
            # Conv12
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            
            # Conv13
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            
            # MaxPool
            nn.MaxPool2d(kernel_size=2, stride=2),
            
            # AvgPool
            nn.AvgPool2d(kernel_size=1, stride=1))
        
        self.fc = nn.Sequential(
            # FC1
            nn.Linear(in_features=512, out_features=4096),
            nn.ReLU(),
            nn.Dropout(),
            
            # FC2
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(),
            nn.Dropout(),
            
            # FC3
            nn.Linear(in_features=4096, out_features=num_classes))
        
    def forward(self, x):
        output = self.convnet(x)
        output = output.view(output.size(0), -1)
        output = self.fc(output)
        return output

In [74]:
# Create an instance
vgg16 = VGG16()
vgg16.cuda()

VGG16(
  (convnet): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU()
    (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU()
    (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (14): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1

In [0]:
# Loss function
criterion = nn.CrossEntropyLoss()
# Optimizer
optimizer = optim.Adam(vgg16.parameters(), lr=learning_rate)

## Train the model

In [79]:
for epoch in range(num_epochs):      # loop over the dataset multiple times
    
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(train_loader, 0):
        
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        # Zero gradient
        optimizer.zero_grad()
        
        # forward + backward + optimize
        outputs = vgg16(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        # print('[%d, %5d] loss: %.4f' %(epoch + 1, (i+1)*batch_size, loss.item()))
        
        
        # print statistics
        running_loss += loss.item()
        if i % 1000 == 999:      # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i+1, running_loss / 1000))
            running_loss = 0.0
        

print('Finished Training')

[1,  1000] loss: 1.980
[1,  2000] loss: 1.943
[1,  3000] loss: 1.939
[1,  4000] loss: 1.897
[1,  5000] loss: 1.853
[2,  1000] loss: 1.796
[2,  2000] loss: 1.724
[2,  3000] loss: 1.637
[2,  4000] loss: 1.598
[2,  5000] loss: 1.556
[3,  1000] loss: 1.505
[3,  2000] loss: 1.484
[3,  3000] loss: 1.427
[3,  4000] loss: 1.369
[3,  5000] loss: 1.317
[4,  1000] loss: 1.283
[4,  2000] loss: 1.227
[4,  3000] loss: 1.212
[4,  4000] loss: 1.182
[4,  5000] loss: 1.137
[5,  1000] loss: 1.103
[5,  2000] loss: 1.086
[5,  3000] loss: 1.065
[5,  4000] loss: 1.046
[5,  5000] loss: 1.025
Finished Training


## Test the model

In [80]:
vgg16.eval()

total_correct = 0
avg_loss = 0.0

with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        output = vgg16(images)
        avg_loss += criterion(output, labels)

        pred = torch.argmax(output.data, dim=1)
        total_correct += (pred == labels).sum().item()

    avg_loss = avg_loss / len(test_dataset)
    print("Test Avg. Loss: {}, Accuracy: {}%"
          .format(avg_loss, 100 * total_correct / len(test_dataset)))

Test Avg. Loss: 0.09444749355316162, Accuracy: 66.01%


In [0]:
# Train the model
def train(num_epoch):
    """
    It is a function to train the LeNet5 model
    @param: num_epoch: the number of epochs
    """
    total_step = len(train_loader)
    for epoch in range(num_epoch):
        for i, (images, labels) in enumerate(train_loader):
            images = images.to(device)
            labels = labels.to(device)

            # Forward pass
            output = vgg16(images)
            loss = criterion(output, labels)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if (i + 1) % 1000 == 0:
                print("Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}"
                      .format(epoch + 1, num_epochs, i + 1, total_step, loss.item()))


In [0]:
def test():
    '''
    It is a function to test the model.
    '''
    net.eval()

    total_correct = 0
    avg_loss = 0.0

    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            output = vgg16(images)
            avg_loss += criterion(output, labels)

            pred = torch.argmax(output.data, dim=1)
            total_correct += (pred == labels).sum().item()

        avg_loss = avg_loss / len(test_dataset)
        print("Test Avg. Loss: {}, Accuracy: {}%"
              .format(avg_loss, 100 * total_correct / len(test_dataset)))


In [0]:
def main():
    """
    Main function
    """
    # Train
    train(num_epochs)
    # Test
    test()
    # Save the model checkpoint
    torch.save(vgg16.state_dict(), "VGG16_CIFAR10.ckpt")
    print("Saved model successfully!\n")

In [55]:
main()

KeyboardInterrupt: ignored