# VGG16 in CIFAR-10

**使用 PyTorch 实现 VGG16，并在 CIFAR-10 数据集上完成图像分类**

VGG16 网络结构如下所示：

<img src="../Image/VGG16.png" width=40%>

各层的参数量及所需内存的计算如下：

<img src="../Image/VGG16_params.png" width=100%>

In [2]:
import torch
import torchvision
import torch.nn as nn
import torchvision.transforms as transforms
import torch.nn.functional as F
import torch.optim as optim

import numpy as np

In [3]:
# Device configuration: use the GPU when CUDA is available, otherwise the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [4]:
# Hyper-parameters
num_epochs = 5          # full passes over the training set
batch_size = 64         # samples per mini-batch for both data loaders
learning_rate = 1e-3    # step size handed to the optimizer

In [11]:
# Transform: convert each image to a tensor, then normalize every channel
# with mean 0.5 and std 0.5
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# CIFAR-10 training and test splits, stored under ../data
# (download=True fetches the files when they are not already present)
train_dataset = torchvision.datasets.CIFAR10(
    root="../data", train=True, download=True, transform=transform)
test_dataset = torchvision.datasets.CIFAR10(
    root="../data", train=False, download=True, transform=transform)

# Mini-batch loaders; only the training loader shuffles its samples
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True, num_workers=8)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=batch_size, num_workers=8)

# Human-readable class names (presumably in CIFAR-10 label-index order)
classes = ('plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck')

Files already downloaded and verified
Files already downloaded and verified


In [12]:
class VGG16(nn.Module):
    """
    VGG16 with batch normalization, sized for CIFAR-10 images.

    Feature-map sizes for a 32x32 RGB input (height x width @ channels):

    Input       - 32x32@3
    Conv1-2     - 32x32@64
    MaxPool1    - 16x16@64
    Conv3-4     - 16x16@128
    MaxPool2    - 8x8@128
    Conv5-7     - 8x8@256
    MaxPool3    - 4x4@256
    Conv8-10    - 4x4@512
    MaxPool4    - 2x2@512
    Conv11-13   - 2x2@512
    MaxPool5    - 1x1@512
    AvgPool     - 1x1@512 (global average over the remaining spatial grid)
    FC1         - 4096
    FC2         - 4096
    FC3         - num_classes

    Every convolution is 3x3 with padding 1 and is followed by BatchNorm2d
    and ReLU. Because the last pooling stage averages the whole spatial grid
    down to 1x1, inputs larger than 32x32 are also accepted; for a 32x32
    input that stage leaves the 1x1 map unchanged.

    Args:
        num_classes: number of output logits produced by the last linear layer.
    """
    def __init__(self, num_classes=10):
        super(VGG16, self).__init__()

        # Output channels of each convolution in order; 'M' marks a 2x2 max-pool.
        layout = (64, 64, 'M',
                  128, 128, 'M',
                  256, 256, 256, 'M',
                  512, 512, 512, 'M',
                  512, 512, 512, 'M')

        # The layers are kept in one flat Sequential so module indices (and
        # therefore state_dict keys such as "convnet.0.weight") stay the same
        # as when every layer was written out by hand.
        layers = []
        in_channels = 3
        for entry in layout:
            if entry == 'M':
                layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
                continue
            layers.extend([
                nn.Conv2d(in_channels=in_channels, out_channels=entry,
                          kernel_size=3, padding=1),
                # Batch normalization after every convolution (intended to
                # keep gradients from vanishing in this deep stack)
                nn.BatchNorm2d(entry),
                nn.ReLU(),
            ])
            in_channels = entry

        # Global average pooling: always yields 1x1@512, so the classifier's
        # 512 input features match regardless of the input resolution.
        layers.append(nn.AdaptiveAvgPool2d(output_size=1))

        self.convnet = nn.Sequential(*layers)

        self.fc = nn.Sequential(
            # FC1
            nn.Linear(in_features=512, out_features=4096),
            nn.ReLU(),
            nn.Dropout(),

            # FC2
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(),
            nn.Dropout(),

            # FC3
            nn.Linear(in_features=4096, out_features=num_classes))

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for image batch x."""
        output = self.convnet(x)
        # Flatten (batch, 512, 1, 1) to (batch, 512) for the linear layers
        output = output.view(output.size(0), -1)
        output = self.fc(output)
        return output

In [13]:
# Create an instance and move its parameters to the selected device, so the
# model lives on the same device as the batches the training and test loops
# send there with .to(device)
vgg16 = VGG16().to(device)

In [14]:
# Loss function: cross-entropy computed from the model's raw output scores
criterion = nn.CrossEntropyLoss()

# Optimizer: Adam over every parameter of the model
optimizer = optim.Adam(params=vgg16.parameters(), lr=learning_rate)

## Train the model

In [15]:
# Number of mini-batches between two progress reports
log_interval = 10

# Put Dropout/BatchNorm layers in training mode. A fresh model already is,
# but this keeps the cell correct when it is re-run after the evaluation
# cell has switched the model to eval mode.
vgg16.train()

for epoch in range(num_epochs):      # loop over the dataset multiple times

    running_loss = 0.0
    for i, (inputs, labels) in enumerate(train_loader):

        # Move the batch to the configured device
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Clear the gradients left over from the previous step
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = vgg16(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Report the mean loss over the last `log_interval` mini-batches
        running_loss += loss.item()
        if (i + 1) % log_interval == 0:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / log_interval))
            running_loss = 0.0

print('Finished Training')

[1,    10] loss: 2.519
[1,    20] loss: 2.348
[1,    30] loss: 2.313
[1,    40] loss: 2.306
[1,    50] loss: 2.297


Traceback (most recent call last):
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/multiprocessing/queues.py", line 242, in _feed
    send_bytes(obj)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/multiprocessing/connection.py", line 404, in _send_bytes
    self._send(header + buf)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/multiprocessing/connection.py", line 368, in _send
    n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/multiprocessing/queues.py", line 242, in _feed
    send_bytes(obj)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/multiprocessing/connection.py", line 200, in send_by

KeyboardInterrupt: 

## Test the model

In [None]:
# Switch Dropout/BatchNorm layers to inference behaviour
vgg16.eval()

total_correct = 0
total_samples = 0
total_loss = 0.0

# Gradients are not needed for evaluation
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        output = vgg16(images)

        # With its default reduction, criterion returns the mean loss of the
        # batch. Weight it by the batch size so that dividing by the sample
        # count below gives a true per-sample average; .item() keeps the
        # accumulator a plain Python float instead of a tensor.
        batch_count = labels.size(0)
        total_loss += criterion(output, labels).item() * batch_count
        total_samples += batch_count

        # Predicted class = index of the largest score along dim 1
        pred = torch.argmax(output, dim=1)
        total_correct += (pred == labels).sum().item()

    avg_loss = total_loss / total_samples
    print("Test Avg. Loss: {}, Accuracy: {}%"
          .format(avg_loss, 100 * total_correct / total_samples))