# 4. 完整的深度学习训练过程
    1. 遵循的步骤
    2. 各个步骤的实现
    

## 4.1 完成深度学习训练过程的步骤
    1. 定义模型网络
    2. 载入输入数据
    3. 定义优化器optim与损失函数
    4. 在训练集上进行训练：前向传播、反向传播、梯度更新
    5. 在验证集上进行验证

## 4.2 定义模型网络
    * 继承nn.Module，定义一个名为VGG的网络类
    * 该VGG网络可以通过传入的layers列表自定义网络结构

In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable

In [2]:
class VGG(nn.Module):
    """Configurable VGG-style network: a conv/pool feature stack and an MLP head.

    Sub-modules are created in ``__init__`` and wired together in ``forward``.

    Args:
        layers: sequence describing the feature stack in order. The string
            ``'M'`` adds a 2x2 max-pool with stride 2; any other entry is
            taken as the output channel count of a 3x3 convolution
            (padding 1) that is followed by an in-place ReLU.
    """

    def __init__(self, layers):
        super(VGG, self).__init__()
        # Feature stack is built before the head, so parameter creation order
        # (and therefore random initialisation order) is features first.
        self.feature = self.get_feature(layers)
        self.classifier = self.get_classifier()

    def get_feature(self, layers):
        """Translate the ``layers`` spec into an ``nn.Sequential`` feature extractor."""
        modules = []
        channels = 3  # the first convolution expects a 3-channel input
        for spec in layers:
            if spec == 'M':
                modules.append(nn.MaxPool2d(kernel_size=2, stride=2))
                continue
            modules.append(nn.Conv2d(channels, spec, kernel_size=3, padding=1))
            modules.append(nn.ReLU(inplace=True))
            # The next convolution consumes what this one produced.
            channels = spec
        return nn.Sequential(*modules)

    def get_classifier(self):
        """Return the fixed head: 512 -> 256 -> 256 -> 10 with ReLU and dropout."""
        # The head takes exactly 512 flattened features, so the feature stack
        # must reduce each sample to 512 values in total.
        return nn.Sequential(
            nn.Linear(512, 256),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(256, 256),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(256, 10),
        )

    def forward(self, x):
        """Run the feature stack, flatten per sample, and return the 10 class scores."""
        features = self.feature(x)
        # Keep the batch dimension and collapse everything else.
        flattened = features.view(features.size(0), -1)
        return self.classifier(flattened)

In [3]:
# Build a VGG11-style network: the spec below yields 8 convolutions, with
# 'M' marking each 2x2 max-pool between them.
layers = [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M']
vgg = VGG(layers)
# Call-form print produces the same output on Python 2 and Python 3, whereas
# the statement form `print vgg` is a syntax error on Python 3.
print(vgg)

VGG (
  (feature): Sequential (
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU (inplace)
    (2): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
    (3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU (inplace)
    (5): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
    (6): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU (inplace)
    (8): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU (inplace)
    (10): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
    (11): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): ReLU (inplace)
    (13): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (14): ReLU (inplace)
    (15): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
    (16): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (17): ReLU (inplac

## 4.3 载入数据
    * 这里使用到了torchvision包，该包里包括了一些计算机视觉常用的模型、数据处理方法、数据集
    * torchvision.transforms提供数据的预处理，以及一些数据增强的功能
    * torchvision.datasets中包括了许多常用数据集，如cifar10，MNIST
    * 后续具体介绍torchvision的使用

In [4]:
import torch
import torchvision
import torchvision.transforms as transforms

# Preprocessing applied to every image: convert to a tensor, then normalise
# each of the three channels with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# CIFAR-10 training split (downloaded into ../data if missing), served in
# shuffled mini-batches of 4 by two worker processes.
trainset = torchvision.datasets.CIFAR10(
    root='../data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=4, shuffle=True, num_workers=2)

# CIFAR-10 test split with the same preprocessing; order is kept fixed.
testset = torchvision.datasets.CIFAR10(
    root='../data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=4, shuffle=False, num_workers=2)

# Human-readable class names; presumably in CIFAR-10 label-index order.
classes = (
    'plane', 'car', 'bird', 'cat',
    'deer', 'dog', 'frog', 'horse', 'ship', 'truck',
)

Files already downloaded and verified
Files already downloaded and verified


## 4.4 定义优化器与损失函数

In [6]:
# Move the model's parameters and buffers onto the GPU (needs a CUDA device).
# The call returns the module itself, which is why the notebook echoes the
# network's repr as this cell's output.
vgg.cuda()

VGG (
  (feature): Sequential (
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU (inplace)
    (2): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
    (3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU (inplace)
    (5): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
    (6): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU (inplace)
    (8): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU (inplace)
    (10): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
    (11): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): ReLU (inplace)
    (13): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (14): ReLU (inplace)
    (15): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
    (16): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (17): ReLU (inplac

In [7]:
import torch.optim as optim

# Cross-entropy loss applied to the network's class scores.
criterion = nn.CrossEntropyLoss()

# Adam optimizer over every parameter of vgg, with the library's default
# hyperparameters.
optim_Adam = optim.Adam(params=vgg.parameters())

## 4.5 模型训练

In [39]:
# Train vgg for a fixed number of passes over trainloader, reporting the
# average loss every 2000 mini-batches.
epochs = 10

# Batches must live on the same device as the model; check once where the
# parameters are instead of assuming a GPU is (or is not) in use.
use_cuda = next(vgg.parameters()).is_cuda

for epoch in range(epochs):

    # Loss accumulated since the last report (reset after each print).
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        if use_cuda:
            inputs, labels = inputs.cuda(), labels.cuda()
        # Variable is a no-op wrapper on PyTorch >= 0.4; kept for consistency
        # with the rest of this notebook.
        inputs, labels = Variable(inputs), Variable(labels)

        # Forward pass.
        pred = vgg(inputs)
        loss = criterion(pred, labels)

        # .item() extracts the Python float from the scalar loss; the older
        # `loss.data[0]` indexing fails on 0-dim tensors in current PyTorch.
        running_loss += loss.item()

        # Backward pass and parameter update; gradients are cleared first
        # because backward() accumulates into .grad.
        optim_Adam.zero_grad()
        loss.backward()
        optim_Adam.step()

        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

        
        

Process Process-14:
Process Process-13:
Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/share/Anaconda2/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
  File "/usr/share/Anaconda2/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
    self.run()
  File "/usr/share/Anaconda2/lib/python2.7/multiprocessing/process.py", line 114, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/share/Anaconda2/lib/python2.7/multiprocessing/process.py", line 114, in run
  File "/usr/share/Anaconda2/lib/python2.7/site-packages/torch/utils/data/dataloader.py", line 28, in _worker_loop
    r = index_queue.get()
  File "/usr/share/Anaconda2/lib/python2.7/multiprocessing/queues.py", line 376, in get
    self._target(*self._args, **self._kwargs)
    racquire()
  File "/usr/share/Anaconda2/lib/python2.7/site-packages/torch/utils/data/dataloader.py", line 28, in _worker_loop
    r = index_queue.get()
  File "/usr/

KeyboardInterrupt: 