In [1]:
import torch
from torchvision import transforms
from torchvision import datasets
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.optim as optim

In [2]:
# Mini-batch size shared by the training and test DataLoaders below.
batch_size = 64

# Per-image preprocessing pipeline handed to both MNIST datasets:
#   1. ToTensor  - converts the image to a torch tensor.
#   2. Normalize - standardises the single channel with the MNIST mean (0.1307)
#                  and standard deviation (0.3081).
# Fix: the standard deviation used to read 0.3801 (digits transposed); the
# commonly used MNIST value is 0.3081.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307, ), (0.3081, )),
])

In [3]:
# Training split of MNIST, stored under ./dataset/mnist/.
# download=True lets a fresh "Restart & Run All" succeed on a machine that does
# not have the files yet; torchvision skips the download when the files are
# already present, so existing setups behave as before.
train_dataset = datasets.MNIST(
    root='./dataset/mnist/',
    train=True,
    download=True,
    transform=transform,
)

# Training batches are reshuffled on every pass over the loader.
train_loader = DataLoader(
    train_dataset,
    shuffle=True,
    batch_size=batch_size,
)

In [4]:
# Held-out test split of MNIST (train=False), stored under ./dataset/mnist/.
# download=True lets a fresh "Restart & Run All" succeed on a machine that does
# not have the files yet; torchvision skips the download when the files are
# already present, so existing setups behave as before.
test_dataset = datasets.MNIST(
    root='./dataset/mnist/',
    train=False,
    download=True,
    transform=transform,
)

# Evaluation order does not matter, so the test batches are not shuffled.
test_loader = DataLoader(
    test_dataset,
    shuffle=False,
    batch_size=batch_size,
)

In [3]:
%%html
<img src='./picture/CNN4.jpg' width=600>
# 3×5×5的输入图像与3×3×3的卷积核做卷积运算得到的是1×3×3的输出图像
# 3×3的卷积核因为以最开始的4/6/8为中心，得到的特征图少上下共2行和左右共2列。5×5的则少上2行下2行，左2列，右2列
# 对于stride为1的3×3卷积核做运算，要想得到原图像一样的尺寸，则需要padding=1。

In [4]:
%%html
<img src='./picture/CNN3.jpg' width=600>
<img src='./picture/CNN5.jpg' width=600>
# 输入为n × h × w的图像经过一个卷积层，有m个卷积核，一个卷积核得到一个二维的特征图像，最后将m个特征图像拼接得到 m × h'× w'的图像
# 因此，该卷积层的卷积核是一个四维的张量 (m×n×k_h×k_w)，其中m为卷积核个数，n为输入通道数，k_h×k_w为卷积核尺寸

In [5]:
%%html
<img src='./picture/CNN1.jpg' width=600>
<img src='./picture/CNN2.jpg' width=800>
# 单通道28×28的图像经过第一层卷积运算，输出为10×24×24的特征图像，所以第一个卷积层卷积核设置为10×1×5×5
# 经过第一个池化层得到10×12×12的特征图像，所以第一个池化层的卷积核设置为2×2 (注意:池化层的卷积核是取最大值或求平均而不是做点乘)
# 经过第二层卷积运算，输出为20×8×8的特征图像，所以第二个卷积层卷积核设置为20×10×5×5
# 经过第二个池化层得到20×4×4的特征图像，所以第二个池化层的卷积核设置为2×2
# 最后经过一个全连接层将三维的特征矩阵拉成一个向量，经过一个线性层得到10个输出。
# 因为特征图像一共有20×4×4=320个像素值，且输出为10分类，所以线性层设置为(320×10)

# 在实际中全连接层输入维度的设置，可以用pytorch设置一个随机的矩阵张量输入到神经网络中，然后直接查看最后一个输出层的输出尺寸，再计算
# 具体见Torch-Randn.py

In [6]:
class Net(torch.nn.Module):
    """Small CNN for 10-way classification: two conv/pool/ReLU stages and one linear layer.

    For a single-channel 28x28 input the feature maps evolve as
    1x28x28 -> conv1 -> 10x24x24 -> pool -> 10x12x12
            -> conv2 -> 20x8x8   -> pool -> 20x4x4 -> flatten (320) -> fc -> 10.
    """

    def __init__(self):
        super().__init__()
        # A convolution layer only constrains the number of input channels, not
        # the spatial size of the image. The number of kernels in one layer
        # becomes the channel count expected by the next layer.
        # conv1: 10 kernels, each 1x5x5 (1 input channel).
        self.conv1 = torch.nn.Conv2d(1, 10, kernel_size=5)
        # conv2: 20 kernels, each 10x5x5 (10 input channels).
        self.conv2 = torch.nn.Conv2d(10, 20, kernel_size=5)
        # 2x2 max pooling; the stride defaults to the window size (2).
        self.pooling = torch.nn.MaxPool2d(2)
        # Classifier head: 20*4*4 = 320 flattened features -> 10 class scores.
        self.fc = torch.nn.Linear(320, 10)

    def forward(self, x):
        """Return the (n, 10) class scores for a batch ``x`` of shape (n, 1, 28, 28)."""
        # Leading dimension is the number of samples in this batch (the last
        # batch of an epoch may be smaller than the configured batch size).
        n_samples = x.size(0)
        # Both feature stages share the same shape: convolution -> max-pool -> ReLU.
        for conv in (self.conv1, self.conv2):
            x = F.relu(self.pooling(conv(x)))
        # Collapse the (20, 4, 4) feature maps into one 320-long vector per sample.
        flat = x.view(n_samples, -1)
        return self.fc(flat)

# Use the first CUDA device when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

model = Net()
# Move the module's parameters and buffers onto the chosen device. Kept as the
# last expression so the notebook cell echoes the module structure.
model.to(device)

Net(
  (conv1): Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1))
  (pooling): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc): Linear(in_features=320, out_features=10, bias=True)
)

In [7]:
# SGD with momentum over every parameter of the network.
optimizer = optim.SGD(params=model.parameters(), lr=0.01, momentum=0.5)
# Cross-entropy loss, applied in train() to the network outputs and the labels.
criterion = torch.nn.CrossEntropyLoss()

In [8]:
def train(epoch):
    """Run one pass over ``train_loader``, updating ``model`` after every batch.

    Args:
        epoch: zero-based epoch index; used only for the progress line.

    Prints the mean loss of each consecutive window of 300 batches. Batches
    after the last complete window are trained on but not reported.
    """
    window_loss = 0
    for step, (images, targets) in enumerate(train_loader, start=1):
        # Move the batch to the same device as the model.
        images = images.to(device)
        targets = targets.to(device)

        # Clear stale gradients, then forward / backward / parameter update.
        optimizer.zero_grad()
        batch_loss = criterion(model(images), targets)
        batch_loss.backward()
        optimizer.step()

        window_loss += batch_loss.item()
        if step % 300 != 0:
            continue
        # A full window of 300 batches has been accumulated: report and reset.
        print('[%d, %5d], loss: %.3f' % (epoch + 1, step, window_loss / 300))
        window_loss = 0

In [9]:
def test():
    """Evaluate ``model`` on ``test_loader`` and print the accuracy.

    The percentage is formatted with ``%d`` and is therefore truncated to an
    integer; the raw correct/total counts are printed next to it.
    """
    hits = 0
    seen = 0
    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for images, targets in test_loader:
            # Move the batch to the same device as the model.
            images = images.to(device)
            targets = targets.to(device)

            scores = model(images)
            # Predicted class = index of the largest score along the class axis.
            pred = torch.max(scores.data, dim=1)[1]
            seen += targets.size(0)
            hits += (pred == targets).sum().item()
    print('Accuracy on test set: %d %% [%d/%d]' % (100 * hits / seen, hits, seen))

In [10]:
if __name__ == '__main__':
    # Number of full passes over the training set, named so that it can be
    # tuned in one place instead of being a bare literal in the loop header.
    NUM_EPOCHS = 10
    for epoch in range(NUM_EPOCHS):
        train(epoch)  # one pass of parameter updates with periodic loss logging
        test()        # accuracy on the test set after this epoch

[1,   300], loss: 0.668
[1,   600], loss: 0.215
[1,   900], loss: 0.153
Accuracy on test set: 96 % [9655/10000]
[2,   300], loss: 0.123
[2,   600], loss: 0.105
[2,   900], loss: 0.093
Accuracy on test set: 97 % [9782/10000]
[3,   300], loss: 0.083
[3,   600], loss: 0.083
[3,   900], loss: 0.081
Accuracy on test set: 97 % [9780/10000]
[4,   300], loss: 0.069
[4,   600], loss: 0.069
[4,   900], loss: 0.070
Accuracy on test set: 98 % [9826/10000]
[5,   300], loss: 0.060
[5,   600], loss: 0.060
[5,   900], loss: 0.063
Accuracy on test set: 98 % [9838/10000]
[6,   300], loss: 0.054
[6,   600], loss: 0.055
[6,   900], loss: 0.051
Accuracy on test set: 98 % [9848/10000]
[7,   300], loss: 0.051
[7,   600], loss: 0.048
[7,   900], loss: 0.051
Accuracy on test set: 98 % [9870/10000]
[8,   300], loss: 0.043
[8,   600], loss: 0.048
[8,   900], loss: 0.048
Accuracy on test set: 98 % [9860/10000]
[9,   300], loss: 0.041
[9,   600], loss: 0.044
[9,   900], loss: 0.042
Accuracy on test set: 98 % [9863