In [1]:
import torch
from torchvision import transforms     # transforms 将pytorch读取到了PIL图像转为图像张量Tensor的以方便神经网络进行训练
from torchvision import datasets
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.optim as optim

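# Image arrays loaded with PIL or OpenCV are usually laid out as H×W×C; for more efficient network training they are typically converted to C×H×W and their pixel values are normalized.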

In [12]:
%%html
<img src='picture/image.jpg' width=200>

In [2]:
batch_size = 64  # number of samples per mini-batch, shared by the train and test loaders
# Compose chains the listed transforms and applies them, in order, to every sample.
transform = transforms.Compose([
    # Turn the 28x28 single-channel image into a 1x28x28 (C x H x W) image tensor.
    transforms.ToTensor(),
    # Standardize pixel values to help training: Normalize((mean, ), (std, )).
    # 0.1307 / 0.3081 are the commonly used MNIST mean and standard deviation
    # (0.3081, not 0.3801 -- the two digits are easy to transpose).
    transforms.Normalize((0.1307, ), (0.3081, )),
])

In [3]:
# Training split of MNIST, downloaded into ./dataset/mnist/ when requested.
# `transform` is applied on the fly each time a sample is read.
train_dataset = datasets.MNIST(
    root='./dataset/mnist/',
    transform=transform,
    download=True,
    train=True,
)

# Serve the training samples in shuffled mini-batches of `batch_size`.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

In [4]:
# Held-out test split of MNIST, preprocessed with the same `transform` as the training data.
test_dataset = datasets.MNIST(
    root='./dataset/mnist/',
    transform=transform,
    download=True,
    train=False,
)

# Evaluation order does not matter, so the test batches are not shuffled.
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [15]:
%%html
<img src='picture/quanlianjie.jpg' width=700>

In [5]:
class Net(torch.nn.Module):
    """Fully connected classifier for 28x28 single-channel images.

    Five linear layers narrow the 784 flattened pixels down to 10 class
    scores (784 -> 512 -> 256 -> 128 -> 64 -> 10). ReLU follows every layer
    except the last one.
    """

    def __init__(self):
        super().__init__()
        linear = torch.nn.Linear
        # Layers are created in l1..l5 order so parameter names and the order
        # of random initialisation stay stable.
        self.l1 = linear(784, 512)
        self.l2 = linear(512, 256)
        self.l3 = linear(256, 128)
        self.l4 = linear(128, 64)
        self.l5 = linear(64, 10)

    def forward(self, x):
        """Return the raw class scores (logits) for a batch of images.

        Args:
            x: Tensor whose elements can be regrouped into rows of 784 values,
                e.g. a batch of shape N x 1 x 28 x 28.

        Returns:
            Tensor of shape (rows, 10) holding un-normalised class scores.
        """
        # A fully connected layer works on a matrix, so every 1x28x28 image is
        # flattened into a row vector of 784 values.
        features = x.view(-1, 784)
        for hidden_layer in (self.l1, self.l2, self.l3, self.l4):
            features = F.relu(hidden_layer(features))
        # No activation on the output layer: its raw scores go straight into
        # CrossEntropyLoss, which performs the softmax step itself.
        return self.l5(features)


model = Net()

In [6]:
# Cross-entropy loss computed from the raw class scores returned by the model.
criterion = torch.nn.CrossEntropyLoss()
# SGD with momentum: momentum gives the updates some inertia, which helps the
# optimizer move out of poor local optima towards a better solution.
optimizer = optim.SGD(params=model.parameters(), momentum=0.5, lr=0.01)

In [7]:
def train(epoch, log_interval=300):
    """Run one pass over the training set, updating the model after every mini-batch.

    Uses the notebook-level ``model``, ``train_loader``, ``criterion`` and
    ``optimizer``.

    Args:
        epoch: Zero-based epoch index; only used (as ``epoch + 1``) in the
            progress output.
        log_interval: Number of mini-batches between progress lines. Each line
            reports the mean loss over the batches processed since the previous
            line. Defaults to 300, the interval that was previously hard-coded.

    Raises:
        ValueError: If ``log_interval`` is not positive.
    """
    if log_interval <= 0:
        raise ValueError('log_interval must be a positive integer, got %r' % (log_interval,))

    # Explicitly select training mode so mode-dependent layers behave correctly
    # even if the model was switched to eval mode elsewhere.
    model.train()

    running_loss = 0.0
    # Each iteration yields one mini-batch of up to batch_size samples; for a
    # full batch of 64, inputs is [64, 1, 28, 28] and target is [64].
    for batch_idx, (inputs, target) in enumerate(train_loader):
        optimizer.zero_grad()  # clear gradients left over from the previous step

        # forward + backward + update
        outputs = model(inputs)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        # Report once every `log_interval` mini-batches.
        if (batch_idx + 1) % log_interval == 0:
            # %5d pads the integer with spaces on the left to a width of 5;
            # %.3f keeps three digits after the decimal point.
            print('[%d, %5d] loss: %.3f' % (epoch + 1, batch_idx + 1, running_loss / log_interval))
            running_loss = 0.0

In [8]:
def test():
    correct = 0                          # 正确的个数
    total = 0                            # 总数
    with torch.no_grad() :               # 使用torch.no_grad()在with里面的代码不会计算梯度
        # test一共有10,000个数据, 所以这里一次test()循环157次
        for data in test_loader:
            images, labels = data
            outputs = model(images)
            # 沿着第1个维度(行是第1个维度，列是第0个维度)去找最大值的下标,返回每一行的最大值和每一行的最大值下标
            _, predicted = torch.max(outputs.data, dim=1)  
            # total 前156次迭代一次加64， 最后一次迭代加剩余的样本数
            total += labels.size(0)
            # 每一次迭代都是判断64个predicted的值与64个labels的值是否相等，相等的个数为sum()的结果
            correct += (predicted == labels).sum().item()
    print('Accuracy on test set: %d %%' % (100 * correct / total)) # %% 表示输出为 %

In [9]:
if __name__ == "__main__":
    # Train for 10 epochs, measuring test-set accuracy after each one.
    for epoch in range(10):
        train(epoch)
        test()

[1,   300] loss: 2.266
[1,   600] loss: 1.312
[1,   900] loss: 0.498
Accuracy on test set: 88 %
[2,   300] loss: 0.375
[2,   600] loss: 0.306
[2,   900] loss: 0.267
Accuracy on test set: 93 %
[3,   300] loss: 0.224
[3,   600] loss: 0.190
[3,   900] loss: 0.178
Accuracy on test set: 95 %
[4,   300] loss: 0.152
[4,   600] loss: 0.138
[4,   900] loss: 0.140
Accuracy on test set: 95 %
[5,   300] loss: 0.116
[5,   600] loss: 0.109
[5,   900] loss: 0.105
Accuracy on test set: 96 %
[6,   300] loss: 0.089
[6,   600] loss: 0.094
[6,   900] loss: 0.086
Accuracy on test set: 96 %
[7,   300] loss: 0.073
[7,   600] loss: 0.071
[7,   900] loss: 0.074
Accuracy on test set: 97 %
[8,   300] loss: 0.060
[8,   600] loss: 0.061
[8,   900] loss: 0.057
Accuracy on test set: 96 %
[9,   300] loss: 0.047
[9,   600] loss: 0.055
[9,   900] loss: 0.048
Accuracy on test set: 97 %
[10,   300] loss: 0.037
[10,   600] loss: 0.042
[10,   900] loss: 0.042
Accuracy on test set: 97 %
