In [1]:
import torch
from torchvision import datasets
from torchvision import transforms
# 用于数据预处理和转换
import torch.nn as nn
# 构建神经网络模型的模块
import torch.optim as optim

In [2]:
# Keyword arguments shared by the training and test splits:
#   root      - directory where the MNIST files are stored
#   transform - converts every image to a tensor
#   download  - fetch the files if they are not already present under `root`
_mnist_kwargs = dict(
    root="data/mnist",
    transform=transforms.ToTensor(),
    download=True,
)

# Training split.
train_data = datasets.MNIST(train=True, **_mnist_kwargs)
# Test split.
test_data = datasets.MNIST(train=False, **_mnist_kwargs)

In [3]:
batch_size = 100

# DataLoader wraps a dataset into an iterable that yields mini-batches.
# Training batches are reshuffled (shuffle=True) to add randomness.
train_loader = torch.utils.data.DataLoader(
    train_data, batch_size=batch_size, shuffle=True
)
# Test batches keep the dataset order (shuffle=False) so that evaluation
# always walks the samples in the same, stable order.
test_loader = torch.utils.data.DataLoader(
    test_data, batch_size=batch_size, shuffle=False
)

In [21]:
class MLP(nn.Module):
    """Multi-layer perceptron with two hidden layers and ReLU activations.

    Layer layout: input_size -> hidden_size -> hidden_size -> num_classes.
    Subclasses ``nn.Module``, the base class of all PyTorch models.

    Args:
        input_size: Dimension of each input vector.
        hidden_size: Number of units in each hidden layer.
        num_classes: Number of output classes (size of the output vector).
    """

    def __init__(self, input_size, hidden_size, num_classes):
        # The parent initialiser must run first so that the layers assigned
        # below are registered as sub-modules.
        super().__init__()
        # nn.Linear(in_features, out_features) is a fully connected layer.
        self.fc1 = nn.Linear(input_size, hidden_size)    # input  -> hidden
        self.relu = nn.ReLU()                            # non-linearity, reused after fc1 and fc2
        self.fc2 = nn.Linear(hidden_size, hidden_size)   # hidden -> hidden
        self.fc3 = nn.Linear(hidden_size, num_classes)   # hidden -> class scores

    def forward(self, x):
        """Run the forward pass and return the output of the last linear layer.

        No activation is applied after ``fc3``; the raw scores are returned.
        """
        hidden = self.relu(self.fc1(x))
        hidden = self.relu(self.fc2(hidden))
        return self.fc3(hidden)

In [22]:
# Length of one flattened 28x28 image.
input_size = 28 * 28
# 512 units per hidden layer, used for feature extraction and transformation.
hidden_size = 512
# Size of the model's output vector (one score per class).
num_classes = 10

model = MLP(
    input_size=input_size,
    hidden_size=hidden_size,
    num_classes=num_classes,
)

In [23]:
# Cross-entropy loss between the model's raw scores and the integer labels.
# reduction="mean" is the library default, written out here for clarity.
criterion = nn.CrossEntropyLoss(reduction="mean")

In [24]:
# Step size handed to the optimizer.
learning_rate = 0.001
# Adam adapts the effective learning rate of each parameter individually.
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

In [26]:
num_epochs = 10     # number of full passes over the training set
log_interval = 100  # print the current loss every `log_interval` steps

# Select training mode explicitly so this cell behaves the same even if the
# model was previously switched to evaluation mode.
model.train()
for epoch in range(num_epochs):
    # train_loader yields one (images, labels) mini-batch per step.
    for i, (images, labels) in enumerate(train_loader):
        # Flatten every image to a single vector while keeping the batch
        # dimension fixed. Unlike reshape(-1, 28*28), an unexpected image size
        # then fails inside the model instead of silently mixing pixels of
        # different samples.
        images = images.reshape(images.size(0), -1)

        # Forward pass and loss against the true labels.
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Clear stale gradients, back-propagate, then update the parameters.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i + 1) % log_interval == 0:
            print(f'Epoch[{epoch + 1}/{num_epochs}], Step[{i + 1}/{len(train_loader)}], Loss:{loss.item():.4f}')

Epoch[1/10], Step[100/600], Loss:0.2046
Epoch[1/10], Step[200/600], Loss:0.1341
Epoch[1/10], Step[300/600], Loss:0.1953
Epoch[1/10], Step[400/600], Loss:0.1062
Epoch[1/10], Step[500/600], Loss:0.1166
Epoch[1/10], Step[600/600], Loss:0.0838
Epoch[2/10], Step[100/600], Loss:0.1574
Epoch[2/10], Step[200/600], Loss:0.0855
Epoch[2/10], Step[300/600], Loss:0.0668
Epoch[2/10], Step[400/600], Loss:0.1025
Epoch[2/10], Step[500/600], Loss:0.0551
Epoch[2/10], Step[600/600], Loss:0.0845
Epoch[3/10], Step[100/600], Loss:0.0748
Epoch[3/10], Step[200/600], Loss:0.0768
Epoch[3/10], Step[300/600], Loss:0.0409
Epoch[3/10], Step[400/600], Loss:0.0642
Epoch[3/10], Step[500/600], Loss:0.0273
Epoch[3/10], Step[600/600], Loss:0.0495
Epoch[4/10], Step[100/600], Loss:0.0113
Epoch[4/10], Step[200/600], Loss:0.0453
Epoch[4/10], Step[300/600], Loss:0.0153
Epoch[4/10], Step[400/600], Loss:0.0051
Epoch[4/10], Step[500/600], Loss:0.1319
Epoch[4/10], Step[600/600], Loss:0.1750
Epoch[5/10], Step[100/600], Loss:0.0076


In [29]:
 # Evaluate in eval mode and restore the previous mode afterwards, so running
 # this cell does not change how a later training cell behaves.
 was_training = model.training
 model.eval()

 correct = 0  # number of correctly classified samples
 total = 0    # number of samples seen
 # Gradients are not needed for testing; disabling them saves memory.
 with torch.no_grad():
     for images, labels in test_loader:
         # One flat vector per sample, batch dimension kept fixed.
         images = images.reshape(images.size(0), -1)
         outputs = model(images)
         # Index of the largest score in each row is the predicted class.
         predicted = outputs.argmax(dim=1)
         total += labels.size(0)
         correct += (predicted == labels).sum().item()

 model.train(was_training)
 print(f'Accuracy of the network on the 10000 test images:{100*correct / total}%')

Accuracy of the network on the 10000 test images:98.22%


### 保存模型

In [32]:
# Serialise the whole model object (not only its state_dict) to disk.
# NOTE(review): the file name looks like a typo for "mnist_mlp_model.pkl"
# (';' for 'l', '.pk1' for '.pkl'). It is kept unchanged because other code may
# load this exact path — confirm before renaming.
torch.save(obj=model, f="mnist_m;p_model.pk1")