# 导入包
> 本节代码以 MNIST 手写数字数据集为例，训练逻辑回归分类模型

In [1]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

# 加载数据集
由于要设计网络结构，设置输入输出，所以对于一个未知的数据集，应该先获取其数据集的相关信息，再来设计网络。

In [35]:
# MNIST constructor arguments used below:
#   root      - directory where the dataset files are stored
#   train     - True selects the training split, False the test split
#   transform - converter applied to every image (here: image -> tensor)
#   download  - fetch the data into `root` when it is not already there
train_dataset = torchvision.datasets.MNIST(
    root='data',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
test_dataset = torchvision.datasets.MNIST(
    root='data',
    train=False,
    transform=transforms.ToTensor(),
    download=True,
)

# Shape of a single sample image (each dataset item is an (image, label) pair).
first_image, _first_label = train_dataset[0]
print('数据集情况', first_image.size())
# Class labels known to the dataset.
print('标签情况', train_dataset.classes)
# Number of samples in each split.
for caption, split in (('训练集大小', train_dataset), ('测试集大小', test_dataset)):
    print(caption, len(split))

数据集情况 torch.Size([1, 28, 28])
标签情况 ['0 - zero', '1 - one', '2 - two', '3 - three', '4 - four', '5 - five', '6 - six', '7 - seven', '8 - eight', '9 - nine']
训练集大小 60000
测试集大小 10000


# 设置超参数

1. 输入数据的大小 input_size
2. 输出类别的大小 num_classes
3. 迭代次数      num_epochs
4. 每批次数量    batch_size
5. 学习率        learning_rate

In [36]:
# Every image is a 1 x 28 x 28 tensor, so one flattened sample has
# 1 * 28 * 28 = 784 input features.
input_size = 28 * 28 * 1
# The classifier distinguishes 10 classes, so it needs 10 outputs.
num_classes = 10
# Training schedule: passes over the training set and samples per mini-batch.
num_epochs, batch_size = 5, 100
# Step size handed to the optimizer.
learning_rate = 1e-3


# 模型建立

In [42]:
# Wrap both splits in mini-batch iterators; only the training loader shuffles.
train_dataloader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True
)
test_dataloader = torch.utils.data.DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False
)
# The model is a single linear layer mapping input_size features to
# num_classes scores.
model = nn.Linear(in_features=input_size, out_features=num_classes)
# Cross-entropy loss combines softmax with the log-likelihood loss, so the
# model itself needs no softmax layer (this differs from a pure-numpy
# implementation).
loss_fn = nn.CrossEntropyLoss()
# NOTE: the name `optmizer` (sic) is kept because the training cells use it.
optmizer = torch.optim.SGD(model.parameters(), lr=learning_rate)


# 循环训练模型

In [43]:
# Number of mini-batches per epoch; only used in the progress message.
total_step = len(train_dataloader)
for epoch in range(num_epochs):
    for step, (images, labels) in enumerate(train_dataloader, start=1):
        # Flatten each image into a row of input_size values. The -1 lets
        # reshape work out the number of rows from the remaining dimension.
        inputs = images.reshape(-1, input_size)

        # Drop the gradients accumulated by the previous step.
        optmizer.zero_grad()

        # Forward pass followed by the loss for this batch.
        outputs = model(inputs)
        loss = loss_fn(outputs, labels)

        # Backward pass, then one optimizer update.
        loss.backward()
        optmizer.step()

        # Report progress once every 100 steps.
        if step % 100 != 0:
            continue
        print('Epoch [{}/{}], step[{}/{}], Loss:{:.4f}'.format(epoch+1,num_epochs, step,total_step,loss.item()))


Epoch [1/5], step[100/600], Loss:2.2030
Epoch [1/5], step[200/600], Loss:2.0914
Epoch [1/5], step[300/600], Loss:1.9912
Epoch [1/5], step[400/600], Loss:1.9087
Epoch [1/5], step[500/600], Loss:1.8025
Epoch [1/5], step[600/600], Loss:1.8199
Epoch [2/5], step[100/600], Loss:1.7914
Epoch [2/5], step[200/600], Loss:1.6084
Epoch [2/5], step[300/600], Loss:1.6446
Epoch [2/5], step[400/600], Loss:1.5335
Epoch [2/5], step[500/600], Loss:1.5206
Epoch [2/5], step[600/600], Loss:1.5038
Epoch [3/5], step[100/600], Loss:1.3958
Epoch [3/5], step[200/600], Loss:1.3825
Epoch [3/5], step[300/600], Loss:1.2799
Epoch [3/5], step[400/600], Loss:1.2613
Epoch [3/5], step[500/600], Loss:1.3486
Epoch [3/5], step[600/600], Loss:1.2173
Epoch [4/5], step[100/600], Loss:1.1636
Epoch [4/5], step[200/600], Loss:1.1495
Epoch [4/5], step[300/600], Loss:1.1194
Epoch [4/5], step[400/600], Loss:1.1845
Epoch [4/5], step[500/600], Loss:1.1303
Epoch [4/5], step[600/600], Loss:1.1192
Epoch [5/5], step[100/600], Loss:1.0622


# 测试模型的准确性

In [44]:

 # 使用no_grad()区块，测试时无需 计算梯度
with torch.no_grad():
    # Measure classification accuracy over the whole test loader.
    correct = 0
    total = 0
    # Each iteration yields one batch of images together with its labels.
    for images, labels in test_dataloader:
        images = images.reshape(-1, input_size)
        outputs = model(images)
        # The predicted class is the index of the largest score in each row.
        predicted = outputs.argmax(dim=1)
        # Count the samples seen so far.
        total += labels.size(0)
        # .item() converts the 0-dim count tensor to a Python int, so the
        # running total stays an exact integer and the percentage below is
        # computed with ordinary Python arithmetic instead of float32
        # tensor arithmetic (which printed 82.81999969482422).
        correct += (predicted == labels).sum().item()
    print('准确率为：{} %'.format(100*correct/total))

准确率为：82.81999969482422 %


# 延伸
将模型和tensor放到cuda上训练
+ 将每个批次的数据（图像和标签）都放到cuda上
+ 将模型也放到cuda上

In [51]:
# Train on the GPU when one is available; otherwise fall back to the CPU
# instead of crashing on the unconditional .cuda() calls.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# NOTE(review): Module.to() is documented to return the module itself, so
# model2 should be the already-trained `model` and this cell continues its
# training rather than starting from fresh weights -- confirm that is intended.
model2 = model.to(device)
# Rebuild the optimizer from the moved model's parameters, so that it is
# constructed after the device move rather than before it.
optmizer = torch.optim.SGD(model2.parameters(), lr=learning_rate)
# Number of mini-batches per epoch; only used in the progress message.
total_step = len(train_dataloader)
for epoch in range(num_epochs):
    for i,(images, labels) in enumerate(train_dataloader):
        # Flatten each image into a row of input_size values (-1 lets
        # reshape infer the number of rows), then move the batch to the
        # same device as the model.
        images = images.reshape(-1, input_size).to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model2(images)

        # Loss for this batch
        loss = loss_fn(outputs, labels)

        # Backward pass and parameter update
        optmizer.zero_grad()
        loss.backward()
        optmizer.step()

        # Report progress once every 100 steps.
        if (i+1) % 100 == 0:
            print('Epoch [{}/{}], step[{}/{}], Loss:{:.4f}'.format(epoch+1,num_epochs, i+1,total_step,loss.item()))

with torch.no_grad():
    # Measure test accuracy of model2. Batches are moved to whichever
    # device the model's parameters live on, so this works on GPU and CPU.
    eval_device = next(model2.parameters()).device
    total = 0
    correct = 0
    for images, labels in test_dataloader:
        # Flatten each image into a row of input_size values.
        images = images.reshape(-1, input_size).to(eval_device)
        labels = labels.to(eval_device)
        outputs = model2(images)
        # The predicted class is the index of the largest score in each row.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        # .item() converts the 0-dim count tensor to a Python int, keeping
        # the counter an exact integer on the host instead of a tensor.
        correct += (predicted == labels).sum().item()
    print('准确率为:{}%'.format(100*correct/total))

Epoch [1/5], step[100/600], Loss:0.6235
Epoch [1/5], step[200/600], Loss:0.7005
Epoch [1/5], step[300/600], Loss:0.6553
Epoch [1/5], step[400/600], Loss:0.6716
Epoch [1/5], step[500/600], Loss:0.6629
Epoch [1/5], step[600/600], Loss:0.6344
Epoch [2/5], step[100/600], Loss:0.5944
Epoch [2/5], step[200/600], Loss:0.6103
Epoch [2/5], step[300/600], Loss:0.7032
Epoch [2/5], step[400/600], Loss:0.6198
Epoch [2/5], step[500/600], Loss:0.6450
Epoch [2/5], step[600/600], Loss:0.6484
Epoch [3/5], step[100/600], Loss:0.6300
Epoch [3/5], step[200/600], Loss:0.5855
Epoch [3/5], step[300/600], Loss:0.5340
Epoch [3/5], step[400/600], Loss:0.6380
Epoch [3/5], step[500/600], Loss:0.5803
Epoch [3/5], step[600/600], Loss:0.6930
Epoch [4/5], step[100/600], Loss:0.5791
Epoch [4/5], step[200/600], Loss:0.5222
Epoch [4/5], step[300/600], Loss:0.4864
Epoch [4/5], step[400/600], Loss:0.5254
Epoch [4/5], step[500/600], Loss:0.6125
Epoch [4/5], step[600/600], Loss:0.6559
Epoch [5/5], step[100/600], Loss:0.6015
