### PyTorch 第八期 打卡 01
#### 参考 <a href="https://zhuanlan.zhihu.com/p/39667289">zhihu</a>


In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
# Preprocessing applied to every CIFAR-10 image when it is read from the dataset.
# ToTensor converts the image to a float tensor with values scaled to [0, 1];
# Normalize then applies (x - 0.5) / 0.5 per RGB channel, which maps that
# range to [-1, 1].
to_tensor = transforms.ToTensor()
normalize = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
transform = transforms.Compose([to_tensor, normalize])

# `root` is the folder the data lives in. Passing `transform` makes the dataset
# apply the pipeline above to each sample it returns.
# Only the training split is built with download=True (fetch the data if it is
# not already under `root`); the test split is built without it.
cifar_train = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
cifar_test = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    transform=transform,
)

Files already downloaded and verified


In [8]:
# Training batches of 32 samples, reshuffled at the start of every epoch.
trainloader = torch.utils.data.DataLoader(cifar_train, batch_size=32, shuffle=True)
# Evaluation batches of 32 samples in fixed dataset order: shuffling does not
# change aggregate test metrics and only makes per-batch results harder to reproduce.
testloader = torch.utils.data.DataLoader(cifar_test, batch_size=32, shuffle=False)

In [9]:
class LeNet(nn.Module):
    """LeNet-style convolutional classifier with 10 output classes.

    Architecture: two 5x5 convolutions (3 -> 6 -> 16 channels), each followed
    by ReLU and 2x2 max pooling, then three fully connected layers
    (16*5*5 -> 120 -> 84 -> 10). The first fully connected layer expects
    16 feature maps of size 5x5, which is what a 3x32x32 input produces.
    """

    def __init__(self):
        """Create the layers (convolutions, pooling, fully connected)."""
        super(LeNet, self).__init__()
        # Conv2d arguments: input channels, output channels, kernel size.
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # The last conv stage yields 16 channels of 5x5 feature maps, so the
        # first fully connected layer takes 16*5*5 input features.
        self.fc1 = nn.Linear(16*5*5, 120)
        self.fc2 = nn.Linear(120, 84)
        # 10 classes, so the final layer has 10 outputs.
        self.fc3 = nn.Linear(84, 10)
        self.pool = nn.MaxPool2d(2, 2)

    def forward(self, x):
        """Run the forward pass.

        Args:
            x: batch of images, shape (batch, 3, 32, 32).

        Returns:
            Tensor of shape (batch, 10) with one unnormalized score per class.

        Raises:
            RuntimeError: if the spatial size of ``x`` does not lead to 5x5
                feature maps, because ``fc1`` then receives the wrong number
                of features.
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten everything except the batch dimension. Unlike
        # view(-1, 16*5*5), this can never fold surplus features into the
        # batch dimension when the input has an unexpected size.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [11]:
# torch.optim provides the optimization algorithms, including SGD.
import torch.optim as optim

# Train on the GPU when CUDA is available and fall back to the CPU otherwise,
# so the same cell runs unchanged on machines with or without a GPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
net = LeNet().to(device)

# Cross-entropy is the loss used for this 10-class classification task.
criterion = nn.CrossEntropyLoss()
# SGD with momentum over all parameters of the network.
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [None]:
print("Start Training...")
for epoch in range(5):
    # Sum of the batch losses since the last report; averaged, printed and
    # reset every 100 batches.
    running_loss = 0.0
    # `step` is the 1-based batch index within the current epoch.
    for step, (inputs, labels) in enumerate(trainloader, start=1):
        # Move the batch to the same device as the network.
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Clear the gradients left over from the previous step (they would
        # otherwise accumulate), then forward pass, loss, backward pass and
        # parameter update.
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if step % 100 != 0:
            continue
        print('[Epoch %d, Batch %5d] loss: %.3f' %
              (epoch + 1, step, running_loss / 100))
        running_loss = 0.0

print("Done Training!")

Start Training...
[Epoch 1, Batch   100] loss: 1.558
[Epoch 1, Batch   200] loss: 1.551
[Epoch 1, Batch   300] loss: 1.511
[Epoch 1, Batch   400] loss: 1.552
[Epoch 1, Batch   500] loss: 1.508
[Epoch 1, Batch   600] loss: 1.494
[Epoch 1, Batch   700] loss: 1.523
[Epoch 1, Batch   800] loss: 1.483
[Epoch 1, Batch   900] loss: 1.521
[Epoch 1, Batch  1000] loss: 1.484
[Epoch 1, Batch  1100] loss: 1.467
[Epoch 1, Batch  1200] loss: 1.490
[Epoch 1, Batch  1300] loss: 1.469
[Epoch 1, Batch  1400] loss: 1.459
[Epoch 1, Batch  1500] loss: 1.471
[Epoch 2, Batch   100] loss: 1.434
[Epoch 2, Batch   200] loss: 1.423
[Epoch 2, Batch   300] loss: 1.442
[Epoch 2, Batch   400] loss: 1.447
[Epoch 2, Batch   500] loss: 1.432
[Epoch 2, Batch   600] loss: 1.402
[Epoch 2, Batch   700] loss: 1.396
[Epoch 2, Batch   800] loss: 1.424
[Epoch 2, Batch   900] loss: 1.422
[Epoch 2, Batch  1000] loss: 1.379
[Epoch 2, Batch  1100] loss: 1.383
[Epoch 2, Batch  1200] loss: 1.415
[Epoch 2, Batch  1300] loss: 1.367
[E

In [1]:
# 以上训练循环各步骤的解释
# 首先要把梯度清零，不然PyTorch每次计算梯度会累加，不清零的话第二次算的梯度等于第一次与第二次梯度之和
# optimizer.zero_grad()
# 计算前向传播的输出
# outputs = net(inputs)
# 根据输出计算loss
# loss = criterion(outputs, labels)
# 算完loss之后进行反向梯度传播，这个过程之后梯度会记录在变量中
# loss.backward()
# 用计算的梯度去做优化
# optimizer.step()