In [7]:
import torch
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import argparse

class ResidualBlock(nn.Module):
    def __init__(self, inchannel, outchannel, stride=1):
        super(ResidualBlock, self).__init__()
        self.left = nn.Sequential(
            nn.Conv2d(inchannel, outchannel, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(outchannel),
            nn.ReLU(inplace=True),
            nn.Conv2d(outchannel, outchannel, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(outchannel)
        )
        self.shortcut = nn.Sequential()
        if stride != 1 or inchannel != outchannel:
            self.shortcut = nn.Sequential(
                nn.Conv2d(inchannel, outchannel, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(outchannel)
            )

    def forward(self, x):
        out = self.left(x)
        out += self.shortcut(x)
        out = F.relu(out)
        return out

class ResNet(nn.Module):
    def __init__(self, ResidualBlock, num_classes=10):
        super(ResNet, self).__init__()
        self.inchannel = 64
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(),
        )
        self.layer1 = self.make_layer(ResidualBlock, 64,  2, stride=1)
        self.layer2 = self.make_layer(ResidualBlock, 128, 2, stride=2)
        self.layer3 = self.make_layer(ResidualBlock, 256, 2, stride=2)
        self.layer4 = self.make_layer(ResidualBlock, 512, 2, stride=2)
        self.fc = nn.Linear(512, num_classes)

    def make_layer(self, block, channels, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)   #strides=[1,1]
        layers = []
        for stride in strides:
            layers.append(block(self.inchannel, channels, stride))
            self.inchannel = channels
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.conv1(x)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out


def ResNet18():

    return ResNet(ResidualBlock)

In [11]:
import matplotlib.pyplot as plt
%matplotlib inline
from google.colab import drive
drive.mount('/content/drive')
import os
os.chdir("./drive/My Drive/workspaces")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [12]:
# 超参数设置
EPOCH = 200   #遍历数据集次数
pre_epoch = 0  # 定义已经遍历数据集的次数
BATCH_SIZE = 128      #批处理尺寸(batch_size)
LR = 0.1        #学习率
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),  #先四周填充0，在吧图像随机裁剪成32*32
    transforms.RandomHorizontalFlip(),  #图像一半的概率翻转，一半的概率不翻转
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), #R,G,B每层的归一化用到的均值和方差
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

trainset = torchvision.datasets.CIFAR10(root='nas/cifar10', train=True, download=False, transform=transform_train) #训练数据集
trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)   #生成一个个batch进行批训练，组成batch的时候顺序打乱取

testset = torchvision.datasets.CIFAR10(root='nas/cifar10', train=False, download=False, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=2)
# Cifar-10的标签

net = ResNet18().to(device)
criterion = nn.CrossEntropyLoss()  #损失函数为交叉熵，多用于多分类问题
optimizer = optim.SGD(net.parameters(), lr=LR, momentum=0.9, weight_decay=5e-4) #优化方式为mini-batch momentum-SGD，并采用L2正则化（权重衰减）
net.to(device)

ResNet(
  (conv1): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (layer1): Sequential(
    (0): ResidualBlock(
      (left): Sequential(
        (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
        (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (shortcut): Sequential()
    )
    (1): ResidualBlock(
      (left): Sequential(
        (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inp

In [None]:
# 训练
for epoch in range(EPOCH):
    for i, data in enumerate(trainloader):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        # 前向传播
        outputs = net(inputs)
        # 计算损失函数
        loss = criterion(outputs, labels)
        # 清空上一轮梯度
        optimizer.zero_grad()
        # 反向传播
        loss.backward()
        # 参数更新
        optimizer.step()
 
    print('epoch{} loss:{:.4f}'.format(epoch+1, loss.item()))
    net.eval()                                   #测试模式
    with torch.no_grad():             
      total_correct = 0                           #预测正确的个数
      total_num = 0
      for i, data in enumerate(testloader): 
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        out = net(inputs)
        _, predicted = torch.max(out.data, 1)
        total_num += labels.size(0)
        total_correct += (predicted == labels).sum().item()                            
      print('10000测试图像 准确率:{:.4f}%'.format(100 * total_correct / total_num)) 

print("Finished Traning")

epoch1 loss:1.2888
10000测试图像 准确率:54.1600%
epoch2 loss:1.5931
10000测试图像 准确率:40.2000%
epoch3 loss:1.4090
10000测试图像 准确率:46.3100%
epoch4 loss:1.4452
10000测试图像 准确率:53.8800%
epoch5 loss:1.3756
10000测试图像 准确率:57.8800%
epoch6 loss:1.0981
10000测试图像 准确率:62.2100%
epoch7 loss:1.2492
10000测试图像 准确率:63.1000%
epoch8 loss:1.1433
10000测试图像 准确率:64.2500%
epoch9 loss:0.7596
10000测试图像 准确率:66.5900%
epoch10 loss:1.0436
10000测试图像 准确率:66.5700%
epoch11 loss:1.1541
10000测试图像 准确率:67.1200%
epoch12 loss:0.9339
10000测试图像 准确率:68.5800%
epoch13 loss:0.7724
10000测试图像 准确率:69.2900%
epoch14 loss:0.8345
10000测试图像 准确率:69.6000%
epoch15 loss:0.7234
10000测试图像 准确率:72.2900%
epoch16 loss:0.6810
10000测试图像 准确率:69.9500%
epoch17 loss:0.6350
10000测试图像 准确率:73.1300%
epoch18 loss:0.6631
10000测试图像 准确率:74.8300%
epoch19 loss:0.5984
10000测试图像 准确率:72.3800%
epoch20 loss:0.6952
10000测试图像 准确率:75.3200%
epoch21 loss:0.6838
10000测试图像 准确率:76.1000%
epoch22 loss:0.7351
10000测试图像 准确率:76.2100%
epoch23 loss:0.6955
10000测试图像 准确率:75.5100%
epoch24 loss:0.5571


参照网上的写法说是可以达到90%以上，但实际并没有实现
个人理解: 在原始的resnet上，做了些优化：
1.原始的7*7的卷积核改为3*3，去掉了最大池化部分，这两部分的修改猜测是因为输入原始图片32*32过小，所以去掉了一些特征提取和降维的处理
2.另外对原始数据做了一些预处理，进行随机翻转，正则化，都有助于提升准备率.
3.调整LR，EPOCH，BATCH_SIZE等
4.问题：训练太慢了，有啥好办法，而且容易断线