# 5.11 ResNet残差网络

关于ResNet：

+ 实际中，添加过多的层后，训练误差反而会增加。

+ 使用批量归一化，能使深层网络的中间数值变得稳定，但不能完全解决该问题。

+ 2015年，何恺明等人提出的残差网络ResNet，较好地解决了深度网络层数增加后训练误差上升的问题。

+ ResNet一经提出（2015年），就夺得了ImageNet图像识别挑战赛冠军。

In [1]:
import time
import torch 
from torch import nn,optim
import torch.nn.functional as F 

import sys
sys.path.append('..')
import d2l_pytorch as d2l 

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [2]:
# Residual block definition.
# The block lets the caller choose the number of output channels, whether an
# extra 1x1 convolution is applied on the shortcut branch to change the
# channel count, and the stride of the convolutions.

class residual(nn.Module):
    """Residual block: two 3x3 conv + batch-norm layers plus a skip connection.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Number of channels produced by both 3x3 convolutions.
        use_1conv: When True, the shortcut branch goes through a 1x1
            convolution (with the same ``stride``) so that its output can be
            added to the main branch.
        stride: Stride of the first 3x3 convolution and, when present, of
            the 1x1 shortcut convolution.
    """

    def __init__(self, in_channels, out_channels, use_1conv=False, stride=1):
        super().__init__()
        # Main branch: only the first convolution applies the stride.
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3,
                               stride=stride, padding=1)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3,
                               padding=1)
        # Optional projection on the shortcut branch; None means the input
        # is added to the main branch unchanged.
        if use_1conv:
            self.conv3 = nn.Conv2d(in_channels, out_channels, kernel_size=1,
                                   stride=stride)
        else:
            self.conv3 = None
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        """Return ``relu(main_branch(x) + shortcut(x))``."""
        y = F.relu(self.bn1(self.conv1(x)))
        y = self.bn2(self.conv2(y))
        # Explicit identity check: whether a shortcut projection exists must
        # not depend on the truthiness of an nn.Module instance.
        if self.conv3 is not None:
            x = self.conv3(x)
        # The ReLU is applied after the addition, not before it.
        return F.relu(y + x)

In [3]:
# Sanity check: with equal input/output channels and the default stride,
# the block is expected to return a tensor with the same shape as its input.
blk = residual(3, 3)
X = torch.rand((4, 3, 6, 6))
blk(X).shape # torch.Size([4, 3, 6, 6])


torch.Size([4, 3, 6, 6])

In [4]:
# Sanity check: with the 1x1 shortcut convolution and stride 2 the channel
# count goes from 3 to 6 while height and width are halved (6 -> 3).
blk = residual(3, 6, use_1conv=True, stride=2)
blk(X).shape # torch.Size([4, 6, 3, 3])


torch.Size([4, 6, 3, 3])

ResNet模型


In [5]:
# Network stem: a 7x7 stride-2 convolution from 1 input channel to 64
# channels, batch normalization, ReLU, then a 3x3 stride-2 max pooling.
# The residual stages, pooling and classifier are appended to `net` later.
net=nn.Sequential(
    nn.Conv2d(1,64,kernel_size=7,stride=2,padding=3),
    nn.BatchNorm2d(64),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3,stride=2,padding=1)
)

In [6]:
# The stem is followed by four stages, each built from residual blocks.

# For the first stage the number of output channels equals the number of
# input channels.
def resnet_blk(in_channels, out_channels, num_residual, first_blk=False):
    """Build one ResNet stage as an ``nn.Sequential`` of residual blocks.

    Args:
        in_channels: Channels of the feature map entering the stage.
        out_channels: Channels produced by every residual block in the stage.
        num_residual: Number of residual blocks in the stage.
        first_blk: True for the first stage, which must keep the channel
            count unchanged and therefore uses only plain residual blocks.

    Returns:
        ``nn.Sequential`` holding ``num_residual`` residual blocks.
    """
    assert not first_blk or in_channels == out_channels

    def make_unit(position):
        # Every stage except the first opens with a block that changes the
        # channel count through the 1x1 shortcut and uses stride 2.
        if position == 0 and not first_blk:
            return residual(in_channels, out_channels, use_1conv=True, stride=2)
        return residual(out_channels, out_channels)

    units = [make_unit(position) for position in range(num_residual)]
    return nn.Sequential(*units)

In [7]:
# Append four stages of two residual blocks each. The first stage keeps 64
# channels; each later stage doubles the channel count (128, 256, 512).

net.add_module('resnet_block1',resnet_blk(64,64,2,first_blk=True))
net.add_module('resnet_block2',resnet_blk(64,128,2))
net.add_module('resnet_block3',resnet_blk(128,256,2))
net.add_module('resnet_block4',resnet_blk(256,512,2))

In [8]:
# Append a global average pooling layer followed by the fully connected
# output layer (512 features -> 10 outputs).
# NOTE(review): d2l.GlobalAvgPool2d and d2l.FlattenLayer are project helpers
# not visible here; the recorded shapes (1x512x1x1, then 1x10) suggest they
# pool to 1x1 and flatten to (batch, features) -- confirm in d2l_pytorch.
net.add_module('global_avg_pool',d2l.GlobalAvgPool2d())
net.add_module('fc',nn.Sequential(d2l.FlattenLayer(),nn.Linear(512,10)))

In [9]:
# Shape trace: push one random single-channel 224x224 input through each
# top-level child of `net` in order and print the output shape after each.
# Note that this rebinds the global X used by the earlier checks.
X = torch.rand((1, 1, 224, 224))
for name, layer in net.named_children():
    X = layer(X)
    print(name, ' output shape:\t', X.shape)


0  output shape:	 torch.Size([1, 64, 112, 112])
1  output shape:	 torch.Size([1, 64, 112, 112])
2  output shape:	 torch.Size([1, 64, 112, 112])
3  output shape:	 torch.Size([1, 64, 56, 56])
resnet_block1  output shape:	 torch.Size([1, 64, 56, 56])
resnet_block2  output shape:	 torch.Size([1, 128, 28, 28])
resnet_block3  output shape:	 torch.Size([1, 256, 14, 14])
resnet_block4  output shape:	 torch.Size([1, 512, 7, 7])
global_avg_pool  output shape:	 torch.Size([1, 512, 1, 1])
fc  output shape:	 torch.Size([1, 10])


In [10]:
# Load the data used to train the model.
# NOTE(review): load_data_fashion_mnist_ch05 is a project helper; presumably
# it returns Fashion-MNIST train/test iterators with images resized to
# 96x96 -- confirm in d2l_pytorch.

batch_size=512

train_iter,test_iter=d2l.load_data_fashion_mnist_ch05(batch_size,resize=96)


In [11]:
# Learning rate and number of epochs, then an Adam optimizer over all
# parameters of `net`.
lr,num_epochs=0.001,1  # author's note reads "5" -- presumably the epoch count intended for a full run
optimizer=torch.optim.Adam(net.parameters(),lr=lr)

In [12]:
# Train the network.
# NOTE(review): d2l.train_ch05 is a project helper not visible here; the
# recorded output shows it reports the device and a per-iteration loss.
d2l.train_ch05(net,train_iter,test_iter,batch_size,optimizer,device,num_epochs)

training on  cpu
epoch 0/1, iter 0/117, loss 2.353
epoch 0/1, iter 1/117, loss 1.957
epoch 0/1, iter 2/117, loss 2.378
epoch 0/1, iter 3/117, loss 1.593
epoch 0/1, iter 4/117, loss 1.145
epoch 0/1, iter 5/117, loss 1.138


残差块通过跨层的数据通道，使我们能够训练出有效的深度神经网络。