In [10]:
import torch 
from torch import nn
from d2l import torch as d2l
from torch.nn import functional as F

In [11]:
class Residual(nn.Module):
    """Residual block: two conv + batch-norm layers plus a skip connection.

    Args:
        input_channels: number of channels of the incoming tensor.
        num_channels: number of channels produced by both convolutions.
        kernel_size: kernel size shared by the two main convolutions.
        padding: padding shared by the two main convolutions.
        use1x1cov: when True, the skip path goes through a 1x1 convolution
            (``cov3``) so its channel count and stride match the main path.
        inputs_strides: stride of the first convolution (and of ``cov3``).
            A value of 2 halves the feature-map size, as the usage example
            with a 6x6 input producing a 3x3 output shows.

    When ``use1x1cov`` is False the input is added to the main path
    unchanged, so the two tensors must already have compatible shapes.
    """

    def __init__(self, input_channels, num_channels, kernel_size=3, padding=1,
                 use1x1cov=False, inputs_strides=1):
        super().__init__()
        # Settings shared by the two convolutions of the main path.
        shared = dict(kernel_size=kernel_size, padding=padding)

        # Submodules are registered in the order cov1, cov2, [cov3], bn1, bn2.
        # The first convolution is the only one that may change resolution.
        self.cov1 = nn.Conv2d(input_channels, num_channels,
                              stride=inputs_strides, **shared)
        self.cov2 = nn.Conv2d(num_channels, num_channels, stride=1, **shared)
        # If the main path changes the feature-map size or channel count, the
        # input X must be changed the same way; a 1x1 convolution does that.
        self.cov3 = (
            nn.Conv2d(input_channels, num_channels, kernel_size=1,
                      stride=inputs_strides)
            if use1x1cov
            else None
        )
        self.bn1 = nn.BatchNorm2d(num_channels)
        self.bn2 = nn.BatchNorm2d(num_channels)

    def forward(self, X):
        """Return ``relu(main_path(X) + skip(X))``."""
        out = self.cov1(X)
        out = F.relu(self.bn1(out))
        out = self.bn2(self.cov2(out))
        # Skip path: identity, or the 1x1 projection when one was created.
        shortcut = X if self.cov3 is None else self.cov3(X)
        out += shortcut
        return F.relu(out)

In [12]:
# Demo: a stride-2 block with a 1x1 projection on the skip path.
# The spatial size changes (6x6 input -> 3x3 output); channels stay at 3.
blk = Residual(3, 3, use1x1cov=True, inputs_strides=2)
x = blk(torch.rand(4, 3, 6, 6))
x.shape

torch.Size([4, 3, 3, 3])

In [13]:
# Demo: a stride-1 block with a 1x1 projection on the skip path.
# The channel count changes (3 -> 6); the spatial size stays 6x6.
blk = Residual(3, 6, use1x1cov=True)
x = blk(torch.rand(4, 3, 6, 6))
x.shape

torch.Size([4, 6, 6, 6])

In [14]:
def resnet_block(input_channels, num_channels, num_residuals, first_block=False):
    """Build the list of ``Residual`` blocks that make up one ResNet stage.

    Args:
        input_channels: channel count of the tensor entering the stage.
        num_channels: channel count produced by every block of the stage.
        num_residuals: number of ``Residual`` blocks to create.
        first_block: True for the stage that directly follows the stem's
            max-pooling layer. That stage does not downsample, because the
            stem has already reduced the feature-map size.

    Returns:
        A list of ``Residual`` modules, ready to be unpacked into
        ``nn.Sequential``. The list is empty when ``num_residuals`` is 0.
    """
    blk = []
    for i in range(num_residuals):
        if i == 0 and not first_block:
            # First block of a non-first stage: change the channel count and
            # halve the feature-map size (stride 2, 1x1 projection on the skip).
            blk.append(Residual(input_channels, num_channels,
                                use1x1cov=True, inputs_strides=2))
        elif i == 0 and input_channels != num_channels:
            # First block of the first stage when the stem width differs from
            # the stage width: keep the resolution but project the channels.
            # Previously input_channels was ignored here, so the block was
            # built for num_channels inputs instead of input_channels.
            blk.append(Residual(input_channels, num_channels, use1x1cov=True))
        else:
            # Every remaining block keeps both the channel count and the
            # feature-map size (identity skip connection).
            blk.append(Residual(num_channels, num_channels))
    return blk

In [15]:
# Stem: 7x7 stride-2 convolution, batch norm, ReLU, then 3x3 stride-2 max-pool.
# It maps a 1-channel input to 64 channels and reduces 224x224 to 56x56.
b1 = nn.Sequential(
    nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
    nn.BatchNorm2d(64),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
)

In [16]:
# Each stage holds two residual blocks. The max-pool in the stem has already
# reduced the feature map, so the first stage does not downsample again: both
# of its blocks keep 64 input and 64 output channels.
b2 = nn.Sequential(*resnet_block(64, 64, 2, first_block=True))

# The remaining stages each double the channel count and halve the resolution.
b3, b4, b5 = (
    nn.Sequential(*resnet_block(c_in, c_out, 2))
    for c_in, c_out in ((64, 128), (128, 256), (256, 512))
)

In [17]:
# Full network: stem, four residual stages, global average pooling to 1x1,
# flatten to a 512-vector, then a linear classifier with 10 outputs.
net = nn.Sequential(
    b1,
    b2,
    b3,
    b4,
    b5,
    nn.AdaptiveAvgPool2d((1, 1)),
    nn.Flatten(),
    nn.Linear(512, 10),
)


In [18]:
# Shape sanity check: push one random 1x1x224x224 tensor through the network
# and print the output shape after every top-level layer.
x = torch.rand(1, 1, 224, 224)

# Run the dry pass in eval mode and without autograd. In train mode the
# BatchNorm layers would fold this random batch into their running statistics
# before training starts, and a computation graph would be built for nothing.
was_training = net.training
net.eval()
try:
    with torch.no_grad():
        for layer in net:
            x = layer(x)
            print( layer.__class__.__name__,'output_size:',x.shape)
finally:
    # Restore whatever mode the network was in before the check.
    net.train(was_training)

Sequential output_size: torch.Size([1, 64, 56, 56])
Sequential output_size: torch.Size([1, 64, 56, 56])
Sequential output_size: torch.Size([1, 128, 28, 28])
Sequential output_size: torch.Size([1, 256, 14, 14])
Sequential output_size: torch.Size([1, 512, 7, 7])
AdaptiveAvgPool2d output_size: torch.Size([1, 512, 1, 1])
Flatten output_size: torch.Size([1, 512])
Linear output_size: torch.Size([1, 10])


In [19]:
# Training hyper-parameters.
lr = 0.001
num_epochs = 20
batch_size = 128

# Fashion-MNIST batches, with images resized to 224x224 to match the network
# input size used in the shape check.
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size=batch_size, resize=224)

# NOTE(review): train_no_image is not defined in this notebook; it is presumably
# a local addition to the d2l package - confirm its signature there.
# The GPU index (3) is hard-coded; adjust it for the machine at hand.
d2l.train_no_image(
    net,
    train_iter,
    test_iter,
    num_epochs,
    lr,
    d2l.try_gpu(i=3),
    num_print=1,
)

training on cuda:3
loss 0.449, train acc 0.839 
test_acc is 0.880
loss 0.256, train acc 0.907 
test_acc is 0.847
loss 0.211, train acc 0.923 
test_acc is 0.903
loss 0.184, train acc 0.932 
test_acc is 0.914
loss 0.157, train acc 0.942 
test_acc is 0.893
loss 0.137, train acc 0.949 
test_acc is 0.894
loss 0.115, train acc 0.958 
test_acc is 0.923
loss 0.095, train acc 0.965 
test_acc is 0.928
loss 0.075, train acc 0.973 
test_acc is 0.921
loss 0.054, train acc 0.981 
test_acc is 0.917
loss 0.048, train acc 0.983 
test_acc is 0.928
loss 0.034, train acc 0.988 
test_acc is 0.916
loss 0.036, train acc 0.987 
test_acc is 0.926
loss 0.023, train acc 0.992 
test_acc is 0.926


KeyboardInterrupt: 