In [1]:
from torch import nn
import torch 
from d2l import torch as d2l

In [2]:
def conv_block( input_channels , out_channels ):
    return nn.Sequential( 
            nn.BatchNorm2d( input_channels ), nn.ReLU(),#这里使用的是归一化-激活-卷积的方式
            nn.Conv2d(  input_channels , out_channels , kernel_size= 3 , padding=1 )#就是返回的特征图是原图大小
    )

In [5]:
class DenseBlock( nn.Module ):
    def __init__( self , num_convs , input_channels , num_channels  ):
        super().__init__()
        layers= []
        for i in range( num_convs ):
            layers.append( conv_block(
                 input_channels +num_channels* i , num_channels
                 )#这里添加将输入通道与输出通道进行相加，比如说两个block，输入通道输出通道为3,10，
                 #第一个block输出通道为13，那么为了保证第二个block的输入通道数正确，这里必须得加上1*num_channels
                 #那么第二个block输入通道为13，输出通道为23.同理向后继续叠加。
                 )
        self.net = nn.Sequential( *layers )
    def forward( self , x ):
        for blk in self.net:
            y = blk( x )
            x = torch.cat(( x , y ) , dim= 1 )#在通道数上进行叠加
        return x 

In [6]:
blk = DenseBlock(2, 3, 10)
X = torch.randn(4, 3, 8, 8)
Y = blk(X)
Y.shape


torch.Size([4, 23, 8, 8])

## 增加一个过渡层，对特征图长度进行减半，也就是1*1的卷积，然后添加一个Maxpool或者Avgpool

In [19]:
def transition_block( input_channels , output_channels ):
    return nn.Sequential( 
            nn.BatchNorm2d( input_channels ), nn.ReLU() , 
            nn.Conv2d(  input_channels , output_channels ,  kernel_size=1 ),
            nn.AvgPool2d( kernel_size=2 , stride=2 ),
    )

In [16]:
blk = transition_block( 23 , 23 )
blk(Y).shape

torch.Size([4, 23, 4, 4])

## densenet采用与resnet18一样的结构，采用4 个稠密块进行搭建.每个稠密块与残差快保持一致，都含有四个卷积层

In [17]:
b1 = nn.Sequential(
    nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
    nn.BatchNorm2d(64), nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1))


In [20]:
# `num_channels`为当前的通道数
num_channels, growth_rate = 64, 32
num_convs_in_dense_blocks = [4, 4, 4, 4]
blks = []
for i, num_convs in enumerate(num_convs_in_dense_blocks):
    blks.append(DenseBlock(num_convs, num_channels, growth_rate))
    # 上一个稠密块的输出通道数
    num_channels += num_convs * growth_rate
    # 在稠密块之间添加一个转换层，使通道数量减半
    if i != len(num_convs_in_dense_blocks) - 1:
        blks.append(transition_block(num_channels, num_channels // 2))
        num_channels = num_channels // 2


In [21]:
net = nn.Sequential(
    b1, *blks,
    nn.BatchNorm2d(num_channels), nn.ReLU(),
    nn.AdaptiveMaxPool2d((1, 1)),
    nn.Flatten(),
    nn.Linear(num_channels, 10))


In [22]:
lr, num_epochs, batch_size = 0.1, 10, 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=96)
d2l.train_ch6(net, train_iter, test_iter, num_epochs, lr, d2l.try_gpu())#非常的莫名其妙，在第二个epoch跑飞，在第三个epoch又收敛


training on  cuda:0
num_batch:78 , avg_loss:2.61 , avg_accuracy:0.19
num_batch:156 , avg_loss:1.90 , avg_accuracy:0.35
num_batch:234 , avg_loss:1.52 , avg_accuracy:0.48
num_batch:235 , avg_loss:1.52 , avg_accuracy:0.48
epochs:0 ,test_acc:0.69 
num_batch:78 , avg_loss:3.05 , avg_accuracy:0.11
num_batch:156 , avg_loss:2.64 , avg_accuracy:0.12
num_batch:234 , avg_loss:2.35 , avg_accuracy:0.16
num_batch:235 , avg_loss:2.35 , avg_accuracy:0.16
epochs:1 ,test_acc:0.30 
num_batch:78 , avg_loss:1.55 , avg_accuracy:0.28


KeyboardInterrupt: 