#### 稠密块（Dense Block）

DenseNet的卷积块使用ResNet改进版本的`BN -> ReLU -> Conv`结构。每个卷积的输出通道数被称为`growth_rate`，这是因为假设输入通道数为`in_channels`，而且有`layers`层卷积，那么稠密块输出的通道数就是`in_channels + growth_rate * layers`。

In [1]:
from mxnet import nd
from mxnet.gluon import nn

def conv_block(channels):
    """Return a ``BatchNorm -> ReLU -> 3x3 Conv2D`` stack.

    ``channels`` is the number of output channels of the convolution.
    The convolution uses a 3x3 kernel with ``padding=1``.
    """
    block = nn.Sequential()
    block.add(nn.BatchNorm())
    block.add(nn.Activation('relu'))
    block.add(nn.Conv2D(channels, kernel_size=3, padding=1))
    return block

class DenseBlock(nn.Block):
    """A stack of ``conv_block`` stages whose outputs are concatenated.

    Every stage receives the running concatenation of the block input and
    all previous stage outputs, and contributes ``growth_rate`` additional
    channels along axis 1. The result therefore has
    ``in_channels + layers * growth_rate`` channels.
    """

    def __init__(self, layers, growth_rate, **kwargs):
        super(DenseBlock, self).__init__(**kwargs)
        self.net = nn.Sequential()
        for _ in range(layers):
            stage = conv_block(growth_rate)
            self.net.add(stage)

    def forward(self, x):
        # Feed the accumulated features to each stage, then append the
        # stage's output to them on the channel axis (dim=1).
        for stage in self.net:
            x = nd.concat(x, stage(x), dim=1)
        return x

RuntimeError: module compiled against API version 0xc but this version of numpy is 0xb

RuntimeError: module compiled against API version 0xc but this version of numpy is 0xb

In [5]:
# Sanity check: a dense block with 2 conv layers and a growth rate of 10.
dblk = DenseBlock(2, 10)
dblk.initialize()

# Random batch of shape (4, 3, 8, 8); axis 1 is the axis DenseBlock concatenates on.
x = nd.random.uniform(shape=(4, 3, 8, 8))
dblk(x).shape # expected channels on axis 1: 2 * 10 + 3 = 23

(4, 23, 8, 8)

#### 过渡块
因为使用拼接的缘故，每经过一次拼接输出通道数可能会激增。为了控制模型复杂度，这里引入一个过渡块，它不仅把输入的高和宽减半，同时也使用$1 \times 1$卷积来改变通道数。

In [3]:
def transition_block(channels):
    """Return a ``BatchNorm -> ReLU -> 1x1 Conv2D -> 2x2 AvgPool2D`` stack.

    The 1x1 convolution sets the channel count to ``channels``; the
    average pooling uses a 2x2 window with stride 2.
    """
    stages = [
        nn.BatchNorm(),
        nn.Activation('relu'),
        nn.Conv2D(channels, kernel_size=1),
        nn.AvgPool2D(pool_size=2, strides=2),
    ]
    block = nn.Sequential()
    block.add(*stages)
    return block

In [4]:
# Sanity check: a transition block whose 1x1 convolution outputs 10 channels.
tblk = transition_block(10)
tblk.initialize()

# Apply it to the batch `x` created in the earlier dense-block cell.
tblk(x).shape

(4, 10, 4, 4)

#### DenseNet

In [10]:
# Hyper-parameters read as module-level globals by dense_net().
# Output channels of the first 7x7 convolution.
init_channels = 64
# Channels produced by every conv layer inside a dense block.
growth_rate = 32
# Number of conv layers in each dense block, one entry per block.
block_layers = [6, 12, 24, 16]
# Number of units of the final Dense layer.
num_classes = 10

def dense_net():
    """Build the DenseNet classifier as an ``nn.Sequential``.

    The architecture is driven by the module-level globals
    ``init_channels``, ``growth_rate``, ``block_layers`` and
    ``num_classes``:

    1. Stem: 7x7 stride-2 convolution, batch norm, ReLU, 3x3 stride-2
       max pooling.
    2. One ``DenseBlock`` per entry of ``block_layers``, with a
       ``transition_block`` between consecutive dense blocks (none after
       the last one). Each transition halves the channel count.
    3. Head: batch norm, ReLU, global average pooling, flatten, and a
       ``Dense`` layer with ``num_classes`` units.

    Returns:
        The assembled, not yet initialized network.
    """
    net = nn.Sequential()
    with net.name_scope():
        # Stem.
        net.add(
            nn.Conv2D(init_channels, kernel_size=7, strides=2, padding=3),
            nn.BatchNorm(),
            nn.Activation('relu'),
            nn.MaxPool2D(pool_size=3, strides=2, padding=1)
        )

        # `channels` tracks the number of channels flowing through the
        # network so that each transition can halve it.
        channels = init_channels
        last_block = len(block_layers) - 1
        for i, layers in enumerate(block_layers):
            net.add(DenseBlock(layers, growth_rate))
            channels += layers * growth_rate
            if i != last_block:
                # Record the halved count; otherwise the next transition
                # would be sized from a stale, too-large value.
                channels //= 2
                net.add(transition_block(channels))

        # Head. Global pooling collapses whatever spatial size is left,
        # so the network does not rely on the feature map already being
        # 1x1 at this point.
        net.add(
            nn.BatchNorm(),
            nn.Activation('relu'),
            nn.GlobalAvgPool2D(),
            nn.Flatten(),
            nn.Dense(num_classes)
        )
    return net

In [13]:
# Because the network is fairly deep, shrink the input to 32x32 for training.
import sys
# Make modules in the parent directory importable (presumably where the
# project-local `utils` helper module lives -- confirm).
sys.path.append('..')
import utils
from mxnet import gluon, init

# Load Fashion-MNIST in batches of 64, resized to 32.
train_data, test_data = utils.load_data_fashion_mnist(batch_size=64, resize=32)

# NOTE(review): try_gpu presumably falls back to the CPU when no GPU is
# available (the recorded run trained on cpu(0)) -- confirm in utils.
ctx = utils.try_gpu()
net = dense_net()
net.initialize(ctx=ctx, init=init.Xavier())

# Softmax cross-entropy loss, optimized with plain SGD at learning rate 0.1.
loss = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1})
# Train for a single epoch.
utils.train(train_data, test_data, net, loss, trainer, ctx, num_epochs=1)

Start training on  cpu(0)


KeyboardInterrupt: 