# DenseNet
因为 ResNet 提出了跨层连接的思想，这直接影响了随后出现的卷积网络架构，其中最有名的就是 CVPR 2017 的 best paper：DenseNet。

DenseNet 和 ResNet 的不同在于：ResNet 是跨层求和，而 DenseNet 是跨层将特征在通道维度上进行拼接。

因为是在通道维度进行特征的拼接，所以底层的输出会被保留并传入后面所有的层，这能够更好地保证梯度的传播，同时能够使用低层特征和高层特征进行联合训练，从而得到更好的结果。

DenseNet 主要由 dense block 构成，下面我们来实现一个 dense block。

In [1]:
from __future__ import print_function
from __future__ import division
from __future__ import absolute_import

import tensorflow as tf
from utils import cifar10_input

  from ._conv import register_converters as _register_converters


In [3]:
# NOTE(review): batch_size is defined here but is not passed to load_data in
# this cell; the shapes printed later show a leading dimension of 64, so
# presumably load_data's own default matches -- confirm against
# utils.cifar10_input.
batch_size = 64

# Load the CIFAR-10 train/validation image and label tensors, requesting
# 96x96 inputs via image_size.
train_imgs, train_labels, val_imgs, val_labels = cifar10_input.load_data(image_size=96)

In [4]:
import tensorflow.contrib.slim as slim

### 构建基本卷积单元

In [5]:
def bn_relu_conv(x, out_depth, scope='dense_basic_conv', reuse=None):
    """Apply the basic DenseNet unit: batch norm -> ReLU -> conv (kernel 3).

    Args:
        x: Input tensor fed to batch normalization.
        out_depth: Number of output channels produced by the convolution.
        scope: Variable scope that wraps the three layers.
        reuse: Forwarded to `tf.variable_scope` to control variable reuse.

    Returns:
        The output tensor of the convolution.
    """
    with tf.variable_scope(scope, reuse=reuse):
        normalized = slim.batch_norm(x, activation_fn=None, scope='bn')
        activated = tf.nn.relu(normalized, name='activation')
        # Activation, normalizer and bias are switched off explicitly so the
        # convolution stays a plain linear layer even inside an arg_scope
        # that sets conv2d defaults.
        return slim.conv2d(
            activated,
            out_depth,
            3,
            activation_fn=None,
            normalizer_fn=None,
            biases_initializer=None,
            scope='conv'
        )

### 构建`densenet`的基本单元

In [6]:
def dense_block(x, growth_rate, num_layers, scope='dense_block', reuse=None):
    """Stack `num_layers` densely connected bn-relu-conv units.

    Each unit receives the concatenation (along the last axis) of the block
    input and the outputs of all previous units, and contributes
    `growth_rate` new channels. The result therefore has
    `in_channels + num_layers * growth_rate` channels on the last axis.

    Args:
        x: Input tensor; channels are expected on the last axis.
        growth_rate: Number of channels each unit adds.
        num_layers: Number of bn-relu-conv units in the block.
        scope: Variable scope that wraps the whole block.
        reuse: Forwarded to `tf.variable_scope` to control variable reuse.

    Returns:
        The tensor holding the input concatenated with every unit's output.
    """
    with tf.variable_scope(scope, reuse=reuse):
        net = x
        for i in range(num_layers):
            out = bn_relu_conv(net, growth_rate, scope='block%d' % i)
            # Concatenate all previous outputs so they become the input of
            # the next basic convolution unit.
            net = tf.concat([net, out], axis=-1)

        return net

### 构建`transition`层

In [7]:
def transition(x, out_depth, scope='transition', reuse=None):
    """Transition layer: batch norm -> ReLU -> conv (kernel 1) -> avg pool.

    The convolution changes the channel count to `out_depth`; the average
    pooling uses kernel size 2 with stride 2 to downsample spatially.

    Args:
        x: Input tensor fed to batch normalization.
        out_depth: Number of output channels produced by the convolution.
        scope: Variable scope that wraps the layer.
        reuse: Forwarded to `tf.variable_scope` to control variable reuse.

    Returns:
        The pooled output tensor.
    """
    with tf.variable_scope(scope, reuse=reuse):
        net = slim.batch_norm(x, activation_fn=None, scope='bn')
        net = tf.nn.relu(net, name='activation')
        # Activation, normalizer and bias are disabled explicitly so any
        # enclosing conv2d arg_scope defaults do not apply here.
        net = slim.conv2d(net, out_depth, 1, activation_fn=None, normalizer_fn=None, biases_initializer=None, scope='conv')
        net = slim.avg_pool2d(net, 2, 2, scope='avg_pool')

        return net

### 构建`densenet`整体结构

In [8]:
def densenet(x, num_classes, growth_rate=32, block_layers=(6, 12, 24, 16), is_training=None, scope='densenet', reuse=None, verbose=False):
    """Build a DenseNet classifier graph and return its logits.

    Layout: a stem (7x7 stride-2 conv, batch norm, ReLU, 3x3 stride-2 max
    pool), then one dense block per entry of `block_layers`, with a
    transition layer that halves the channel count after every dense block
    except the last, followed by batch norm, ReLU, global average pooling
    over axes 1 and 2, and a fully connected logit layer.

    Args:
        x: Input image tensor. Expected to be 4-D with channels last, since
            the pooling averages over axes 1 and 2 and features are
            concatenated on the last axis.
        num_classes: Number of output units of the final logit layer.
        growth_rate: Channels added by each unit inside a dense block.
        block_layers: Sequence giving the number of units in each dense
            block. The default is an immutable tuple so it cannot be
            mutated across calls.
        is_training: Passed to every `slim.batch_norm` created inside this
            function through an arg_scope.
        scope: Top-level variable scope of the network.
        reuse: Forwarded to `tf.variable_scope`; pass True to share the
            variables of a previously built network.
        verbose: When True, print the tensor shape after each stage.

    Returns:
        The logits tensor produced by the final fully connected layer.
    """
    num_blocks = len(block_layers)

    with tf.variable_scope(scope, reuse=reuse):
        with slim.arg_scope([slim.batch_norm], is_training=is_training):

            if verbose:
                print('input: {}'.format(x.shape))

            # Stem: reduce spatial resolution before the dense blocks.
            with tf.variable_scope('block0'):
                net = slim.conv2d(x, 64, [7, 7], stride=2, normalizer_fn=None, activation_fn=None, scope='conv_7x7')
                net = slim.batch_norm(net, activation_fn=None, scope='bn')
                net = tf.nn.relu(net, name='activation')
                net = slim.max_pool2d(net, [3, 3], stride=2, scope='max_pool')

                if verbose:
                    print('block0: {}'.format(net.shape))

            # Create the dense blocks and the transitions between them.
            for i, num_layers in enumerate(block_layers):
                with tf.variable_scope('block%d' % (i + 1)):
                    net = dense_block(net, growth_rate, num_layers)
                    # No transition after the last dense block.
                    if i != num_blocks - 1:
                        current_depth = net.get_shape().as_list()[-1]
                        net = transition(net, current_depth // 2)

                if verbose:
                    print('block{}: {}'.format(i+1, net.shape))

            with tf.variable_scope('block%d' % (num_blocks + 1)):
                net = slim.batch_norm(net, activation_fn=None, scope='bn')
                net = tf.nn.relu(net, name='activation')
                # `keepdims` replaces the deprecated `keep_dims` argument.
                net = tf.reduce_mean(net, [1, 2], name='global_pool', keepdims=True)

                if verbose:
                    print('block{}: {}'.format(num_blocks + 1, net.shape))

            with tf.variable_scope('classification'):
                net = slim.flatten(net, scope='flatten')
                net = slim.fully_connected(net, num_classes, activation_fn=None, normalizer_fn=None, scope='logit')

                if verbose:
                    print('classification: {}'.format(net.shape))

                return net

In [9]:
# Capture an arg_scope with conv2d defaults (ReLU activation, batch-norm
# normalizer) so it can be re-entered later when the networks are built.
# NOTE(review): every slim.conv2d call in densenet, bn_relu_conv and
# transition passes activation_fn and normalizer_fn explicitly, so these
# defaults appear to have no effect on the model -- confirm intent.
with slim.arg_scope([slim.conv2d], activation_fn=tf.nn.relu, normalizer_fn=slim.batch_norm) as sc:
    conv_scope = sc

In [10]:
# Boolean placeholder fed at run time; densenet forwards it to every batch
# norm layer.
is_training = tf.placeholder(tf.bool, name='is_training')

with slim.arg_scope(conv_scope):
    # The training network is built first and creates the variables.
    train_out = densenet(train_imgs, 10, is_training=is_training, verbose=True)
    # The validation network is built second with reuse=True so it shares
    # the variables created above.
    val_out = densenet(val_imgs, 10, is_training=is_training, reuse=True)

input: (64, 96, 96, 3)
block0: (64, 23, 23, 64)
block1: (64, 11, 11, 128)
block2: (64, 5, 5, 256)
block3: (64, 2, 2, 512)
block4: (64, 2, 2, 1024)
Instructions for updating:
keep_dims is deprecated, use keepdims instead
block5: (64, 1, 1, 1024)
classification: (64, 10)


### 构建训练

In [11]:
# Sparse softmax cross-entropy between the integer labels and the logits,
# computed separately for the training and validation networks.
with tf.variable_scope('loss'):
    train_loss = tf.losses.sparse_softmax_cross_entropy(labels=train_labels, logits=train_out, scope='train')
    val_loss = tf.losses.sparse_softmax_cross_entropy(labels=val_labels, logits=val_out, scope='val')

In [12]:
# Batch accuracy: the fraction of samples whose arg-max logit equals the
# label, computed for the training and validation networks.
with tf.name_scope('accuracy'):
    with tf.name_scope('train'):
        train_pred = tf.argmax(train_out, axis=-1, output_type=tf.int32)
        train_hits = tf.cast(tf.equal(train_pred, train_labels), tf.float32)
        train_acc = tf.reduce_mean(train_hits)
    with tf.name_scope('val'):
        val_pred = tf.argmax(val_out, axis=-1, output_type=tf.int32)
        val_hits = tf.cast(tf.equal(val_pred, val_labels), tf.float32)
        val_acc = tf.reduce_mean(val_hits)

In [13]:
# Fixed learning rate used by the optimizer below.
lr = 0.01

# SGD with momentum 0.9.
opt = tf.train.MomentumOptimizer(lr, momentum=0.9)

In [14]:
# Make the training step depend on the ops registered in UPDATE_OPS so they
# run every time train_op runs (slim.batch_norm registers its moving-average
# updates in this collection by default).
# NOTE(review): the collection is read after both the training and the
# validation networks have been built, so it likely also contains the
# validation network's batch-norm updates -- confirm whether those should
# be excluded.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    train_op = opt.minimize(train_loss)

### 开始训练

In [15]:
from utils.learning import train_with_bn

In [16]:
# Run training inside a context manager so the session is closed even when
# variable initialization or the training loop raises.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # 20000 is passed positionally; presumably the number of training steps
    # (the log below counts steps in the thousands) -- confirm against
    # utils.learning.train_with_bn.
    train_with_bn(sess, train_op, train_loss, train_acc, val_loss, val_acc, 20000, is_training)

[train]: step 0 loss = 2.3076 acc = 0.1094 (0.0392 / batch)
[val]: step 0 loss = 2.3089 acc = 0.1719
[train]: step 1000 loss = 1.0021 acc = 0.6875 (0.1351 / batch)
[train]: step 2000 loss = 0.6037 acc = 0.7656 (0.1330 / batch)
[train]: step 3000 loss = 0.4614 acc = 0.8125 (0.1330 / batch)
[train]: step 4000 loss = 0.3103 acc = 0.9219 (0.1330 / batch)
[val]: step 4000 loss = 0.8321 acc = 0.7500
[train]: step 5000 loss = 0.4124 acc = 0.8594 (0.1335 / batch)
[train]: step 6000 loss = 0.4418 acc = 0.8906 (0.1329 / batch)
[train]: step 7000 loss = 0.2239 acc = 0.9531 (0.1327 / batch)
[train]: step 8000 loss = 0.0356 acc = 0.9844 (0.1331 / batch)
[val]: step 8000 loss = 0.7691 acc = 0.8125
[train]: step 9000 loss = 0.0643 acc = 0.9688 (0.1331 / batch)
[train]: step 10000 loss = 0.3805 acc = 0.8750 (0.1331 / batch)
[train]: step 11000 loss = 0.1420 acc = 0.9688 (0.1330 / batch)
[train]: step 12000 loss = 0.1092 acc = 0.9688 (0.1331 / batch)
[val]: step 12000 loss = 1.2618 acc = 0.6875
[train]