# 计算输出特征图

**假设：**

> - 正方形的 方形的 输入 特征图 特征图 的大小为 的大小为 I * I 
> - 正方形的 卷积核的大小为 f * f
> - 步长 （stride）为 s
> - 填充 （padding）的行数或者列为 p

**问：**正方形的输出特征图的大小 （O * O）为多少？请列出相应的数学公式？

**答：**

- 在 'SAME' 即填充的情况下
    - $O = ceil(\frac{I}{s})$
- 在 'VALID' 即不填充的情况下
    - $O = ceil(\frac{I-f+1}{s})$

# 根据下图中描述的卷积神经网络参数搭建一个与之对应的卷积神经网络，实现 MNIST 手写数字识别

![](../pics/AIE20_W06_HW_CNN_01.png)

In [4]:
import tensorflow as tf
import numpy as np
import os

In [5]:
lib_path = os.path.abspath('../libs/')
if lib_path not in os.sys.path:
    os.sys.path.append(lib_path)
import input_data

In [6]:
################################################################################
#
# load data
#
################################################################################
mnist = input_data.read_data_sets('mnist_data', one_hot=True)

Extracting mnist_data\train-images-idx3-ubyte.gz
Extracting mnist_data\train-labels-idx1-ubyte.gz
Extracting mnist_data\t10k-images-idx3-ubyte.gz
Extracting mnist_data\t10k-labels-idx1-ubyte.gz


In [7]:
sample_features, sample_lable = mnist.train.next_batch(1)
print(f'features shape is {sample_features.shape} and data type is {sample_features.dtype}\n',
      f'lable shape is {sample_lable.shape} and data type is {sample_lable.dtype}')

features shape is (1, 784) and data type is float32
 lable shape is (1, 10) and data type is float64


In [8]:
################################################################################
#
# initial tensorflow enviroment
#
################################################################################
tf.reset_default_graph()

In [9]:
################################################################################
#
# declare parameters & function
#
################################################################################

learning_rate = 0.1
batch_size = 100
training_epoch = 1000

X_input = tf.placeholder(tf.float32,
                         shape=[None, 28, 28, 1])
Y_label = tf.placeholder(tf.float64,
                         shape=[None, 10])


def add_nn_layer(X,
                 in_size,
                 out_size,
                 activation_function=None):
    # weigths 使用随机正态分布初始化权重
    W = tf.Variable(tf.random_normal(shape=[in_size, out_size]))
    # bias 设置偏置单元的值为常量0.1
    b = tf.Variable(tf.constant(0.1, shape=[1, out_size]))
    # 计算：f = X product W + b
    f = tf.add(tf.matmul(X, W), b)

    return f if activation_function is None else activation_function(f)


def add_convolutional_layer(X,
                            in_channels,
                            out_channels,
                            filter_height,
                            filter_width,
                            stride,
                            activation_function=None):
    """添加卷积层

    Parameters:
    -----------
    X: 输入的数据 [batch, in_height, in_width, in_channels]
    in_channels: 输入特征图（feature map）的数量
    out_channels: 输出特征图（feature map）的数量
    filter_hight: 过滤器卷积核的高度
    filter_width：过滤器卷积核的宽度
    stride: 步长
    """
    # Filters 过滤器初始化
    F = tf.Variable(tf.truncated_normal(shape=[filter_height,
                                               filter_width,
                                               in_channels,
                                               out_channels]))
    # bais 偏置单元初始化
    b = tf.Variable(tf.constant(0.1, shape=[out_channels]))
    # X [batch, in_height, in_width, in_channels]
    # F [kernel_size kernel_size in_channels out_channels]
    conv = tf.nn.conv2d(X,
                        F,
                        strides=[1, stride, stride, 1],
                        padding='SAME')
    output = conv if activation_function is None else activation_function(conv)

    return tf.add(output, b)


def add_pooling_layer(X,
                      pool_height,
                      pool_width,
                      stride):
    """添加池化层

    Parameters:
    -----------
    X: 输入数据 [batch_size] + input_spatial_shape + [num_channels]
    pool_height: 池化核高度
    pool_width: 池化核宽度
    stride: 步长
    """
    return tf.nn.max_pool(X,
                          [1, pool_height, pool_width, 1],
                          [1, stride, stride, 1],
                          padding='SAME')

In [10]:
################################################################################
#
# constructed graph
#
################################################################################
# 卷积层1
# 输入数据 的维度 [-1, 28, 28, 1]
# 输出数据的维度 [-1, 28, 28, 32]
conv_1 = add_convolutional_layer(X_input,
                                 in_channels=1,
                                 out_channels=32,
                                 filter_height=5,
                                 filter_width=5,
                                 stride=1,
                                 activation_function=tf.nn.relu)
# 池化层1
# 输入数据 的维度 [-1, 28, 28, 32]
# 输出数据的维度 [-1, 14, 14, 32]
pool_1 = add_pooling_layer(conv_1,
                           pool_height=2,
                           pool_width=2,
                           stride=2)
# 卷积层2
# 输入数据 的维度 [-1, 14, 14, 32]
# 输出数据的维度 [-1, 14, 14, 64]
conv_2 = add_convolutional_layer(pool_1,
                                 in_channels=32,
                                 out_channels=64,
                                 filter_height=5,
                                 filter_width=5,
                                 stride=1,
                                 activation_function=tf.nn.relu)
# 池化层2
# 输入数据 的维度 [-1, 14, 14, 64]
# 输出数据的维度 [-1, 7, 7, 64]
pool_2 = add_pooling_layer(conv_2,
                           pool_height=2,
                           pool_width=2,
                           stride=2)
# 打平数据
# [-1, 7, 7, 64] -> [-1, 7*7*64]
fc_X = tf.reshape(pool_2, shape=[-1, 7*7*64])
# 全连接层1
# 输入数据 的维度 [-1, 7*7*64]
# 输出数据的维度 [-1, 1024]
fc_1 = add_nn_layer(fc_X,
                    in_size=7*7*64,
                    out_size=1024,
                    activation_function=tf.nn.relu)
# 输出层
# 输入数据 的维度 [-1, 1024]
# 输出数据的维度 [-1, 10]
output = add_nn_layer(fc_1,
                      in_size=1024,
                      out_size=10)
# 损失函数
loss_op = tf.losses.softmax_cross_entropy(onehot_labels=Y_label,logits=output)
# 优化算法 梯度下降
train_op = tf.train.GradientDescentOptimizer(
    learning_rate=learning_rate).minimize(loss_op)
# 初始化数据operator
init_op = tf.global_variables_initializer()

In [12]:
################################################################################
#
# executed graph
#
################################################################################
with tf.Session() as sess:
#     writer = tf.summary.FileWriter('../graphs', sess.graph)
    sess.run(init_op)
    for epoch in range(training_epoch):
        x, y = mnist.train.next_batch(batch_size=batch_size)
        x = x.reshape([batch_size, 28, 28, 1])
        sess.run(train_op,
                 feed_dict={X_input: x,
                            Y_label: y})
        if epoch % 100 == 0:
            test_number = 1000
            test_data = mnist.test.images[:test_number]
            test_data = test_data.reshape([test_number, 28, 28, 1])
            pred = sess.run(output,
                            feed_dict={X_input: test_data})
            result = tf.argmax(pred, 1)
            label = tf.argmax(mnist.test.labels[:test_number], 1)
            accuracy = sess.run(
                tf.reduce_sum(
                    tf.cast(tf.equal(result, label),
                            tf.int32)) / batch_size)
            print(accuracy)

0.87
0.85
0.85
0.85
0.85
0.85
0.85
0.85
0.85
0.85


![](../pics/AIE20_W06_HW_CNN_02.png)