In [1]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import os
import numpy as np

#### 1. 设定神经网络的参数

In [2]:
# 输入为28*28的图像[28, 28]
INPUT_NODE = 784
# 输出为1~10的可能性[10]
OUTPUT_NODE = 10

# 图像尺寸
IMAGE_SIZE = 28
# 图像的颜色通道数，这里只有黑白一种通道
NUM_CHANNELS = 1
# 标签的数量
NUM_LABELS = 10

# 第一层卷积的深度
CONV1_DEEP = 32
# 第一层卷积的过滤器尺寸
CONV1_SIZE = 5

# 第二层卷积的深度
CONV2_DEEP = 64
# 第二层卷积的过滤器尺寸
CONV2_SIZE = 5

# 全连接层的节点个数
FC_SIZE = 512


# 常见的卷积模型
# 本例子卷积模型 输入 -> 卷积层 -> 池化层 -> 卷积层 -> 池化层 -> 全连接层 -> 全连接层
# 输入 -> (卷积层+ -> 池化层?)+ -> 全连接层+

#### 2. 定义前向传播的过程

In [3]:
def inference(input_tensor, train, regularizer):
    # 第一层卷积1
    # 输入为[x-size=28, y-size=28, channel=1]的图像
    # 过滤器尺寸[x-size=5, y-size=5, channel=1, deep=32]
    # 过滤器步长=1
    # 输出为[x-size=28, y-size=28, deep=32]的矩阵
    with tf.variable_scope('layer1-conv1'):
        conv1_weights = tf.get_variable(
            name="weight", 
            shape=[CONV1_SIZE, CONV1_SIZE, NUM_CHANNELS, CONV1_DEEP],
            initializer=tf.truncated_normal_initializer(stddev=0.1)
        )
        conv1_biases = tf.get_variable(
            name="bias", 
            shape=[CONV1_DEEP], 
            initializer=tf.constant_initializer(0.0)
        )
        conv1 = tf.nn.conv2d(input_tensor, conv1_weights, strides=[1, 1, 1, 1], padding='SAME')
        relu1 = tf.nn.relu(tf.nn.bias_add(conv1, conv1_biases))

    # 第二层池化1
    # 输入为[x-size=28, y-size=28, deep=32]的矩阵
    # 过滤器尺寸[x-size=2, y-size=2]
    # 过滤器步长=2
    # 输出为[x-size=14, y-size=14, deep=32]的矩阵
    with tf.name_scope("layer2-pool1"):
        pool1 = tf.nn.max_pool(relu1, ksize = [1,2,2,1],strides=[1,2,2,1],padding="SAME")

    # 第三层卷积2
    # 输入为[x-size=14, y-size=14, deep=32]的矩阵
    # 过滤器尺寸[x-size=5, y-size=5, channel=1, deep=64]
    # 过滤器步长=1
    # 输出为[x-size=14, y-size=14, deep=64]的矩阵
    with tf.variable_scope("layer3-conv2"):
        conv2_weights = tf.get_variable(
            name="weight", 
            shape=[CONV2_SIZE, CONV2_SIZE, CONV1_DEEP, CONV2_DEEP],
            initializer=tf.truncated_normal_initializer(stddev=0.1)
        )
        conv2_biases = tf.get_variable(
            name="bias", 
            shape=[CONV2_DEEP], 
            initializer=tf.constant_initializer(0.0)
        )
        conv2 = tf.nn.conv2d(pool1, conv2_weights, strides=[1, 1, 1, 1], padding='SAME')
        relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_biases))

    # 第四层池化2
    # 输入为[x-size=14, y-size=14, deep=64]的矩阵
    # 过滤器尺寸[x-size=2, y-size=2]
    # 过滤器步长=2
    # 输出为[x-size=7, y-size=7, deep=64]的矩阵
    with tf.name_scope("layer4-pool2"):
        pool2 = tf.nn.max_pool(relu2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
        # 把[batch, x-size, y-size, deep]4维矩阵转化为[batch, vector]2维矩阵，长*宽*深度转换为1维向量
        pool_shape = pool2.get_shape().as_list()
        nodes = pool_shape[1] * pool_shape[2] * pool_shape[3]
        reshaped = tf.reshape(pool2, [pool_shape[0], nodes])

    # 全连接层    
    with tf.variable_scope('layer5-fc1'):
        fc1_weights = tf.get_variable(
            name="weight", 
            shape=[nodes, FC_SIZE],
            initializer=tf.truncated_normal_initializer(stddev=0.1)
        )
        # 只有全连接的权重需要加入正则化
        if regularizer != None: tf.add_to_collection('losses', regularizer(fc1_weights))
        fc1_biases = tf.get_variable("bias", [FC_SIZE], initializer=tf.constant_initializer(0.1))

        fc1 = tf.nn.relu(tf.matmul(reshaped, fc1_weights) + fc1_biases)
        # dropout在训练数据的时候，会随机把部分输出改为0
        # dropout可以避免过度拟合，dropout一般只在全连接层，而不是在卷积层或者池化层使用
        if train: fc1 = tf.nn.dropout(fc1, 0.5)

    # 全连接层
    # 输入为[512]的向量
    # 输出为[10]的向量
    with tf.variable_scope('layer6-fc2'):
        fc2_weights = tf.get_variable(
            name="weight", 
            shape=[FC_SIZE, NUM_LABELS],
            initializer=tf.truncated_normal_initializer(stddev=0.1)
        )
        if regularizer != None: tf.add_to_collection('losses', regularizer(fc2_weights))
        fc2_biases = tf.get_variable("bias", [NUM_LABELS], initializer=tf.constant_initializer(0.1))
        logit = tf.matmul(fc1, fc2_weights) + fc2_biases

    return logit

#### 3. 定义神经网络参数

In [4]:
BATCH_SIZE = 100
LEARNING_RATE_BASE = 0.01
LEARNING_RATE_DECAY = 0.99
REGULARIZATION_RATE = 0.0001
TRAINING_STEPS = 6000
MOVING_AVERAGE_DECAY = 0.99

#### 4. 定义训练

In [7]:
def train(mnist):
    # 定义输出为4维矩阵的placeholder
    x = tf.placeholder(tf.float32, [
            BATCH_SIZE,
            IMAGE_SIZE,
            IMAGE_SIZE,
            NUM_CHANNELS],
        name='x-input')
    y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name='y-input')
    
    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    y = inference(x,False,regularizer)
    global_step = tf.Variable(0, trainable=False)

    # 定义损失函数、学习率、滑动平均操作以及训练过程。
    variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    loss = cross_entropy_mean + tf.add_n(tf.get_collection('losses'))
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        mnist.train.num_examples / BATCH_SIZE, LEARNING_RATE_DECAY,
        staircase=True)

    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
    with tf.control_dependencies([train_step, variables_averages_op]):
        train_op = tf.no_op(name='train')
        
    # 初始化TensorFlow持久化类。
    saver = tf.train.Saver()
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        for i in range(TRAINING_STEPS):
            xs, ys = mnist.train.next_batch(BATCH_SIZE)

            reshaped_xs = np.reshape(xs, (
                BATCH_SIZE,
                IMAGE_SIZE,
                IMAGE_SIZE,
                NUM_CHANNELS))
            _, loss_value, step = sess.run([train_op, loss, global_step], feed_dict={x: reshaped_xs, y_: ys})

            if i % 50 == 0:
                print("After %d training step(s), loss on training batch is %g." % (step, loss_value))

#### 5. 主程序入口

In [None]:
def main(argv=None):
    mnist = input_data.read_data_sets("../../datasets/MNIST_data", one_hot=True)
    train(mnist)

if __name__ == '__main__':
    main()

Extracting ../../datasets/MNIST_data/train-images-idx3-ubyte.gz
Extracting ../../datasets/MNIST_data/train-labels-idx1-ubyte.gz
Extracting ../../datasets/MNIST_data/t10k-images-idx3-ubyte.gz
Extracting ../../datasets/MNIST_data/t10k-labels-idx1-ubyte.gz
After 1 training step(s), loss on training batch is 4.10292.
After 51 training step(s), loss on training batch is 1.14509.
After 101 training step(s), loss on training batch is 0.88744.
After 151 training step(s), loss on training batch is 1.03481.
After 201 training step(s), loss on training batch is 0.947686.
After 251 training step(s), loss on training batch is 0.992111.
After 301 training step(s), loss on training batch is 0.828515.
After 351 training step(s), loss on training batch is 0.94212.
After 401 training step(s), loss on training batch is 0.976691.
After 451 training step(s), loss on training batch is 0.928402.
After 501 training step(s), loss on training batch is 0.813116.
After 551 training step(s), loss on training batch