In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Constants describing the MNIST dataset.
# Width of the one-hot label vector fed through the `y-input` placeholder.
OUTPUT_NODE = 10

# Training configuration for the neural network.
# Number of examples drawn from the training set per optimization step.
BATCH_SIZE = 100

# Base learning rate and the multiplicative factor used for its
# exponential decay.
LEARNING_RATE_BASE = 0.05
LEARNING_RATE_DECAY = 0.99    

# Keep probability for dropout (presumably meant for the dropout applied
# after the first fully connected layer).
KEEP_PROB = 0.5

# Weight of the L2 regularization term. The identifier is misspelled
# ("REGULARAZTION") but is kept as-is because other code refers to it.
REGULARAZTION_RATE = 0.001

# Total number of optimization steps executed by the training loop.
TRAINING_STEPS = 5000

# Network structure parameters.
# Input images are IMAGE_SIZE x IMAGE_SIZE with NUM_CHANNELS channels;
# the network emits NUM_LABELS logits.
IMAGE_SIZE = 28
NUM_CHANNELS = 1
NUM_LABELS = 10

# First convolution: number of output channels and square kernel side.
CONV1_DEEP = 32
CONV1_SIZE = 5

# Second convolution: number of output channels and square kernel side.
CONV2_DEEP = 64
CONV2_SIZE = 5

# Number of units in the first fully connected layer.
FC_SIZE = 512

# 定义神经网络结构
def inference(input_tensor, train, reuse, regularizer):
    """Build the forward pass: conv -> pool -> conv -> pool -> fc -> fc.

    Args:
        input_tensor: Batch of input images. The first convolution kernel is
            declared with NUM_CHANNELS input channels, and the flattening
            step requires dimensions 1..3 of the pooled tensor to be
            statically known.
        train: When truthy, dropout with keep probability KEEP_PROB is
            applied to the output of the first fully connected layer.
        reuse: Forwarded as ``reuse`` to every ``tf.variable_scope`` so a
            second call can share the variables created by the first one.
        regularizer: Optional callable applied to the weight matrices of
            both fully connected layers; each result is added to the
            ``'losses'`` collection. Pass None to skip regularization.

    Returns:
        The unnormalized logits produced by the last fully connected layer
        (NUM_LABELS columns).
    """
    # Layer 1: CONV1_SIZE x CONV1_SIZE convolution, stride 1, SAME padding.
    with tf.variable_scope('layer1-conv1', reuse = reuse):
        conv1_weights = tf.get_variable(
            "weight", [CONV1_SIZE, CONV1_SIZE, NUM_CHANNELS, CONV1_DEEP],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        conv1_biases = tf.get_variable("bias", [CONV1_DEEP], initializer=tf.constant_initializer(0.0))
        conv1 = tf.nn.conv2d(input_tensor, conv1_weights, strides=[1, 1, 1, 1], padding='SAME')
        relu1 = tf.nn.relu(tf.nn.bias_add(conv1, conv1_biases))

    # Layer 2: 2x2 max pooling with stride 2.
    with tf.name_scope("layer2-pool1"):
        pool1 = tf.nn.max_pool(relu1, ksize = [1,2,2,1],strides=[1,2,2,1],padding="SAME")

    # Layer 3: CONV2_SIZE x CONV2_SIZE convolution, stride 1, SAME padding.
    with tf.variable_scope("layer3-conv2", reuse = reuse):
        conv2_weights = tf.get_variable(
            "weight", [CONV2_SIZE, CONV2_SIZE, CONV1_DEEP, CONV2_DEEP],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        conv2_biases = tf.get_variable("bias", [CONV2_DEEP], initializer=tf.constant_initializer(0.0))
        conv2 = tf.nn.conv2d(pool1, conv2_weights, strides=[1, 1, 1, 1], padding='SAME')
        relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_biases))

    # Layer 4: 2x2 max pooling with stride 2, then flatten every example
    # into a single row so it can feed the fully connected layers.
    with tf.name_scope("layer4-pool2"):
        pool2 = tf.nn.max_pool(relu2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
        pool_shape = pool2.get_shape().as_list()
        # Dimension 0 is skipped here and left as -1 in the reshape below.
        nodes = pool_shape[1] * pool_shape[2] * pool_shape[3]
        reshaped = tf.reshape(pool2, [-1, nodes])

    # Layer 5: fully connected + ReLU, with optional dropout while training.
    with tf.variable_scope('layer5-fc1', reuse = reuse):
        fc1_weights = tf.get_variable("weight", [nodes, FC_SIZE],
                                      initializer=tf.truncated_normal_initializer(stddev=0.1))
        if regularizer is not None:
            tf.add_to_collection('losses', regularizer(fc1_weights))
        fc1_biases = tf.get_variable("bias", [FC_SIZE], initializer=tf.constant_initializer(0.1))

        fc1 = tf.nn.relu(tf.matmul(reshaped, fc1_weights) + fc1_biases)
        if train:
            # Use the module-level constant rather than a duplicated literal.
            fc1 = tf.nn.dropout(fc1, KEEP_PROB)

    # Layer 6: fully connected output layer producing the logits.
    with tf.variable_scope('layer6-fc2', reuse = reuse):
        fc2_weights = tf.get_variable("weight", [FC_SIZE, NUM_LABELS],
                                      initializer=tf.truncated_normal_initializer(stddev=0.1))
        if regularizer is not None:
            tf.add_to_collection('losses', regularizer(fc2_weights))
        fc2_biases = tf.get_variable("bias", [NUM_LABELS], initializer=tf.constant_initializer(0.1))
        logit = tf.matmul(fc1, fc2_weights) + fc2_biases

    return logit

# TensorFlow计算图创建过程。
def define_graph(num_train_examples=None):
    """Create the TensorFlow graph for training and evaluation.

    Two copies of the network are built on the same input placeholder and
    share their variables: one with dropout and L2 regularization (used for
    the loss) and one without either (used for the accuracy metric).

    Args:
        num_train_examples: Size of the training set, used to derive the
            number of steps between learning-rate decays. When None, the
            value is read from the module-level ``mnist`` object, which is
            how this function behaved before the parameter existed.

    Returns:
        A tuple ``(x, y_, train_op, accuracy)``: the image placeholder, the
        label placeholder, the op that performs one optimization step, and
        the accuracy tensor computed from the dropout-free network.

    Raises:
        NameError: If ``num_train_examples`` is None and no module-level
            ``mnist`` has been defined.
    """
    if num_train_examples is None:
        # Backward-compatible fallback for callers that rely on the global
        # dataset object created in the script entry point.
        num_train_examples = mnist.train.num_examples

    x = tf.placeholder(tf.float32, [None, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS], name='x-input')
    y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name='y-input')

    regularizer = tf.contrib.layers.l2_regularizer(REGULARAZTION_RATE)
    # Training network: creates the variables (reuse=False), applies dropout
    # and registers the regularization terms in the 'losses' collection.
    y = inference(x, True, False, regularizer)
    # Evaluation network: shares the variables above (reuse=True), with no
    # dropout and no additional regularization terms.
    y_test = inference(x, False, True, None)

    # Step counter; excluded from the trainable variables.
    global_step = tf.Variable(0, trainable=False)

    # Loss = mean softmax cross-entropy between labels and training logits
    # plus the sum of everything collected under 'losses'.
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y)
    loss = tf.reduce_mean(cross_entropy) + tf.add_n(tf.get_collection('losses'))

    # Exponentially decayed learning rate; decay_steps corresponds to one
    # pass over the training set at BATCH_SIZE examples per step.
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        num_train_examples / BATCH_SIZE,
        LEARNING_RATE_DECAY,
        staircase=True)
    # Plain gradient descent; minimize() also increments global_step.
    train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)

    # Accuracy: fraction of examples whose arg-max logit (evaluation
    # network) equals the arg-max of the label vector.
    correct_prediction = tf.equal(tf.argmax(y_test, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    return x, y_, train_op, accuracy

# 训练模型的过程。
def train(x, y_, train_op, accuracy, mnist):
    """Run the training loop and report validation and test accuracy.

    Args:
        x: Placeholder that receives image batches.
        y_: Placeholder that receives the matching label batches.
        train_op: Op executed once per training step.
        accuracy: Tensor evaluated to obtain the accuracy on a fed dataset.
        mnist: Dataset object exposing ``train``, ``validation`` and ``test``
            splits; ``train`` must provide ``next_batch`` and the other two
            ``images`` and ``labels``.
    """
    # Every image array is reshaped to this layout before being fed to `x`.
    image_shape = (-1, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS)

    def make_feed(images, labels):
        # Build a feed dict with images reshaped to the layout above.
        return {x: np.reshape(images, image_shape), y_: labels}

    with tf.Session() as sess:
        tf.global_variables_initializer().run()

        # Validation data: evaluated periodically to follow training progress.
        validate_feed = make_feed(mnist.validation.images, mnist.validation.labels)

        # Test data: evaluated only once, after training has finished.
        test_feed = make_feed(mnist.test.images, mnist.test.labels)

        for step in range(TRAINING_STEPS):
            # Report validation accuracy every 100 steps (including step 0,
            # i.e. before any training has happened).
            if step % 100 == 0:
                validate_acc = sess.run(accuracy, feed_dict=validate_feed)
                print("After %d training step(s), validation accuracy is %g " % (step, validate_acc))

            # Fetch the next mini-batch and perform one optimization step.
            batch_images, batch_labels = mnist.train.next_batch(BATCH_SIZE)
            sess.run(train_op, feed_dict=make_feed(batch_images, batch_labels))

        # Final accuracy on the held-out test set.
        test_acc = sess.run(accuracy, feed_dict=test_feed)
        print("After %d training step(s), test accuracy is %g" % (TRAINING_STEPS, test_acc))

# Script entry point.
if __name__ == '__main__':
    # Load the MNIST dataset from ../MNIST_data with one-hot encoded labels
    # (per the original author's note, the loader fetches the data
    # automatically when it is constructed).
    # NOTE: this must remain a module-level variable named `mnist`, because
    # define_graph() reads it as a global when called without arguments.
    mnist = input_data.read_data_sets("../MNIST_data", one_hot=True)
    # Build the graph, then train and evaluate it on the loaded dataset.
    x, y_, train_op, accuracy = define_graph()
    train(x, y_, train_op, accuracy, mnist)

Extracting ../MNIST_data/train-images-idx3-ubyte.gz
Extracting ../MNIST_data/train-labels-idx1-ubyte.gz
Extracting ../MNIST_data/t10k-images-idx3-ubyte.gz
Extracting ../MNIST_data/t10k-labels-idx1-ubyte.gz
After 0 training step(s), validation accuracy is 0.0776 
After 100 training step(s), validation accuracy is 0.9344 
After 200 training step(s), validation accuracy is 0.9532 
After 300 training step(s), validation accuracy is 0.9616 
After 400 training step(s), validation accuracy is 0.9662 
After 500 training step(s), validation accuracy is 0.9716 
After 600 training step(s), validation accuracy is 0.9732 
After 700 training step(s), validation accuracy is 0.9744 
After 800 training step(s), validation accuracy is 0.977 
After 900 training step(s), validation accuracy is 0.9782 
After 1000 training step(s), validation accuracy is 0.98 
After 1100 training step(s), validation accuracy is 0.979 
After 1200 training step(s), validation accuracy is 0.981 
After 1300 training step(s), va