In [2]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Constants describing the MNIST dataset: width of the input placeholder
# and width of the label placeholder / output layer.
INPUT_NODE = 784
OUTPUT_NODE = 10

# Network and training configuration: width of the hidden layer and the
# number of examples drawn per training step.
LAYER1_NODE = 500
BATCH_SIZE = 100

# Base learning rate and the decay rate handed to the exponential
# learning-rate schedule.
LEARNING_RATE_BASE = 0.05
LEARNING_RATE_DECAY = 0.99    

# Value passed to tf.nn.dropout as the keep probability during training.
KEEP_PROB = 0.5

# Weight of the L2 regularization term.
# NOTE(review): the identifier is misspelled ("REGULARAZTION") but is
# referenced under this exact name elsewhere in the file, so it is kept.
REGULARAZTION_RATE = 0.001

# Number of iterations of the training loop.
TRAINING_STEPS = 5000

def get_weight_variable(shape, regularizer):
    """Create (or fetch) the "weights" variable of the current variable scope.

    The variable is requested through ``tf.get_variable`` with a truncated
    normal initializer (stddev 0.1), so whether it is created or reused is
    governed by the enclosing ``tf.variable_scope``.

    Args:
        shape: Shape forwarded to ``tf.get_variable``.
        regularizer: Callable that maps the weights to a regularization
            loss, or ``None`` to skip regularization. When given, its result
            is appended to the ``'losses'`` graph collection.

    Returns:
        The weights variable returned by ``tf.get_variable``.
    """
    weights = tf.get_variable(
        "weights",
        shape,
        initializer=tf.truncated_normal_initializer(stddev=0.1))
    # Identity check rather than `!= None`: equality would dispatch to the
    # argument's own __ne__, which an arbitrary callable may override.
    if regularizer is not None:
        tf.add_to_collection('losses', regularizer(weights))
    return weights


# Define the structure of the neural network.
def inference(x, is_train, reuse, regularizer):
    """Run the forward pass of the two-layer fully connected network.

    Args:
        x: Input tensor multiplied against an
            ``[INPUT_NODE, LAYER1_NODE]`` weight matrix.
        is_train: When truthy, dropout with ``KEEP_PROB`` is applied to the
            hidden layer's activations.
        reuse: Forwarded to ``tf.variable_scope`` for both layers; controls
            whether the layer variables are created or reused.
        regularizer: Forwarded to ``get_weight_variable`` for both weight
            matrices (may be ``None``).

    Returns:
        The output of the second layer (no activation applied).
    """
    # Hidden layer: affine transform, ReLU, and optional dropout.
    with tf.variable_scope('layer1', reuse=reuse):
        hidden_weights = get_weight_variable(
            [INPUT_NODE, LAYER1_NODE], regularizer)
        hidden_biases = tf.get_variable(
            "biases",
            [LAYER1_NODE],
            initializer=tf.constant_initializer(0.0))
        pre_activation = tf.matmul(x, hidden_weights) + hidden_biases
        hidden = tf.nn.relu(pre_activation)
        if is_train:
            hidden = tf.nn.dropout(hidden, KEEP_PROB)

    # Output layer: affine transform only.
    with tf.variable_scope('layer2', reuse=reuse):
        output_weights = get_weight_variable(
            [LAYER1_NODE, OUTPUT_NODE], regularizer)
        output_biases = tf.get_variable(
            "biases",
            [OUTPUT_NODE],
            initializer=tf.constant_initializer(0.0))
        output = tf.matmul(hidden, output_weights) + output_biases

    return output

# Build the TensorFlow computation graph.
def define_graph(num_train_examples=None):
    """Build the training and evaluation graph for the MNIST classifier.

    Two copies of the network share one set of variables: a training copy
    (dropout on, L2 regularization collected) that feeds the loss, and an
    evaluation copy (dropout off, variables reused) that feeds the accuracy.

    Args:
        num_train_examples: Size of the training set, used to derive the
            decay interval of the learning-rate schedule
            (``num_train_examples / BATCH_SIZE`` steps). When ``None``
            (the default) the value is read from the module-level ``mnist``
            object, which must then be bound before this function is called;
            passing the value explicitly removes that hidden dependency.

    Returns:
        A tuple ``(x, y_, train_op, accuracy)``: the input placeholder, the
        label placeholder, the training op, and the accuracy tensor.
    """
    if num_train_examples is None:
        # Backward-compatible fallback for callers that invoke
        # define_graph() with no arguments and rely on the global `mnist`.
        num_train_examples = mnist.train.num_examples

    x = tf.placeholder(tf.float32, [None, INPUT_NODE], name='x-input')
    y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name='y-input')

    regularizer = tf.contrib.layers.l2_regularizer(REGULARAZTION_RATE)
    # Training copy creates the variables; evaluation copy reuses them
    # without dropout and without adding regularization losses again.
    y = inference(x, True, False, regularizer)
    y_test = inference(x, False, True, None)

    # Non-trainable counter of completed training steps; incremented by
    # the optimizer's minimize() call below.
    global_step = tf.Variable(0, trainable=False)

    # Cross entropy between predictions and labels, plus the regularization
    # terms accumulated in the 'losses' collection.
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y)
    loss = tf.reduce_mean(cross_entropy) + tf.add_n(tf.get_collection('losses'))

    # Exponentially decaying learning rate.
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        num_train_examples / BATCH_SIZE,
        LEARNING_RATE_DECAY,
        staircase=True)
    # Plain gradient descent on the total loss.
    train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(
        loss, global_step=global_step)

    # Accuracy of the evaluation copy of the network.
    correct_prediction = tf.equal(tf.argmax(y_test, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    return x, y_, train_op, accuracy

# Train the model.
def train(x, y_, train_op, accuracy, mnist):
    """Run the training loop and report validation and test accuracy.

    Args:
        x: Input placeholder of the graph.
        y_: Label placeholder of the graph.
        train_op: Op executed once per training step.
        accuracy: Tensor evaluated on the validation and test sets.
        mnist: Dataset object providing ``train.next_batch`` and the
            ``validation`` / ``test`` ``images`` and ``labels``.
    """
    # Validation data gives a rough view of progress during training; the
    # test data is only used for the final evaluation.
    validation_inputs = {
        x: mnist.validation.images,
        y_: mnist.validation.labels,
    }
    test_inputs = {
        x: mnist.test.images,
        y_: mnist.test.labels,
    }

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        def evaluate(feed):
            # Evaluate the accuracy tensor on one full dataset split.
            return sess.run(accuracy, feed_dict=feed)

        for step in range(TRAINING_STEPS):
            # Report validation accuracy every 1000 steps.
            if not step % 1000:
                validate_acc = evaluate(validation_inputs)
                print("After %d training step(s), validation accuracy is %g " % (step, validate_acc))

            # Draw the next batch and take one optimization step.
            batch_images, batch_labels = mnist.train.next_batch(BATCH_SIZE)
            sess.run(train_op, feed_dict={x: batch_images, y_: batch_labels})

        # Final accuracy on the test set once training has finished.
        test_acc = evaluate(test_inputs)
        print("After %d training step(s), test accuracy is %g" % (TRAINING_STEPS, test_acc))

# Program entry point.
if __name__ == '__main__':
    # Load the MNIST dataset from ../MNIST_data with one-hot labels (per the
    # original note, the loader downloads the data automatically if needed).
    # `mnist` is bound at module level on purpose: define_graph() is called
    # without arguments and reads it as a global.
    mnist = input_data.read_data_sets("../MNIST_data", one_hot=True)
    x, y_, train_op, accuracy = define_graph()
    train(x, y_, train_op, accuracy, mnist)

Extracting ../MNIST_data/train-images-idx3-ubyte.gz
Extracting ../MNIST_data/train-labels-idx1-ubyte.gz
Extracting ../MNIST_data/t10k-images-idx3-ubyte.gz
Extracting ../MNIST_data/t10k-labels-idx1-ubyte.gz
After 0 training step(s), validation accuracy is 0.116 
After 1000 training step(s), validation accuracy is 0.9362 
After 2000 training step(s), validation accuracy is 0.9482 
After 3000 training step(s), validation accuracy is 0.9548 
After 4000 training step(s), validation accuracy is 0.9596 
After 5000 training step(s), test accuracy is 0.9608
