In [6]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

#### 1.设置输入和输出节点的个数，配置神经网络的参数

In [48]:
# --- Network shape ---
# Number of input features per example.
INPUT_NODE = 28 * 28
# Number of output classes.
OUTPUT_NODE = 10
# Number of units in the single hidden layer.
LAYERS_NODE = 500

# --- Training schedule ---
# Number of examples fed per training step.
BATCH_SIZE = 32
# Total number of training steps.
TRAINING_STEPS = 5000

# --- Optimisation hyper-parameters ---
# Initial learning rate handed to the exponential decay schedule.
LEARNING_RATE_BASE = 1e-1
# Multiplicative decay factor of the learning rate.
LEARNING_RATE_DECAY = 0.99
# Scale of the L2 weight penalty.
REGULARAZTION_RATE = 1e-4
# Decay used for the exponential moving average of the trainable variables.
MOVING_AVERGE_DECAY = 0.99

#### 2.定义辅助函数来计算前向传播结果，使用Leaky ReLU作为激活函数

In [49]:
def inferece(input_tensor, avg_class, weights1, biases1, weights2, biases2):
    """Compute the forward pass (logits) of the two-layer fully connected network.

    The hidden layer uses a leaky ReLU activation; the output layer is linear,
    i.e. no softmax is applied here.

    Args:
        input_tensor: Input batch; it is matrix-multiplied by ``weights1``.
        avg_class: Either ``None`` or an object exposing ``average(variable)``.
            When provided, every parameter is replaced by
            ``avg_class.average(parameter)`` before it is used.
        weights1: Hidden-layer weights.
        biases1: Hidden-layer biases.
        weights2: Output-layer weights.
        biases2: Output-layer biases.

    Returns:
        The output-layer pre-activation values.
    """
    # Identity check on purpose: `== None` would dispatch to a custom __eq__
    # if the averaging object (or a stand-in for it) defines one.
    if avg_class is not None:
        # Swap each parameter for its averaged counterpart; the forward pass
        # below is then shared by both modes instead of being duplicated.
        weights1 = avg_class.average(weights1)
        biases1 = avg_class.average(biases1)
        weights2 = avg_class.average(weights2)
        biases2 = avg_class.average(biases2)

    hidden = tf.nn.leaky_relu(tf.matmul(input_tensor, weights1) + biases1)
    return tf.matmul(hidden, weights2) + biases2

#### 3.定义训练过程

In [50]:
def train(mnist):
    """Build the two-layer network graph, train it and print accuracies.

    Validation accuracy of the moving-average model is printed every 1000
    steps, and test accuracy once after ``TRAINING_STEPS`` steps.

    Args:
        mnist: Dataset object providing ``train.num_examples``,
            ``train.next_batch(n)`` and ``validation`` / ``test`` splits that
            each expose ``images`` and ``labels``.
    """
    def _layer_params(fan_in, fan_out):
        # Weights drawn from a truncated normal (stddev 1), biases filled with 0.1.
        layer_weights = tf.Variable(tf.truncated_normal([fan_in, fan_out], stddev=1))
        layer_biases = tf.Variable(tf.constant(0.1, shape=[fan_out]))
        return layer_weights, layer_biases

    # Graph inputs: flattened images and one-hot labels.
    inputs = tf.placeholder(tf.float32, [None, INPUT_NODE], name="x-input")
    targets = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name="y-input")

    # Hidden-layer parameters first, then output-layer parameters.
    hidden_w, hidden_b = _layer_params(INPUT_NODE, LAYERS_NODE)
    output_w, output_b = _layer_params(LAYERS_NODE, OUTPUT_NODE)

    # Forward pass on the raw (non-averaged) parameters; used for the loss.
    logits = inferece(inputs, None, hidden_w, hidden_b, output_w, output_b)

    # Step counter (not trainable) plus moving averages of all trainable variables.
    global_step = tf.Variable(0, trainable=False)
    ema = tf.train.ExponentialMovingAverage(MOVING_AVERGE_DECAY, num_updates=global_step)
    ema_update = ema.apply(tf.trainable_variables())
    averaged_logits = inferece(inputs, ema, hidden_w, hidden_b, output_w, output_b)

    # Cross entropy against the class index recovered from the one-hot labels.
    true_labels = tf.argmax(targets, 1)
    per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits,
        labels=true_labels)
    data_loss = tf.reduce_mean(per_example_loss)

    # Total loss = mean cross entropy + L2 penalty on both weight matrices.
    l2_penalty = tf.contrib.layers.l2_regularizer(REGULARAZTION_RATE)
    weight_penalty = l2_penalty(hidden_w) + l2_penalty(output_w)
    loss = data_loss + weight_penalty

    # Staircase exponential decay of the learning rate.
    learning_rate = tf.train.exponential_decay(
        learning_rate=LEARNING_RATE_BASE,
        global_step=global_step,
        decay_steps=mnist.train.num_examples / BATCH_SIZE,
        decay_rate=LEARNING_RATE_DECAY,
        staircase=True)

    # Gradient-descent step; minimize() also increments global_step.
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    descent_step = optimizer.minimize(loss, global_step=global_step)

    # A single op that triggers both the parameter update and the average update.
    with tf.control_dependencies([descent_step, ema_update]):
        train_op = tf.no_op(name="train")

    # Accuracy is measured on the moving-average model.
    hits = tf.equal(tf.argmax(averaged_logits, 1), tf.argmax(targets, 1))
    accuracy = tf.reduce_mean(tf.cast(hits, tf.float32))

    # Session setup: soft device placement, GPU memory allocated on demand.
    session_config = tf.ConfigProto(allow_soft_placement=True)
    session_config.gpu_options.allow_growth = True
    with tf.Session(config=session_config) as sess:
        sess.run(tf.global_variables_initializer())

        # Full validation and test splits, fed in one go for evaluation.
        validation_feed = {inputs: mnist.validation.images, targets: mnist.validation.labels}
        test_feed = {inputs: mnist.test.images, targets: mnist.test.labels}

        for step in range(TRAINING_STEPS):
            # Report validation accuracy every 1000 steps, before the update.
            if step % 1000 == 0:
                validation_accuracy = sess.run(accuracy, feed_dict=validation_feed)
                print("After %d training step(s), validation accuracy using average model is %g" % (step, validation_accuracy))

            batch_images, batch_labels = mnist.train.next_batch(BATCH_SIZE)
            sess.run(train_op, feed_dict={inputs: batch_images, targets: batch_labels})

        test_accuracy = sess.run(accuracy, feed_dict=test_feed)
        print("After %d training step(s), test accuracy using average model is %g" % (TRAINING_STEPS, test_accuracy))
        

#### 4.主程序入口，这里设定模型训练步数为5000次

In [51]:
# Reset the global default graph before the model is built.
tf.reset_default_graph()


def main(argv=None):
    """Load MNIST with one-hot labels from the local dataset folder and train.

    Args:
        argv: Accepted but not used.
    """
    dataset = input_data.read_data_sets(
        "../../../../datasets/MNIST_data/",
        one_hot=True,
    )
    train(dataset)


if __name__ == "__main__":
    main()

Extracting ../../../../datasets/MNIST_data/train-images-idx3-ubyte.gz
Extracting ../../../../datasets/MNIST_data/train-labels-idx1-ubyte.gz
Extracting ../../../../datasets/MNIST_data/t10k-images-idx3-ubyte.gz
Extracting ../../../../datasets/MNIST_data/t10k-labels-idx1-ubyte.gz
After 0 training step(s), validation accuracy using average model is 0.0872
After 1000 training step(s), validation accuracy using average model is 0.9306
After 2000 training step(s), validation accuracy using average model is 0.9458
After 3000 training step(s), validation accuracy using average model is 0.9502
After 4000 training step(s), validation accuracy using average model is 0.951
After 5000 training step(s), test accuracy using average model is 0.9549
