#### 加载库文件

In [11]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np

tf.reset_default_graph()

#### 定义必要的参数

In [7]:
# 节点参数
INPUT_NODE = 784
LAYER1_NODE = 291
LAYER2_NODE = 97
LAYER3_NODE = 33
OUTPUT_NODE = 10

# 训练集参数
BATCH_SIZE = 32

# 动量参数
MOVING_AVERAGE_DECAY = 0.99 # 动量比例

# 正则化参数
REGULARELIZATION_RATE = 0.01 # 正则化率

# 学习率参数
LEARNING_RATE_BASE = 0.1 # 初始化学习率
LEARNING_RATE_DECAY = 0.99 # 学习衰减率

# 优化参数
global_step = tf.Variable(0, trainable=False)
TRAING_STEPS = 50000

#### 定义前向传播函数

In [8]:
def inference(input_tensor, avg_class, weigth1, bias1, weight2, bias2, weight3, bias3, weight4, bias4):
    # 如果不含滑动平均类
    if avg_class == None:
        # 计算第一层激活值
        layer1 = tf.nn.leaky_relu(tf.matmul(input_tensor, weigth1) + bias1)
        # 计算第二层激活值
        layer2 = tf.nn.leaky_relu(tf.matmul(layer1, weight2) + bias2)
        # 计算第三次
        layer3 = tf.nn.leaky_relu(tf.matmul(layer2, weight3) + bias3)
        # 计算第四层值，没有激活函数
        return tf.matmul(layer3, weight4) + bias4
        
    # 如果含滑动平均类
    else:
        # 计算第一层激活值
        layer1 = tf.nn.leaky_relu(tf.matmul(input_tensor, avg_class.average(weigth1)) + avg_class.average(bias1))
        # 计算第二层激活值
        layer2 = tf.nn.leaky_relu(tf.matmul(layer1, avg_class.average(weight2)) + avg_class.average(bias2))
        # 计算第三次
        layer3 = tf.nn.leaky_relu(tf.matmul(layer2, avg_class.average(weight3)) + avg_class.average(bias3))
        # 计算第四层值，没有激活函数
        return tf.matmul(layer3, avg_class.average(weight4)) + avg_class.average(bias4)


#### 定义训练函数

In [12]:
def train(mnist):
    # 构建输入输出接口
    xs = tf.placeholder(tf.float32, shape=[None, INPUT_NODE], name="x-input")
    ys = tf.placeholder(tf.float32, shape=[None, OUTPUT_NODE], name="y-input")
    
    # 构建权重
    w1 = tf.Variable(tf.random_normal(shape=[INPUT_NODE, LAYER1_NODE], stddev=0.1))
    b1 = tf.Variable(tf.constant(0.1, shape=[LAYER1_NODE]))
    w2 = tf.Variable(tf.random_normal(shape=[LAYER1_NODE, LAYER2_NODE], stddev=0.1))
    b2 = tf.Variable(tf.constant(0.1, shape=[LAYER2_NODE]))
    w3 = tf.Variable(tf.random_normal(shape=[LAYER2_NODE, LAYER3_NODE], stddev=0.1))
    b3 = tf.Variable(tf.constant(0.1, shape=[LAYER3_NODE]))
    w4 = tf.Variable(tf.random_normal(shape=[LAYER3_NODE, OUTPUT_NODE], stddev=0.1))
    b4 = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]))
    
    # 定义滑动平均类
    variable_average = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    variable_average_op = variable_average.apply(tf.trainable_variables())
    
    # 计算前向传播:不带滑动平均类
    y_pre = inference(xs, None, w1, b1, w2, b2, w3, b3, w4, b4)
    
    # 计算前向传播结果:带有滑动平均类
    average_y = inference(xs, variable_average, w1, b1, w2, b2, w3, b3, w4, b4)
    
    # 计算交叉熵
    predict_label = tf.argmax(ys, 1)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y_pre,
                                                                  labels=predict_label)
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    
    # 正则化
    regularier = tf.contrib.layers.l2_regularizer(REGULARELIZATION_RATE)
    regularization = regularier(w1) + regularier(w2) + regularier(w3) + regularier(w4)
    loss = cross_entropy_mean + regularization
    
    # 设置指数衰减的学习率
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        mnist.train.num_examples / BATCH_SIZE,
        LEARNING_RATE_DECAY,
        staircase=True)
    
    # 优化损失函数
    train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
    
    # 定义依赖关系
    with tf.control_dependencies([train_op, variable_average_op]):
        train_op = tf.no_op(name="train")
        
    # 计算正确率
    correct_prediction = tf.equal(tf.argmax(average_y, 1), tf.argmax(ys, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    
    # 初始化会话，并开始训练
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        # 变量初始化
        tf.global_variables_initializer().run()
        
        # 获取交叉验证集和测试数据集
        validate_feed = {xs: mnist.validation.images, ys: mnist.validation.labels}
        test_feed = {xs: mnist.test.images, ys: mnist.test.labels}
        
        # 循环训练神经网络
        for i in range(TRAING_STEPS):
            if i % 1000 == 0:
                validate_acc = sess.run(accuracy, feed_dict=validate_feed)
                print("After %d training step(s), validation accuracy using average model is %g" % (i, validate_acc))
    
            xbatch, ybatch = mnist.train.next_batch(BATCH_SIZE)
            sess.run(train_op, feed_dict={xs: xbatch, ys: ybatch})
            
        test_acc = sess.run(accuracy, feed_dict=test_feed)
        print("After %d training step(s), test accuracy using average model is %g" % (TRAING_STEPS, test_acc))

#### 程序主入口

In [13]:
def main(argv=None):
    mnist = input_data.read_data_sets("../../../datasets/MNIST_data/", one_hot=True)
    train(mnist)
    
if __name__=="__main__":
    main()

Extracting ../../../datasets/MNIST_data/train-images-idx3-ubyte.gz
Extracting ../../../datasets/MNIST_data/train-labels-idx1-ubyte.gz
Extracting ../../../datasets/MNIST_data/t10k-images-idx3-ubyte.gz
Extracting ../../../datasets/MNIST_data/t10k-labels-idx1-ubyte.gz


ValueError: Tensor("truediv:0", shape=(), dtype=float32) must be from the same graph as Tensor("ExponentialMovingAverage/decay:0", shape=(), dtype=float32).