## 5.1 MNIST数据处理

### 1. 加载数据

In [33]:
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf

# Load (and extract if necessary) the MNIST archives with one-hot encoded labels.
mnist = input_data.read_data_sets("../datasets/MNIST_data/", one_hot=True)

# Report how many examples each split contains, in train/validation/test order.
for caption, subset in (
    ("Training data size: ", mnist.train),
    ("Validating data size: ", mnist.validation),
    ("Testing data size: ", mnist.test),
):
    print(caption, subset.num_examples)

Extracting ../datasets/MNIST_data/train-images-idx3-ubyte.gz
Extracting ../datasets/MNIST_data/train-labels-idx1-ubyte.gz
Extracting ../datasets/MNIST_data/t10k-images-idx3-ubyte.gz
Extracting ../datasets/MNIST_data/t10k-labels-idx1-ubyte.gz
Training data size:  55000
Validating data size:  5000
Testing data size:  10000


## 5.2 使用MNIST数据训练模型

### 1. 设置参数

In [34]:
# --- Network architecture ---------------------------------------------------
INPUT_NODE = 28 * 28   # input units: 784 (MNIST images are 28x28 pixels, flattened)
OUTPUT_NODE = 10       # output units: one per class
LAYER1_NODE = 500      # units in the single hidden layer (a width, not a layer count)

# --- Mini-batching ----------------------------------------------------------
BATCH_SIZE = 100       # examples fed to the network per training step

# --- Optimisation hyper-parameters ------------------------------------------
LEARNING_RATE_BASE = 0.8       # initial learning rate
LEARNING_RATE_DECAY = 0.99     # decay rate of the learning rate
REGULARAZTION_RATE = 1e-4      # L2 regularisation coefficient (name kept as-is: used elsewhere)
TRAINING_STEPS = 30000         # total number of training steps
MOVING_AVERAGE_DECAY = 0.99    # decay of the exponential moving average of the weights

### 2. 计算神经网络前向传播结果

In [35]:
def inference(input_tensor, avg_class, weights1, biases1, weights2, biases2):
    """Compute the forward pass of the one-hidden-layer ReLU network.

    Args:
        input_tensor: Input batch that is matrix-multiplied with `weights1`.
        avg_class: Either None, or an object exposing `average(variable)`
            (e.g. a `tf.train.ExponentialMovingAverage`). When given, every
            parameter is replaced by `avg_class.average(parameter)` before use.
        weights1: Hidden-layer weights.
        biases1: Hidden-layer biases.
        weights2: Output-layer weights.
        biases2: Output-layer biases.

    Returns:
        The output-layer logits; no softmax is applied here.
    """
    # Identity test, not `== None`: an object that overloads `__eq__` must not
    # be mistaken for "no averaging requested".
    if avg_class is None:
        w1, b1, w2, b2 = weights1, biases1, weights2, biases2
    else:
        # Swap each parameter for the value the averaging object holds for it.
        w1 = avg_class.average(weights1)
        b1 = avg_class.average(biases1)
        w2 = avg_class.average(weights2)
        b2 = avg_class.average(biases2)

    layer1 = tf.nn.relu(tf.matmul(input_tensor, w1) + b1)
    return tf.matmul(layer1, w2) + b2

### 3. 开始训练

In [36]:
def train(minst):
    """Build, train and evaluate the fully connected network.

    Builds the graph (plain forward pass, moving-average forward pass,
    cross-entropy plus L2 loss, exponentially decayed learning rate and a
    gradient-descent step), then runs TRAINING_STEPS mini-batch updates.
    Validation accuracy of the moving-average model is printed every 1000
    steps and its test accuracy is printed once at the end.

    Args:
        minst: Dataset object providing `train` (with `num_examples` and
            `next_batch`), `validation` and `test` (each with `images` and
            `labels`). Labels must be one-hot, since they are fed into a
            [None, OUTPUT_NODE] placeholder and reduced with `tf.argmax`.
            All data is taken from this argument only, never from a
            notebook-level global.

    Returns:
        None. Results are reported via `print`.
    """
    x = tf.placeholder(tf.float32, [None, INPUT_NODE], name='x-input')
    y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name='y-input')

    # Hidden-layer parameters.
    weights1 = tf.Variable(tf.truncated_normal([INPUT_NODE, LAYER1_NODE], stddev=0.1))
    biases1 = tf.Variable(tf.constant(0.1, shape=[LAYER1_NODE]))

    # Output-layer parameters.
    weights2 = tf.Variable(tf.truncated_normal([LAYER1_NODE, OUTPUT_NODE], stddev=0.1))
    biases2 = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]))

    # Forward pass on the raw parameters (no moving averages); used for the loss.
    y = inference(x, None, weights1, biases1, weights2, biases2)

    # Step counter (excluded from training) and the moving-average helper.
    global_step = tf.Variable(0, trainable=False)
    variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    variable_averages_op = variable_averages.apply(tf.trainable_variables())

    # Forward pass on the averaged parameters; used for accuracy.
    average_y = inference(x, variable_averages, weights1, biases1, weights2, biases2)

    # Cross-entropy per example, then its batch mean. The sparse op expects
    # class indices, hence the argmax over the one-hot labels.
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)

    # Loss with L2 regularisation on the weights (biases are not regularised).
    regularizer = tf.contrib.layers.l2_regularizer(REGULARAZTION_RATE)
    regularization = regularizer(weights1) + regularizer(weights2)
    loss = cross_entropy_mean + regularization

    # Exponentially decayed learning rate; the decay period is the number of
    # steps needed to go through the training set once.
    learning_rate = tf.train.exponential_decay(LEARNING_RATE_BASE,
                                               global_step,
                                               minst.train.num_examples / BATCH_SIZE,
                                               LEARNING_RATE_DECAY)

    # Optimisation target; minimize() also increments global_step.
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)

    # Each iteration must update both the parameters and their moving
    # averages; tie the two ops together so one run of train_op does both.
    with tf.control_dependencies([train_step, variable_averages_op]):
        train_op = tf.no_op(name='train')

    correct_prediction = tf.equal(tf.argmax(average_y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        validate_feed = {x: minst.validation.images,
                         y_: minst.validation.labels}
        # Use the dataset passed in, not a global that happens to exist in the kernel.
        test_feed = {x: minst.test.images, y_: minst.test.labels}

        for i in range(TRAINING_STEPS):
            if i % 1000 == 0:
                validate_acc = sess.run(accuracy, feed_dict=validate_feed)
                print("After %d training step(s), validation accuracy using average model is %g " % (i, validate_acc))

            xs, ys = minst.train.next_batch(BATCH_SIZE)
            sess.run(train_op, feed_dict={x: xs, y_: ys})

        test_acc = sess.run(accuracy, feed_dict=test_feed)
        print(("After %d training step(s), test accuracy using average model is %g" % (TRAINING_STEPS, test_acc)))

### 4. 运行主程序

In [37]:
def main(argv=None):
    """Entry point: load MNIST with one-hot labels and run training.

    Args:
        argv: Unused; accepted so the function can be called with an
            argument list.
    """
    dataset = input_data.read_data_sets("../datasets/MNIST_data", one_hot=True)
    train(dataset)


# Run only when executed as the main module (which includes a notebook kernel).
if __name__ == '__main__':
    main()

Extracting ../datasets/MNIST_data\train-images-idx3-ubyte.gz
Extracting ../datasets/MNIST_data\train-labels-idx1-ubyte.gz
Extracting ../datasets/MNIST_data\t10k-images-idx3-ubyte.gz
Extracting ../datasets/MNIST_data\t10k-labels-idx1-ubyte.gz
After 0 training step(s), validation accuracy using average model is 0.1168 
After 1000 training step(s), validation accuracy using average model is 0.979 
After 2000 training step(s), validation accuracy using average model is 0.9812 
After 3000 training step(s), validation accuracy using average model is 0.9828 
After 4000 training step(s), validation accuracy using average model is 0.9834 
After 5000 training step(s), validation accuracy using average model is 0.9834 
After 6000 training step(s), validation accuracy using average model is 0.9844 
After 7000 training step(s), validation accuracy using average model is 0.9834 
After 8000 training step(s), validation accuracy using average model is 0.9834 
After 9000 training step(s), validation ac