In [20]:
# 5.1 MNIST数据处理
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("../../datasets/MNIST_data/", one_hot=True)

Extracting ../../datasets/MNIST_data/train-images-idx3-ubyte.gz


Extracting ../../datasets/MNIST_data/train-labels-idx1-ubyte.gz
Extracting ../../datasets/MNIST_data/t10k-images-idx3-ubyte.gz
Extracting ../../datasets/MNIST_data/t10k-labels-idx1-ubyte.gz


In [21]:
print("Training data size: ", mnist.train.num_examples)
print("Validating data size: ", mnist.validation.num_examples)
print("Testing data size: ", mnist.test.num_examples)

Training data size:  55000
Validating data size:  5000
Testing data size:  10000


In [22]:
print("Example training data: ", mnist.train.images[0] )
print("Example training data label: ", mnist.train.labels[0])

Example training data:  [0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.       

In [23]:
batch_size = 100
xs, ys = mnist.train.next_batch(batch_size)    # 从train的集合中选取batch_size个训练数据。
print("X shape:", xs.shape)                       
print("Y shape:", ys.shape)

X shape: (100, 784)
Y shape: (100, 10)


In [24]:
# 5.2 神经网络模型训练及不同模型结果对比
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# MNIST 数据集相关的常数 
INPUT_NODE = 784                        # 输入的节点数，等于图片像素
OUTPUT_NODE = 10                        # 输出的节点数，等于类别的数目
                                        # 区分0~9的10个数字，输出层节点数是10

# 配置神经网络的参数
LAYER1_NODE = 500                       # 设置隐藏层的节点数500
BATCH_SIZE = 100                        # 一个训练batch中的训练数据个数，随机梯度下降
LEARNING_RATE_BASE = 0.8                # 基础的学习率
LEARNING_RATE_DECAY = 0.99              # 学习率的衰减率
REGULARIZATION_RATE = 0.0001            # 正则化系数
TRAINING_STEPS = 5000                  # 训练轮数
MOVING_AVERAGE_DECAY = 0.99             # 滑动平均衰减率
MODEL_SAVE_PATH = "MNIST_model/"
MODEL_NAME = "mnist_model"

In [25]:
def inference(input_tensor, avg_class, weights1, biases1, weights2, biases2):
    # 当没有提供滑动平均类时，直接使用参数当前的取值
    if avg_class ==None:
        # 计算隐藏层的前向传播结果，使用ReLU激活函数
        layer1 = tf.nn.relu(tf.matmul(input_tensor, weights1) + biases1)
        
        # 计算输出层的前夕传播结果，计算损失函数时一并计算softmax函数
        # 所以不需要加入激活函数
        return tf.matmul(layer1, weights2) + biases2
    
    else:
        # 首先使用avg_class.average计算变量的滑动平均值
        # 计算相应的神经网络前向传播结果
        layer1 = tf.nn.relu(tf.matmul(input_tensor, avg_class.average(weights1)) + avg_class.average(biases1))
        return tf.matmul(layer1, avg_class.average(weights2)) + avg_class.average(biases2)

In [26]:
# 训练模型的过程
def train(mnist):
    x = tf.placeholder(tf.float32, [None, INPUT_NODE], name = 'x-input')
    y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name = 'y-input')
    
    # 生成隐藏层的参数
    weights1 = tf.Variable(tf.truncated_normal([INPUT_NODE, LAYER1_NODE], stddev = 0.1))
    biases1 = tf.Variable(tf.constant(0.1, shape=[LAYER1_NODE]))
    
    # 生成输出层的参数
    weights2 = tf.Variable(tf.truncated_normal([LAYER1_NODE, OUTPUT_NODE], stddev = 0.1)) 
    biases2 = tf.Variable(tf.constant(0.1, shape = [OUTPUT_NODE]))
    
    # 计算在当前参数下神经网络前向传播的结果，不用参数滑动平均值
    y = inference(x, None, weights1, biases1, weights2, biases2)
    
    # 定义村相互训练轮数的变量
    global_step = tf.Variable(0, trainable = False)
    
    # 初始化滑动平均类，加快训练早起变量的更新速度
    variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    variable_averages_op = variable_averages.apply(tf.trainable_variables())
    
    # 取值维护滑动平均值
    average_y = inference(x, variable_averages, weights1, biases1, weights2, biases2)
    
    # 计算交叉熵作为损失函数
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    
    # 计算L2正则
    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    regularization = regularizer(weights1) + regularizer(weights2)
    loss = cross_entropy_mean + regularization
    
    # 设置指数衰减率
    learning_rate = tf.train.exponential_decay(LEARNING_RATE_BASE, global_step, mnist.train.num_examples / BATCH_SIZE, LEARNING_RATE_DECAY)
    
    # 使用tf.train.GradientDescentOptimizer优化损失函数 包含交叉熵和L2正则损失
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step = global_step)

    # 每训练一次更新一次参数
    with tf.control_dependencies([train_step, variable_averages_op]):
        train_op = tf.no_op(name = 'train')
        
    # 验证前向传播的结果
    correct_prediction = tf.equal(tf.argmax(average_y, 1), tf.argmax(y_, 1))
    
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    
    # 初始化会话
    with tf.Session() as sess:
        
        tf.global_variables_initializer().run()
        # 准备验证数据。
        validate_feed = {x:mnist.validation.images,
                         y_: mnist.validation.labels}
        # 准备测试数据
        test_feed = {x:mnist.test.images, y_: mnist.test.labels}
        
        # 迭代训练神经网络
        for i in range(TRAINING_STEPS):
            # 每1000轮输出一次在验证数据集上的测试结果
            if i % 1000 == 0:
                # 计算滑动平均模型在验证数据上的结果
                validate_acc = sess.run(accuracy, feed_dict=validate_feed)
                print("After %d training steps, validation accuracy using average model is %g " % (i, validate_acc))
            # 产生这一轮batch的训练数据
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            sess.run(train_op, feed_dict = {x: xs, y_: ys})
        
        # 训练结束后，检验最终正确率
        test_acc = sess.run(accuracy, feed_dict = test_feed)
        print("After %d training steps, test accuracy using average model is %g " % (TRAINING_STEPS, test_acc))
        # 5.2.2 使用验证数据集判断模型效果
        # 计算滑动平均模型在测试数据和验证数据上的正确率
        validate_acc = sess.run(accuracy, feed_dict = validate_feed)
        # test_acc = sess.run(accuracy, feed_dict = test_feed)
        
        #输出正确率信息
        print("After %d training steps, validation accuracy using average model is %g, test accuracy using average model is %g" % (i, validate_acc, test_acc) )

In [27]:
def main(argv = None):
    # mnist = input_data.read_data_sets("../../datasets/MNIST_data/",one_hot=True)
    train(mnist)
    
if __name__ == '__main__':
    tf.app.run()

After 0 training steps, validation accuracy using average model is 0.0974 


After 1000 training steps, validation accuracy using average model is 0.9762 


After 2000 training steps, validation accuracy using average model is 0.9818 


After 3000 training steps, validation accuracy using average model is 0.9816 


After 4000 training steps, validation accuracy using average model is 0.983 


After 5000 training steps, test accuracy using average model is 0.9819 
After 4999 training steps, validation accuracy using average model is 0.9834, test accuracy using average model is 0.9819


SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [37]:
# 不使用正则化
def train(mnist):
    x = tf.placeholder(tf.float32, [None, INPUT_NODE], name='x-input')
    y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name='y-input')
    # 生成隐藏层的参数。
    weights1 = tf.Variable(tf.truncated_normal([INPUT_NODE, LAYER1_NODE], stddev=0.1))
    biases1 = tf.Variable(tf.constant(0.1, shape=[LAYER1_NODE]))
    # 生成输出层的参数。
    weights2 = tf.Variable(tf.truncated_normal([LAYER1_NODE, OUTPUT_NODE], stddev=0.1))
    biases2 = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]))

    # 计算不含滑动平均类的前向传播结果
    y = inference(x, None, weights1, biases1, weights2, biases2)
    
    # 定义训练轮数及相关的滑动平均类 
    global_step = tf.Variable(0, trainable=False)
    variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    average_y = inference(x, variable_averages, weights1, biases1, weights2, biases2)
    
    # 计算交叉熵及其平均值
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    
    # 损失函数的计算
    loss = cross_entropy_mean
    
    # 设置指数衰减的学习率。
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        mnist.train.num_examples / BATCH_SIZE,
        LEARNING_RATE_DECAY,
        staircase=True)
    
    # 优化损失函数
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
    
    # 反向传播更新参数和更新每一个参数的滑动平均值
    with tf.control_dependencies([train_step, variables_averages_op]):
        train_op = tf.no_op(name='train')

    # 计算正确率
    correct_prediction = tf.equal(tf.argmax(average_y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    
    # 初始化回话并开始训练过程。
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        validate_feed = {x: mnist.validation.images, y_: mnist.validation.labels}
        test_feed = {x: mnist.test.images, y_: mnist.test.labels} 
        
        # 循环的训练神经网络。
        for i in range(TRAINING_STEPS):
            if i % 1000 == 0:
                validate_acc = sess.run(accuracy, feed_dict=validate_feed)
                print("After %d training step(s), validation accuracy using average model is %g " % (i, validate_acc))
            
            xs,ys=mnist.train.next_batch(BATCH_SIZE)
            sess.run(train_op,feed_dict={x:xs,y_:ys})

        # 训练结束后，检验最终正确率
        test_acc = sess.run(accuracy, feed_dict = test_feed)
        print("After %d training steps, test accuracy using average model is %g " % (TRAINING_STEPS, test_acc))
        # 5.2.2 使用验证数据集判断模型效果
        # 计算滑动平均模型在测试数据和验证数据上的正确率
        validate_acc = sess.run(accuracy, feed_dict = validate_feed)
        # test_acc = sess.run(accuracy, feed_dict = test_feed)
        
        #输出正确率信息
        print("After %d training steps, validation accuracy using average model is %g, test accuracy using average model is %g" % (i, validate_acc, test_acc) )

def main(argv=None):
    #mnist = input_data.read_data_sets("../../../datasets/MNIST_data", one_hot=True)
    train(mnist)

if __name__=='__main__':
    main()

After 0 training step(s), validation accuracy using average model is 0.0788 


After 1000 training step(s), validation accuracy using average model is 0.9774 


After 2000 training step(s), validation accuracy using average model is 0.981 


After 3000 training step(s), validation accuracy using average model is 0.9836 


After 4000 training step(s), validation accuracy using average model is 0.985 


After 5000 training steps, test accuracy using average model is 0.9825 
After 4999 training steps, validation accuracy using average model is 0.9848, test accuracy using average model is 0.9825


In [30]:
# 不使用指数衰减的学习率
LEARNING_RATE = 0.1  

def train(mnist):
    x = tf.placeholder(tf.float32, [None, INPUT_NODE], name='x-input')
    y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name='y-input')
    # 生成隐藏层的参数。
    weights1 = tf.Variable(tf.truncated_normal([INPUT_NODE, LAYER1_NODE], stddev=0.1))
    biases1 = tf.Variable(tf.constant(0.1, shape=[LAYER1_NODE]))
    # 生成输出层的参数。
    weights2 = tf.Variable(tf.truncated_normal([LAYER1_NODE, OUTPUT_NODE], stddev=0.1))
    biases2 = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]))

    # 计算不含滑动平均类的前向传播结果
    y = inference(x, None, weights1, biases1, weights2, biases2)
    
    # 定义训练轮数及相关的滑动平均类 
    global_step = tf.Variable(0, trainable=False)
    variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    average_y = inference(x, variable_averages, weights1, biases1, weights2, biases2)
    
    # 计算交叉熵及其平均值
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    
    # 损失函数的计算
    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    regularaztion = regularizer(weights1) + regularizer(weights2)
    loss = cross_entropy_mean + regularaztion
    
    # 优化损失函数
    train_step = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(loss, global_step=global_step)
    
    # 反向传播更新参数和更新每一个参数的滑动平均值
    with tf.control_dependencies([train_step, variables_averages_op]):
        train_op = tf.no_op(name='train')

    # 计算正确率
    correct_prediction = tf.equal(tf.argmax(average_y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    
    # 初始化回话并开始训练过程。
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        validate_feed = {x: mnist.validation.images, y_: mnist.validation.labels}
        test_feed = {x: mnist.test.images, y_: mnist.test.labels} 
        
        # 循环的训练神经网络。
        for i in range(TRAINING_STEPS):
            if i % 1000 == 0:
                validate_acc = sess.run(accuracy, feed_dict=validate_feed)
                print("After %d training step(s), validation accuracy using average model is %g " % (i, validate_acc))
            
            xs,ys=mnist.train.next_batch(BATCH_SIZE)
            sess.run(train_op,feed_dict={x:xs,y_:ys})

        # 训练结束后，检验最终正确率
        test_acc = sess.run(accuracy, feed_dict = test_feed)
        print("After %d training steps, test accuracy using average model is %g " % (TRAINING_STEPS, test_acc))
        # 5.2.2 使用验证数据集判断模型效果
        # 计算滑动平均模型在测试数据和验证数据上的正确率
        validate_acc = sess.run(accuracy, feed_dict = validate_feed)
        # test_acc = sess.run(accuracy, feed_dict = test_feed)
        
        #输出正确率信息
        print("After %d training steps, validation accuracy using average model is %g, test accuracy using average model is %g" % (i, validate_acc, test_acc) )

def main(argv=None):
    # mnist = input_data.read_data_sets("../../../datasets/MNIST_data", one_hot=True)
    train(mnist)

if __name__=='__main__':
    main()

After 0 training step(s), validation accuracy using average model is 0.099 


After 1000 training step(s), validation accuracy using average model is 0.9492 


After 2000 training step(s), validation accuracy using average model is 0.9654 


After 3000 training step(s), validation accuracy using average model is 0.9706 


After 4000 training step(s), validation accuracy using average model is 0.9762 


After 5000 training steps, test accuracy using average model is 0.9739 
After 4999 training steps, validation accuracy using average model is 0.9762, test accuracy using average model is 0.9739


In [32]:
# 不使用激活函数
def train(mnist):
    x = tf.placeholder(tf.float32, [None, INPUT_NODE], name='x-input')
    y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name='y-input')
    # 生成隐藏层的参数。
    weights1 = tf.Variable(tf.truncated_normal([INPUT_NODE, LAYER1_NODE], stddev=0.1))
    biases1 = tf.Variable(tf.constant(0.1, shape=[LAYER1_NODE]))
    # 生成输出层的参数。
    weights2 = tf.Variable(tf.truncated_normal([LAYER1_NODE, OUTPUT_NODE], stddev=0.1))
    biases2 = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]))

    # 计算不含滑动平均类的前向传播结果
    y = inference(x, None, weights1, biases1, weights2, biases2)
    
    # 定义训练轮数及相关的滑动平均类 
    global_step = tf.Variable(0, trainable=False)
    variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    average_y = inference(x, variable_averages, weights1, biases1, weights2, biases2)
    
    # 计算交叉熵及其平均值
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    
    # 损失函数的计算
    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    regularaztion = regularizer(weights1) + regularizer(weights2)
    loss = cross_entropy_mean + regularaztion
    
    # 设置指数衰减的学习率。
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        mnist.train.num_examples / BATCH_SIZE,
        LEARNING_RATE_DECAY,
        staircase=True)
    
    # 优化损失函数
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
    
    # 反向传播更新参数和更新每一个参数的滑动平均值
    with tf.control_dependencies([train_step, variables_averages_op]):
        train_op = tf.no_op(name='train')

    # 计算正确率
    correct_prediction = tf.equal(tf.argmax(average_y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    
    # 初始化回话并开始训练过程。
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        validate_feed = {x: mnist.validation.images, y_: mnist.validation.labels}
        test_feed = {x: mnist.test.images, y_: mnist.test.labels} 
        
        # 循环的训练神经网络。
        for i in range(TRAINING_STEPS):
            if i % 1000 == 0:
                validate_acc = sess.run(accuracy, feed_dict=validate_feed)
                print("After %d training step(s), validation accuracy using average model is %g " % (i, validate_acc))
            
            xs,ys=mnist.train.next_batch(BATCH_SIZE)
            sess.run(train_op,feed_dict={x:xs,y_:ys})

        # 训练结束后，检验最终正确率
        test_acc = sess.run(accuracy, feed_dict = test_feed)
        print("After %d training steps, test accuracy using average model is %g " % (TRAINING_STEPS, test_acc))
        # 5.2.2 使用验证数据集判断模型效果
        # 计算滑动平均模型在测试数据和验证数据上的正确率
        validate_acc = sess.run(accuracy, feed_dict = validate_feed)
        # test_acc = sess.run(accuracy, feed_dict = test_feed)
        
        #输出正确率信息
        print("After %d training steps, validation accuracy using average model is %g, test accuracy using average model is %g" % (i, validate_acc, test_acc) )

def main(argv=None):
    # mnist = input_data.read_data_sets("../../../datasets/MNIST_data", one_hot=True)
    train(mnist)

if __name__=='__main__':
    main()

After 0 training step(s), validation accuracy using average model is 0.1258 


After 1000 training step(s), validation accuracy using average model is 0.9774 


After 2000 training step(s), validation accuracy using average model is 0.9838 


After 3000 training step(s), validation accuracy using average model is 0.9826 


After 4000 training step(s), validation accuracy using average model is 0.983 


After 5000 training steps, test accuracy using average model is 0.9832 
After 4999 training steps, validation accuracy using average model is 0.9832, test accuracy using average model is 0.9832


In [35]:
# 不使用隐藏层
def inference_nohidenlayer(input_tensor, avg_class, weights1, biases1):
    # 使用滑动平均类
    if avg_class == None:
        layer1 = tf.nn.relu(tf.matmul(input_tensor, weights1) + biases1)
        return layer1

    else:
        # 不使用滑动平均类
        layer1 = tf.nn.relu(tf.matmul(input_tensor, avg_class.average(weights1)) + avg_class.average(biases1))
        return layer1

def train(mnist):
    x = tf.placeholder(tf.float32, [None, INPUT_NODE], name='x-input')
    y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name='y-input')
    
    # 生成输出层的参数。
    weights1 = tf.Variable(tf.truncated_normal([INPUT_NODE, OUTPUT_NODE], stddev=0.1))
    biases1 = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]))

    # 计算不含滑动平均类的前向传播结果
    y = inference_nohidenlayer(x, None, weights1, biases1)
    
    # 定义训练轮数及相关的滑动平均类 
    global_step = tf.Variable(0, trainable=False)
    variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    average_y = inference_nohidenlayer(x, variable_averages, weights1, biases1)
    
    # 计算交叉熵及其平均值
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    
    # 损失函数的计算
    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    regularaztion = regularizer(weights1)
    loss = cross_entropy_mean + regularaztion
    
    # 设置指数衰减的学习率。
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        mnist.train.num_examples / BATCH_SIZE,
        LEARNING_RATE_DECAY,
        staircase=True)
    
    # 优化损失函数
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
    
    # 反向传播更新参数和更新每一个参数的滑动平均值
    with tf.control_dependencies([train_step, variables_averages_op]):
        train_op = tf.no_op(name='train')

    # 计算正确率
    correct_prediction = tf.equal(tf.argmax(average_y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    
    # 初始化回话并开始训练过程。
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        validate_feed = {x: mnist.validation.images, y_: mnist.validation.labels}
        test_feed = {x: mnist.test.images, y_: mnist.test.labels} 
        
        # 循环的训练神经网络。
        for i in range(TRAINING_STEPS):
            if i % 1000 == 0:
                validate_acc = sess.run(accuracy, feed_dict=validate_feed)
                print("After %d training step(s), validation accuracy using average model is %g " % (i, validate_acc))
            
            xs,ys=mnist.train.next_batch(BATCH_SIZE)
            sess.run(train_op,feed_dict={x:xs,y_:ys})

        # 训练结束后，检验最终正确率
        test_acc = sess.run(accuracy, feed_dict = test_feed)
        print("After %d training steps, test accuracy using average model is %g " % (TRAINING_STEPS, test_acc))
        # 5.2.2 使用验证数据集判断模型效果
        # 计算滑动平均模型在测试数据和验证数据上的正确率
        validate_acc = sess.run(accuracy, feed_dict = validate_feed)
        # test_acc = sess.run(accuracy, feed_dict = test_feed)
        
        #输出正确率信息
        print("After %d training steps, validation accuracy using average model is %g, test accuracy using average model is %g" % (i, validate_acc, test_acc) )

def main(argv=None):
    #mnist = input_data.read_data_sets("../../../datasets/MNIST_data", one_hot=True)
    train(mnist)

if __name__=='__main__':
    main()

After 0 training step(s), validation accuracy using average model is 0.0892 


After 1000 training step(s), validation accuracy using average model is 0.6552 


After 2000 training step(s), validation accuracy using average model is 0.6572 


After 3000 training step(s), validation accuracy using average model is 0.66 


After 4000 training step(s), validation accuracy using average model is 0.6584 


After 5000 training steps, test accuracy using average model is 0.6656 
After 4999 training steps, validation accuracy using average model is 0.6578, test accuracy using average model is 0.6656


In [36]:
# 不使用滑动平均类
def train(mnist):
    x = tf.placeholder(tf.float32, [None, INPUT_NODE], name='x-input')
    y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name='y-input')
    # 生成隐藏层的参数。
    weights1 = tf.Variable(tf.truncated_normal([INPUT_NODE, LAYER1_NODE], stddev=0.1))
    biases1 = tf.Variable(tf.constant(0.1, shape=[LAYER1_NODE]))
    # 生成输出层的参数。
    weights2 = tf.Variable(tf.truncated_normal([LAYER1_NODE, OUTPUT_NODE], stddev=0.1))
    biases2 = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]))

    # 计算不含滑动平均类的前向传播结果
    y = inference(x, None, weights1, biases1, weights2, biases2)
    
    # 定义训练轮数及相关的滑动平均类 
    global_step = tf.Variable(0, trainable=False)
    
    # 计算交叉熵及其平均值
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    
    # 损失函数的计算
    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    regularaztion = regularizer(weights1) + regularizer(weights2)
    loss = cross_entropy_mean + regularaztion
    
    # 设置指数衰减的学习率。
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        mnist.train.num_examples / BATCH_SIZE,
        LEARNING_RATE_DECAY,
        staircase=True)
    
    # 优化损失函数
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
    
    # 反向传播更新参数
    with tf.control_dependencies([train_step]):
        train_op = tf.no_op(name='train')

    # 计算正确率
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    
    # 初始化回话并开始训练过程。
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        validate_feed = {x: mnist.validation.images, y_: mnist.validation.labels}
        test_feed = {x: mnist.test.images, y_: mnist.test.labels} 
        
        # 循环的训练神经网络。
        for i in range(TRAINING_STEPS):
            if i % 1000 == 0:
                validate_acc = sess.run(accuracy, feed_dict=validate_feed)
                print("After %d training step(s), validation accuracy using average model is %g " % (i, validate_acc))
            
            xs,ys=mnist.train.next_batch(BATCH_SIZE)
            sess.run(train_op,feed_dict={x:xs,y_:ys})
        # 训练结束后，检验最终正确率
        test_acc = sess.run(accuracy, feed_dict = test_feed)
        print("After %d training steps, test accuracy using average model is %g " % (TRAINING_STEPS, test_acc))
        # 5.2.2 使用验证数据集判断模型效果
        # 计算滑动平均模型在测试数据和验证数据上的正确率
        validate_acc = sess.run(accuracy, feed_dict = validate_feed)
        # test_acc = sess.run(accuracy, feed_dict = test_feed)
        
        #输出正确率信息
        print("After %d training steps, validation accuracy using average model is %g, test accuracy using average model is %g" % (i, validate_acc, test_acc) )

def main(argv=None):
    #mnist = input_data.read_data_sets("../../../datasets/MNIST_data", one_hot=True)
    train(mnist)

if __name__=='__main__':
    main()

After 0 training step(s), validation accuracy using average model is 0.1228 


After 1000 training step(s), validation accuracy using average model is 0.9668 


After 2000 training step(s), validation accuracy using average model is 0.9774 


After 3000 training step(s), validation accuracy using average model is 0.9796 


After 4000 training step(s), validation accuracy using average model is 0.9798 


After 5000 training steps, test accuracy using average model is 0.9818 
After 4999 training steps, validation accuracy using average model is 0.98, test accuracy using average model is 0.9818


In [25]:
# 5.3 变量管理
# tf.get_variable() 与 tf.Variable()作用相同
import tensorflow as tf

p = tf.get_variable("p", shape=[1], initializer=tf.constant_initializer(1.0))

q = tf.Variable(tf.constant(1.0, shape =[1]), name="q")

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    print(sess.run(p))
    print(sess.run(q))

[1.]
[1.]


In [30]:
# 在名字为"foo"的命名空间内创建名字为v的变量并初始化
with tf.variable_scope("foo"):
    v = tf.get_variable("v", [1], initializer = tf.constant_initializer(1.0))
    
with tf.variable_scope("foo", reuse=True):
    v1 = tf.get_variable("v", [1])
    print(v==v1)

ValueError: Variable foo/v already exists, disallowed. Did you mean to set reuse=True in VarScope? Originally defined at:

  File "C:\Users\adward\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 1269, in __init__
    self._traceback = _extract_stack()
  File "C:\Users\adward\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 2506, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "C:\Users\adward\Anaconda3\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 767, in apply_op
    op_def=op_def)


In [34]:
# tf.variabel_scope()嵌套  
with tf.variable_scope("root"):
    # tf.get_variable_scope().resue获取上下文管理器中参数取值
    print(tf.get_variable_scope().reuse)
    
    with tf.variable_scope("foo", reuse=True):
        print(tf.get_variable_scope().reuse)
        
        with tf.variable_scope("bar"):
            print(tf.get_variable_scope().reuse)
            
    print(tf.get_variable_scope().reuse)

False
True
True
False


In [2]:
v1 = tf.get_variable("v", [1])
print(v1.name)                                   #v:0

with tf.variable_scope("foo"):
    v2 = tf.get_variable("v",[1])
    print(v2.name)                               #foo/v:0 
    
with tf.variable_scope("foo"):
    with tf.variable_scope("bar"):
        v3 = tf.get_variable("v", [1])
        print(v3.name)                           #foo/bar/v:0 
    
    v4 = tf.get_variable("v1",[1])
    print(v4.name)                               #foo/v1:0 
    
with tf.variable_scope("",reuse=True):
    v5 = tf.get_variable("foo/bar/v", [1])
    
    print(v5==v3)                                #True
    v6 = tf.get_variable("foo/v1", [1])
    print(v6==v4)                                #True

v:0
foo/v:0
foo/bar/v:0
foo/v1:0
True
True
