In [2]:
"""
例子为MNIST，对手写图片进行分类。
《tensorflow实战》第五章实例
"""
import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
# Load the MNIST splits from the local 'MNIST_data' directory with one-hot labels.
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

# Report the number of examples in the train / validation / test splits, in that order.
for split in (mnist.train, mnist.validation, mnist.test):
    print(split.num_examples)

Extracting MNIST_data\train-images-idx3-ubyte.gz
Extracting MNIST_data\train-labels-idx1-ubyte.gz
Extracting MNIST_data\t10k-images-idx3-ubyte.gz
Extracting MNIST_data\t10k-labels-idx1-ubyte.gz
55000
5000
10000


In [3]:
batch_size = 100

# Pull one mini-batch out of the training split and show the shape of each array.
xs, ys = mnist.train.next_batch(batch_size)
for batch_array in (xs, ys):
    print(batch_array.shape)

(100, 784)
(100, 10)


In [4]:
# Layer sizes: input (pixels per image), hidden layer, output (the 10 digits).
input_node, layer1_node, output_node = 784, 500, 10

# Mini-batch size and total number of training iterations.
batch_size, training_steps = 100, 30000

# Optimisation hyper-parameters.
learning_rate_base = 0.8       # base learning rate
learning_rate_decay = 0.99     # learning-rate decay rate
regularization_rate = 1e-4     # coefficient of the model-complexity (regularization) term in the loss
moving_average_decay = 0.99    # moving-average decay rate

In [5]:
# Model wrapper: forward pass of the network
def inference(input_tensor, avg_class, weights1, biases1, weights2, biases2):
    """Compute the forward pass of a network with one ReLU hidden layer.

    Args:
        input_tensor: Input fed to the first matrix multiplication.
        avg_class: ``None`` to use the parameters directly, or an object
            exposing ``average(variable)``; when given, every parameter is
            replaced by ``avg_class.average(parameter)`` before use.
        weights1: Hidden-layer weights.
        biases1: Hidden-layer biases.
        weights2: Output-layer weights.
        biases2: Output-layer biases.

    Returns:
        ``matmul(relu(matmul(input_tensor, w1) + b1), w2) + b2``; no
        activation is applied to the output layer.
    """
    # Use an identity check: `== None` dispatches to the object's __eq__,
    # which may be overloaded and wrongly select the "no averaging" branch.
    if avg_class is None:
        # No moving-average helper supplied: use the parameters as they are.
        layer1 = tf.nn.relu(tf.matmul(input_tensor, weights1) + biases1)
        return tf.matmul(layer1, weights2) + biases2
    else:
        # Look up the averaged counterpart of each parameter first.
        layer1 = tf.nn.relu(
            tf.matmul(input_tensor, avg_class.average(weights1)) +
            avg_class.average(biases1))
        return tf.matmul(
            layer1, avg_class.average(weights2)) + avg_class.average(biases2)

In [7]:
"""训练模型过程"""
# Placeholders for a batch of inputs (input_node columns) and the matching
# labels (output_node columns); the batch dimension is left unspecified.
x = tf.placeholder(tf.float32, [None, input_node], name='x_input')
y_ = tf.placeholder(tf.float32, [None, output_node], name='y_input')

# Hidden-layer parameters
weights1 = tf.Variable(
    tf.truncated_normal([input_node, layer1_node], stddev=0.1))
biases1 = tf.Variable(tf.constant(0.1, shape=[layer1_node]))
# Output-layer parameters
weights2 = tf.Variable(
    tf.truncated_normal([layer1_node, output_node], stddev=0.1))
biases2 = tf.Variable(tf.constant(0.1, shape=[output_node]))

# Forward pass with the raw parameters (no moving averages); used for the loss.
y = inference(x, None, weights1, biases1, weights2, biases2)

# Variable holding the number of training steps; marked non-trainable.
global_step = tf.Variable(0, trainable=False)
# Initialise the moving-average helper
variable_averages = tf.train.ExponentialMovingAverage(
    moving_average_decay, global_step)
# Apply the moving average to the trainable variables
variables_averages_op = variable_averages.apply(
    tf.trainable_variables())
# Forward pass that uses the moving-averaged parameters; used for evaluation.
average_y = inference(
    x, variable_averages, weights1, biases1, weights2, biases2)

# Loss: the sparse cross-entropy op is used, so the labels in y_ are reduced
# to class indices with argmax along axis 1.
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
    logits=y, labels=tf.argmax(y_, 1))
cross_entropy_mean = tf.reduce_mean(cross_entropy)

# L2 regularization loss, applied to the two weight matrices only
# (the biases are not included).
regularizer = tf.contrib.layers.l2_regularizer(regularization_rate)
regularization = regularizer(weights1) + regularizer(weights2)

# Total loss = mean cross-entropy + regularization term
loss = cross_entropy_mean + regularization

learning_rate = tf.train.exponential_decay(
    learning_rate_base,
    global_step,
    mnist.train.num_examples / batch_size, # iterations needed for one pass over the training data
    learning_rate_decay)

# Add the optimizer; global_step is handed to minimize() together with the loss.
train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)

# Tie the parameter update and the moving-average update to a single op, so
# running train_op triggers both.
with tf.control_dependencies([train_step, variables_averages_op]):
    train_op = tf.no_op(name='train')

# Evaluate the model: accuracy is computed from the moving-average forward pass.
correct_prediction = tf.equal(tf.argmax(average_y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [10]:
# Run the graph: train, periodically validate, then evaluate on the test set.
with tf.Session() as sess:
    tf.global_variables_initializer().run()
    # Validation data
    validate_feed = {
        x: mnist.validation.images,
        y_: mnist.validation.labels
    }
    # Test data
    test_feed = {
        x: mnist.test.images,
        y_: mnist.test.labels
    }
    # Training iterations
    for i in range(training_steps):
        # Validate BEFORE applying update number i+1, so that the message
        # "After i training step(s)" describes a model that has received
        # exactly i updates (previously the update ran first, making every
        # report off by one step).
        if i % 1000 == 0:
            validate_acc = sess.run(accuracy, feed_dict=validate_feed)
            print("After %d training step(s), validation accuracy "
                  "using average model is %g " % (i, validate_acc))

        xs, ys = mnist.train.next_batch(batch_size)
        sess.run(train_op, feed_dict={x: xs, y_: ys})

    # Training finished: compute the final accuracy on the test set.
    test_acc = sess.run(accuracy, feed_dict=test_feed)
    print("After %d training step(s), test accuracy using average "
        "model is %g " % ( training_steps, test_acc))

After 0 training step(s), validation accuracy using average model is 0.0994 
After 1000 training step(s), validation accuracy using average model is 0.9778 
After 2000 training step(s), validation accuracy using average model is 0.9824 
After 3000 training step(s), validation accuracy using average model is 0.9826 
After 4000 training step(s), validation accuracy using average model is 0.9844 
After 5000 training step(s), validation accuracy using average model is 0.9842 
After 6000 training step(s), validation accuracy using average model is 0.9842 
After 7000 training step(s), validation accuracy using average model is 0.9836 
After 8000 training step(s), validation accuracy using average model is 0.984 
After 9000 training step(s), validation accuracy using average model is 0.9842 
After 10000 training step(s), validation accuracy using average model is 0.9842 
After 11000 training step(s), validation accuracy using average model is 0.9838 
After 12000 training step(s), validation a

In [None]:
# Saver constructed with default arguments.
saver = tf.train.Saver()

# NOTE(review): this opens a NEW session and runs the variable initializer
# again, so the checkpoint written below holds freshly initialised values,
# not the values reached in the training cell (that cell's session is closed
# when its `with` block exits). If the goal is to persist the trained model,
# the save has to happen inside the training session -- confirm intent.
with tf.Session() as sess:
    tf.global_variables_initializer().run()
    saver.save(sess, "./model/model.ckpt")