In [1]:
import os
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Width of each flattened input example; used as the second dimension of the
# `x` placeholder and the first dimension of the layer1 weights.
# (presumably 28*28 MNIST pixels -- confirm against the dataset)
INPUT_NODE = 784
# Width of the network output and of the one-hot label placeholder `y_`.
OUTPUT_NODE = 10
# Number of units in the hidden layer created under the 'layer1' scope.
LAYER1_NODE = 500

def get_weight_variable(shape, regularizer):
    """Create or fetch the ``weights`` variable of the current variable scope.

    The variable is requested through ``tf.get_variable`` with a truncated
    normal initializer (stddev 0.1).

    Args:
        shape: Shape forwarded to ``tf.get_variable``.
        regularizer: Optional callable applied to the weights; its result is
            added to the ``'losses'`` collection. Pass ``None`` to skip
            regularization.

    Returns:
        The object returned by ``tf.get_variable`` for ``"weights"``.
    """
    weights = tf.get_variable(
        "weights", shape,
        initializer=tf.truncated_normal_initializer(stddev=0.1))
    # Identity comparison: `!= None` would dispatch to a user-defined
    # __ne__/__eq__ on the regularizer object, which is never what we want.
    if regularizer is not None:
        tf.add_to_collection('losses', regularizer(weights))
    return weights

def inference(input_tensor, regularizer):
    """Build the two-layer fully connected forward pass.

    A ReLU hidden layer is created under the ``layer1`` variable scope and a
    linear output layer under ``layer2``.

    Args:
        input_tensor: Tensor multiplied by the ``[INPUT_NODE, LAYER1_NODE]``
            weights of the first layer.
        regularizer: Forwarded to ``get_weight_variable`` for both layers;
            may be ``None``.

    Returns:
        The un-activated output of the second layer.
    """
    with tf.variable_scope('layer1'):
        hidden_w = get_weight_variable([INPUT_NODE, LAYER1_NODE], regularizer)
        hidden_b = tf.get_variable(
            'biases', [LAYER1_NODE],
            initializer=tf.constant_initializer(0.0))
        hidden_pre_activation = tf.matmul(input_tensor, hidden_w) + hidden_b
        hidden = tf.nn.relu(hidden_pre_activation)

    with tf.variable_scope('layer2'):
        output_w = get_weight_variable([LAYER1_NODE, OUTPUT_NODE], regularizer)
        output_b = tf.get_variable(
            'biases', [OUTPUT_NODE],
            initializer=tf.constant_initializer(0.0))
        # No activation here: the caller consumes the raw layer output.
        output = tf.matmul(hidden, output_w) + output_b

    return output

# Number of examples requested from mnist.train.next_batch per training step.
BATCH_SIZE = 100
# Initial learning rate handed to tf.train.exponential_decay.
LEARNING_RATE_BASE = 0.8
# Decay rate handed to tf.train.exponential_decay.
LEARNING_RATE_DECAY = 0.99
# Scale passed to the L2 regularizer applied to the layer weights.
REGULARIZATION_RATE = 0.0001
# Number of iterations of the training loop.
TRAINING_STEPS = 20000
# Decay used by tf.train.ExponentialMovingAverage over the trainable variables.
MOVING_AVERAGE_DECAY = 0.99
# Directory joined with MODEL_NAME to form the checkpoint path.
MODEL_SAVE_PATH = 'model'
# Checkpoint file name passed (with global_step) to saver.save.
MODEL_NAME = 'model_921.ckpt'

def train(mnist):
    """Build the training graph and run the optimisation loop.

    The graph combines the forward pass from ``inference`` with an L2
    regularizer, a sparse softmax cross-entropy loss, an exponentially
    decaying learning rate, plain gradient descent and an exponential moving
    average over the trainable variables.

    Side effects:
        * writes the graph and per-100-step run metadata to the ``log``
          directory through a ``tf.summary.FileWriter``;
        * prints the batch loss and saves a checkpoint under
          ``MODEL_SAVE_PATH`` every 100 steps.

    Args:
        mnist: Dataset object exposing ``train.num_examples`` and
            ``train.next_batch(batch_size)``.
    """
    with tf.name_scope('input'):
        x = tf.placeholder(tf.float32, [None, INPUT_NODE], name='x-input')
        y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name='y-input')

    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    y = inference(x, regularizer)

    # Not trainable: it only counts optimizer steps.
    global_step = tf.Variable(0, trainable=False)

    with tf.name_scope('moving_average'):
        variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
        variables_average_op = variable_averages.apply(tf.trainable_variables())

    with tf.name_scope('loss_function'):
        # Labels arrive one-hot encoded; argmax converts them to class ids
        # as required by the sparse cross-entropy op.
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
        cross_entropy_mean = tf.reduce_mean(cross_entropy)
        # 'losses' holds the regularization terms added by get_weight_variable.
        loss = cross_entropy_mean + tf.add_n(tf.get_collection('losses'))

    with tf.name_scope('train_step'):
        learning_rate = tf.train.exponential_decay(LEARNING_RATE_BASE, global_step, mnist.train.num_examples / BATCH_SIZE,
                                                  LEARNING_RATE_DECAY)
        train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)

    # A single op that triggers both the gradient update and the moving
    # average update.
    with tf.control_dependencies([train_step, variables_average_op]):
        train_op = tf.no_op(name='train')

    writer = tf.summary.FileWriter('log', tf.get_default_graph())
    saver = tf.train.Saver()

    # Make sure the checkpoint directory exists before the first save so a
    # missing directory cannot abort a long training run.
    checkpoint_path = os.path.join(MODEL_SAVE_PATH, MODEL_NAME)
    if not os.path.isdir(MODEL_SAVE_PATH):
        os.makedirs(MODEL_SAVE_PATH)

    # Loop-invariant: the same tracing options are reused for every traced step.
    run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)

    try:
        with tf.Session() as sess:
            tf.global_variables_initializer().run()

            for i in range(TRAINING_STEPS):
                xs, ys = mnist.train.next_batch(BATCH_SIZE)
                feed = {x: xs, y_: ys}

                if i % 100 != 99:
                    # Ordinary step: nothing to report, just train.
                    sess.run(train_op, feed_dict=feed)
                    continue

                # Every 100th step: trace the run, report the loss and
                # write a checkpoint.
                run_metadata = tf.RunMetadata()
                _, loss_value, step = sess.run([train_op, loss, global_step], feed_dict=feed,
                                               options=run_options, run_metadata=run_metadata)
                print("After %d training steps, loss on training batch is %g." % (step, loss_value))
                writer.add_run_metadata(run_metadata, 'step%03d' % step)
                saver.save(sess, checkpoint_path, global_step=global_step)
    finally:
        # Close the event file even if training is interrupted or fails.
        writer.close()

# Cell driver: load MNIST from whichever machine-specific location applies and
# start training. `if True:` is kept as a manual on/off switch for the cell.
if True:
    # The presence of "/Users" selects the first path; otherwise the
    # "D:\\..." path is used.
    mnist = input_data.read_data_sets(
        ("/Users/colinzuo/work/github/personal_study/"
         "tensorflow/book_caicloud/mnist/")
        if os.path.exists("/Users")
        else "D:\\work\\DataAnalysis\\study\\mnist",
        one_hot=True)

    train(mnist)

Extracting /Users/colinzuo/work/github/personal_study/tensorflow/book_caicloud/mnist/train-images-idx3-ubyte.gz
Extracting /Users/colinzuo/work/github/personal_study/tensorflow/book_caicloud/mnist/train-labels-idx1-ubyte.gz
Extracting /Users/colinzuo/work/github/personal_study/tensorflow/book_caicloud/mnist/t10k-images-idx3-ubyte.gz
Extracting /Users/colinzuo/work/github/personal_study/tensorflow/book_caicloud/mnist/t10k-labels-idx1-ubyte.gz
After 100 training steps, loss on training batch is 0.372965.
After 200 training steps, loss on training batch is 0.449551.
After 300 training steps, loss on training batch is 0.331021.
After 400 training steps, loss on training batch is 0.346335.
After 500 training steps, loss on training batch is 0.309875.
After 600 training steps, loss on training batch is 0.192811.
After 700 training steps, loss on training batch is 0.281512.
After 800 training steps, loss on training batch is 0.209793.
After 900 training steps, loss on training batch is 0.2437

After 12300 training steps, loss on training batch is 0.0660718.
After 12400 training steps, loss on training batch is 0.0678057.
After 12500 training steps, loss on training batch is 0.0564256.
After 12600 training steps, loss on training batch is 0.0564369.
After 12700 training steps, loss on training batch is 0.0554723.
After 12800 training steps, loss on training batch is 0.0542701.
After 12900 training steps, loss on training batch is 0.0625511.
After 13000 training steps, loss on training batch is 0.0569361.
After 13100 training steps, loss on training batch is 0.0608923.
After 13200 training steps, loss on training batch is 0.0559123.
After 13300 training steps, loss on training batch is 0.0595475.
After 13400 training steps, loss on training batch is 0.05229.
After 13500 training steps, loss on training batch is 0.060093.
After 13600 training steps, loss on training batch is 0.0518224.
After 13700 training steps, loss on training batch is 0.0542938.
After 13800 training steps, 