In [61]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

In [64]:
# Load the MNIST splits from ./data/mnist; one_hot=True requests one-hot encoded labels.
mnist = input_data.read_data_sets("./data/mnist", one_hot=True)

Extracting ./data/mnist/train-images-idx3-ubyte.gz
Extracting ./data/mnist/train-labels-idx1-ubyte.gz
Extracting ./data/mnist/t10k-images-idx3-ubyte.gz
Extracting ./data/mnist/t10k-labels-idx1-ubyte.gz


In [65]:
# Report how many examples each split contains (train / dev / test).
split_reports = (
    ("Training data size: %d.", mnist.train),
    ("Dev data size: %d.", mnist.validation),
    ("Testing data size: %d.", mnist.test),
)
for template, split in split_reports:
    print(template % split.num_examples)

Training data size: 55000.
Dev data size: 5000.
Testing data size: 10000.


In [66]:
"""
Label 0-9 (One-Hot Encoding)
"""
# Show the label vector of the first training example.
first_label = mnist.train.labels[0]
print(first_label)

"""
Images 28x28 (flatten)
"""
# Show the shape of the first training image (stored as a flat vector).
first_image = mnist.train.images[0]
print(first_image.shape)

[ 0.  0.  0.  0.  0.  0.  0.  1.  0.  0.]
(784,)


In [101]:
"""
Start to train the neural network
"""
# --- Dataset-derived sizes -------------------------------------------------
TRAINING_SIZE = mnist.train.num_examples

# Width of one flattened image row and of one one-hot label row.
INPUT_NODE = mnist.train.images.shape[1]
OUTPUT_NODE = mnist.train.labels.shape[1]

# --- Network / batching ----------------------------------------------------
FIRST_LAYER = 500  # number of units in the hidden layer
BATCH_SIZE = 100   # examples per training step

# --- Optimisation ----------------------------------------------------------
START_LEARNING_RATE = 0.9   # initial learning rate before decay
LEARNING_RATE_DECAY = 0.99  # decay rate passed to the learning-rate schedule
LAMBDA = 0.0001             # L2 regularization strength

NUM_OF_ITERATION = 50000    # total number of training steps

MOVING_AVG_BETA = 0.99      # beta (decay) for the moving average of parameters


In [None]:
"""
Forward Propagation
"""

def forward_prop(input_tensor, weights, bias, avg_class=None):
    """Compute the logits of a two-layer fully connected network.

    The network is ``relu(input_tensor @ W1 + b1) @ W2 + b2``. No softmax is
    applied; the raw output of the second layer is returned.

    Args:
        input_tensor: Input batch fed to the first layer.
        weights: Mapping with keys ``'W1'`` and ``'W2'`` (layer weight matrices).
        bias: Mapping with keys ``'b1'`` and ``'b2'`` (layer bias vectors).
        avg_class: Optional object exposing ``average(variable)``. When given,
            every parameter is replaced by ``avg_class.average(parameter)``
            before use (presumably a ``tf.train.ExponentialMovingAverage``;
            confirm against the caller).

    Returns:
        The output tensor of the second layer (logits).
    """
    W1, W2 = weights['W1'], weights['W2']
    b1, b2 = bias['b1'], bias['b2']

    # Swap in the averaged (shadow) parameters when an averager is supplied.
    # The original spelled one of these calls `averate`, which made this
    # branch fail with AttributeError.
    if avg_class is not None:
        W1, W2 = avg_class.average(W1), avg_class.average(W2)
        b1, b2 = avg_class.average(b1), avg_class.average(b2)

    a1 = tf.nn.relu(tf.matmul(input_tensor, W1) + b1)
    return tf.matmul(a1, W2) + b2

"""
Training without moving-average optimization
"""
def train(mnist):
    """Train the two-layer network on MNIST with plain gradient descent.

    Builds the graph (placeholders, parameters, loss with L2 regularization,
    exponentially decayed learning rate, accuracy op), runs
    ``NUM_OF_ITERATION`` mini-batch steps, prints the training-batch loss and
    validation accuracy every 10000 steps, and finally prints the accuracy on
    the test split. Parameter moving averages are not used here.

    Args:
        mnist: Dataset object exposing ``train``, ``validation`` and ``test``
            splits; ``train.next_batch(n)`` must return an ``(images, labels)``
            pair and the other splits must expose ``images`` / ``labels``.

    Returns:
        None. Results are reported via ``print``.
    """
    # Build into a fresh graph so that re-running this cell does not keep
    # adding duplicate variables and ops to the process-wide default graph.
    with tf.Graph().as_default():
        # Step counter; incremented by the optimizer, never trained itself.
        iteration = tf.Variable(0, trainable=False)

        # tf.placeholder(type, shape, name)
        x = tf.placeholder(tf.float32, [None, INPUT_NODE], name='x-input')
        y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name='y-input')

        # Initialize weights from a truncated normal distribution, biases to 0.1.
        W1 = tf.Variable(tf.truncated_normal([INPUT_NODE, FIRST_LAYER], stddev=0.1))
        b1 = tf.Variable(tf.constant(0.1, shape=[FIRST_LAYER]))
        W2 = tf.Variable(tf.truncated_normal([FIRST_LAYER, OUTPUT_NODE], stddev=0.1))
        b2 = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]))

        weights = {
            'W1': W1,
            'W2': W2
        }

        bias = {
            'b1': b1,
            'b2': b2
        }

        # Forward propagation (logits, no softmax applied yet).
        y = forward_prop(x, weights, bias)

        # Loss function: softmax cross-entropy. The sparse variant expects
        # class indices, so the one-hot labels are converted with argmax.
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=y, labels=tf.argmax(y_, 1))
        # Mean loss over the batch.
        cross_entropy_mean = tf.reduce_mean(cross_entropy)

        # L2 regularization on both weight matrices (biases are not penalized).
        regularizer = tf.contrib.layers.l2_regularizer(LAMBDA)
        regularization = regularizer(W1) + regularizer(W2)

        # Total loss.
        loss = cross_entropy_mean + regularization

        # Learning rate decays with the step counter; the decay period is the
        # number of batches needed to see TRAINING_SIZE examples once.
        learning_rate = tf.train.exponential_decay(START_LEARNING_RATE,
                                                   iteration,
                                                   TRAINING_SIZE/BATCH_SIZE,
                                                   LEARNING_RATE_DECAY)

        # Update the parameters; minimize() also increments `iteration`.
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        train_step = optimizer.minimize(loss, global_step=iteration)

        # Fraction of examples whose predicted class matches the label.
        correct_predict = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_predict, tf.float32))

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())

            dev_feed = {
                x: mnist.validation.images,
                y_: mnist.validation.labels
            }

            test_feed = {
                x: mnist.test.images,
                y_: mnist.test.labels
            }

            for i in range(1, NUM_OF_ITERATION+1):
                # Training set: one mini-batch per step. The logits are not
                # fetched here because nothing uses them during training.
                xs, ys = mnist.train.next_batch(BATCH_SIZE)
                _, loss_value, step = sess.run([train_step, loss, iteration],
                                               feed_dict={x: xs, y_: ys})

                if i % 10000 == 0:
                    # Dev set
                    validate_acc = sess.run(accuracy, feed_dict=dev_feed)

                    print("After %d iterations, loss on training batch is %f, "
                          "validation accuracy is %f." % (step, loss_value, validate_acc))

            # Testing set
            test_acc = sess.run(accuracy, feed_dict=test_feed)
            print("After training, testing accuracy is %f." % test_acc)
        

# Entry point
def run(argv=None):
    """Load MNIST from ./data/mnist with one-hot labels and train the network.

    Args:
        argv: Accepted but not used.
    """
    train(input_data.read_data_sets('./data/mnist', one_hot=True))

run()

Extracting ./data/mnist/train-images-idx3-ubyte.gz
Extracting ./data/mnist/train-labels-idx1-ubyte.gz
Extracting ./data/mnist/t10k-images-idx3-ubyte.gz
Extracting ./data/mnist/t10k-labels-idx1-ubyte.gz
After 10000 iterations, loss on training batch is 0.062118, validation accuracy is 0.983000.
After 20000 iterations, loss on training batch is 0.039689, validation accuracy is 0.983600.
After 30000 iterations, loss on training batch is 0.031362, validation accuracy is 0.984800.
After 40000 iterations, loss on training batch is 0.030295, validation accuracy is 0.984400.
