## Neural network training with ReLU for MNIST

In [2]:
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
import numpy as np

# Dataset dimensions: each MNIST image is 28*28 pixels, flattened to 784
# values, and each label is one of the ten digit classes (0-9).
n_input = 784
n_classes = 10

# Training hyperparameters shared by every experiment in this notebook.
training_epochs = 15   # full passes over the training set
batch_size = 100       # examples per optimizer step
display_step = 1       # print the average cost every `display_step` epochs
# Learning rates tried previously: 10, 0.1, 0.01.
learning_rate = 0.001

# Read MNIST through the TensorFlow tutorial helper, with one-hot labels.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# Graph inputs: a batch of flattened images and the matching one-hot labels.
# The leading None leaves the batch dimension unspecified.
X = tf.placeholder(tf.float32, [None, n_input])
Y = tf.placeholder(tf.float32, [None, n_classes])

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [3]:
W1 = tf.Variable(tf.random_normal([n_input, 256]))
W2 = tf.Variable(tf.random_normal([256, 256]))
W3 = tf.Variable(tf.random_normal([256, n_classes]))

b1 = tf.Variable(tf.random_normal([256]))
b2 = tf.Variable(tf.random_normal([256]))
b3 = tf.Variable(tf.random_normal([n_classes]))


L1 = tf.nn.relu( tf.add(tf.matmul(X, W1), b1) )
L2 = tf.nn.relu( tf.add(tf.matmul(L1, W2), b2) )
hypothesis = tf.add( tf.matmul(L2, W3), b3 )

# softmax_cross_entropy_with_logits
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(hypothesis,Y))

# Adam optimizer
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

init = tf.global_variables_initializer()

print "Training starts"

with tf.Session() as sess:
    sess.run(init)
    
    # Training cycle
    for epoch in range(training_epochs):
        avg_cost = 0.0
        total_batch = int(mnist.train.num_examples/batch_size)
        
        # Loop over all branches
        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            sess.run( optimizer, feed_dict={X: batch_xs, Y: batch_ys} )
            avg_cost += sess.run( cost, feed_dict={X: batch_xs, Y: batch_ys} ) / total_batch
            
        if epoch % display_step == 0:
            print "Epoch:", '%04d'%(epoch+1), "cost=","{:.9f}".format(avg_cost)
    
    print "Optimization finished."
    
    # Test model.
    correct_prediction = tf.equal(tf.argmax(hypothesis,1), tf.argmax(Y,1))
    
    # Calculate accuracy
    accuracy = tf.reduce_mean( tf.cast(correct_prediction,"float") )
    print "Accuracy:", accuracy.eval({X: mnist.test.images, Y: mnist.test.labels })

Training starts
Epoch: 0001 cost= 224.389873910
Epoch: 0002 cost= 43.531463122
Epoch: 0003 cost= 27.328811730
Epoch: 0004 cost= 19.104866968
Epoch: 0005 cost= 13.628129606
Epoch: 0006 cost= 10.096757122
Epoch: 0007 cost= 7.541504701
Epoch: 0008 cost= 5.535305208
Epoch: 0009 cost= 3.948767251
Epoch: 0010 cost= 2.931504011
Epoch: 0011 cost= 2.241774844
Epoch: 0012 cost= 1.617383243
Epoch: 0013 cost= 1.192351140
Epoch: 0014 cost= 0.843807246
Epoch: 0015 cost= 0.654182307
Optimization finished.
Accuracy: 0.9422


## Neural network training with ReLU and Xavier initializer for MNIST

In [6]:
import math

def xavier_init(n_inputs, n_outputs, uniform=True):
    """Build a Xavier (Glorot) weight initializer for one layer.

    The scale is derived from the layer's fan-in and fan-out so that the
    gradients keep roughly the same magnitude in every layer.

    Reference:
        Xavier Glorot and Yoshua Bengio (2010): Understanding the difficulty
        of training deep feedforward neural networks. International
        conference on artificial intelligence and statistics.

    Args:
        n_inputs: The number of input nodes into each output.
        n_outputs: The number of output nodes for each input.
        uniform: If true, sample from a uniform distribution; otherwise
            sample from a truncated normal distribution.

    Returns:
        A TensorFlow initializer object.
    """
    fan_sum = n_inputs + n_outputs

    if not uniform:
        # 3 gives approximately the same limits as the uniform case, because
        # the truncated normal re-draws values that fall more than 2 standard
        # deviations from the mean.
        return tf.truncated_normal_initializer(stddev=math.sqrt(3.0 / fan_sum))

    # The constant 6 is the one used in the paper.
    limit = math.sqrt(6.0 / fan_sum)
    return tf.random_uniform_initializer(-limit, limit)

In [7]:
W1 = tf.get_variable("W1", shape=[n_input, 256], initializer=xavier_init(n_input, 256)) 
W2 = tf.get_variable("W2", shape=[256, 256], initializer=xavier_init(256, 256)) 
W3 = tf.get_variable("W3", shape=[256, n_classes], initializer=xavier_init(256, n_classes)) 

b1 = tf.Variable(tf.random_normal([256]))
b2 = tf.Variable(tf.random_normal([256]))
b3 = tf.Variable(tf.random_normal([n_classes]))

L1 = tf.nn.relu( tf.add(tf.matmul(X, W1), b1) )
L2 = tf.nn.relu( tf.add(tf.matmul(L1,W2), b2) )
hypothesis = tf.add( tf.matmul(L2, W3), b3 )

# softmax_cross_entropy_with_logits
cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(hypothesis, Y) )

# Adam optimizer
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost) 

init = tf.global_variables_initializer()

print "Training starts"

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(training_epochs):
        avg_cost = 0.0
        total_batch = int(mnist.train.num_examples/batch_size)
        
        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            sess.run(optimizer, feed_dict={X: batch_xs, Y: batch_ys} )
            avg_cost += sess.run( cost, feed_dict={X:batch_xs, Y:batch_ys} ) / total_batch
            
        if epoch % display_step == 0:
            print "Epoch:", '%04d'%(epoch+1), "cost=","{:.9f}".format(avg_cost)
    
    print "Optimization finished."
    
    # Test model
    correct_prediction = tf.equal( tf.argmax(hypothesis, 1), tf.argmax(Y, 1) )
    
    # Calculate accuracy
    accuracy = tf.reduce_mean( tf.cast(correct_prediction,"float") )
    print "Accuracy:", accuracy.eval( {X: mnist.test.images, Y: mnist.test.labels} )

Training starts
Epoch: 0001 cost= 0.281054726
Epoch: 0002 cost= 0.099756627
Epoch: 0003 cost= 0.063108709
Epoch: 0004 cost= 0.041863447
Epoch: 0005 cost= 0.029057615
Epoch: 0006 cost= 0.020698789
Epoch: 0007 cost= 0.015488698
Epoch: 0008 cost= 0.011984326
Epoch: 0009 cost= 0.008722027
Epoch: 0010 cost= 0.006956365
Epoch: 0011 cost= 0.006103068
Epoch: 0012 cost= 0.004242062
Epoch: 0013 cost= 0.004166286
Epoch: 0014 cost= 0.004127756
Epoch: 0015 cost= 0.003108635
Optimization finished.
Accuracy: 0.9785


## Neural network training with ReLU, Xavier initializer, and dropout for MNIST

In [9]:

# dropout
dropout_rate = tf.placeholder("float")
_L1 = tf.nn.relu( tf.add(tf.matmul(X, W1), b1) )
L1 = tf.nn.dropout(_L1, dropout_rate)
_L2 = tf.nn.relu( tf.add(tf.matmul(L1,W2),b2) )
L2 = tf.nn.dropout(_L2, dropout_rate)
hypothesis = tf.add( tf.matmul(L2, W3), b3 )


# softmax_cross_entropy_with_logits
cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(hypothesis, Y) )

# Adam optimizer
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost) 

init = tf.global_variables_initializer()

print "Training starts"

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(training_epochs):
        avg_cost = 0.0
        total_batch = int(mnist.train.num_examples/batch_size)
        
        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            sess.run(optimizer, feed_dict={X: batch_xs, Y: batch_ys, dropout_rate: 0.7} )
            avg_cost += sess.run(cost, feed_dict={X: batch_xs, Y: batch_ys, dropout_rate: 1.0}) / total_batch
            
        if epoch % display_step == 0:
            print "Epoch:", '%04d' % (epoch + 1), "cost=","{:.9f}".format(avg_cost)
    
    print "Optimization finished."
    
    # Test model
    correct_prediction = tf.equal( tf.argmax(hypothesis, 1), tf.argmax(Y, 1) )
    
    # Calculate accuracy
    accuracy = tf.reduce_mean( tf.cast(correct_prediction,"float") )
    print "Accuracy:", accuracy.eval({X: mnist.test.images, Y: mnist.test.labels, dropout_rate:1.0})

Training starts
Epoch: 0001 cost= 0.332085077
Epoch: 0002 cost= 0.122274912
Epoch: 0003 cost= 0.084446704
Epoch: 0004 cost= 0.063642124
Epoch: 0005 cost= 0.049384801
Epoch: 0006 cost= 0.040338699
Epoch: 0007 cost= 0.033810767
Epoch: 0008 cost= 0.027732727
Epoch: 0009 cost= 0.024717540
Epoch: 0010 cost= 0.021094276
Epoch: 0011 cost= 0.018501582
Epoch: 0012 cost= 0.016414222
Epoch: 0013 cost= 0.014675020
Epoch: 0014 cost= 0.013009646
Epoch: 0015 cost= 0.011495564
Optimization finished.
Accuracy: 0.9804
