In [1]:
from tensorflow.contrib.layers import fully_connected, batch_norm, dropout
from tensorflow.contrib.framework import arg_scope
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

tf.set_random_seed(777)  # reproducibility

mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

nb_classes = 10

X = tf.placeholder(tf.float32, [None, 784])
Y = tf.placeholder(tf.float32, [None, nb_classes])
train_mode = tf.placeholder(tf.bool, name='train_mode')


# Hyper-parameters
learning_rate = 0.01
training_epochs = 15
batch_size = 100
keep_prob = 0.7
hidden_output_size = 512
final_output_size = nb_classes

xavier_init = tf.contrib.layers.xavier_initializer()
bn_params = {
    'is_training': train_mode,
    'decay': 0.9,
    'updates_collections': None
}

#W = tf.Variable(tf.random_normal([784, nb_classes]))
#b = tf.Variable(tf.random_normal([nb_classes]))

## Multiple NN
#keep_prob = tf.placeholder(tf.float32)

with arg_scope([fully_connected], 
               activation_fn = tf.nn.relu,
               weights_initializer=xavier_init, 
               biases_initializer=None,
               normalizer_fn=batch_norm,
               normalizer_params=bn_params):
    H1 = fully_connected(X, hidden_output_size, scope="h1")
    H1_drop = dropout(H1, keep_prob, is_training=train_mode)
    H2 = fully_connected(H1_drop, hidden_output_size, scope="h2")
    H2_drop = dropout(H2, keep_prob, is_training=train_mode)
    H3 = fully_connected(H2_drop, hidden_output_size, scope="h3")
    H3_drop = dropout(H3, keep_prob, is_training=train_mode)
    H4 = fully_connected(H3_drop, hidden_output_size, scope="h4")
    H4_drop = dropout(H4, keep_prob, is_training=train_mode)
    H = fully_connected(H4_drop, final_output_size, activation_fn=None, scope="hypothesis")

#H = tf.matmul(X, W) + b
S = tf.nn.softmax(H)

loss_i = tf.nn.softmax_cross_entropy_with_logits(logits=H, labels=Y)
loss = tf.reduce_mean(loss_i) 

#optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.05).minimize(loss)
optimizer = tf.train.AdamOptimizer(learning_rate=0.05).minimize(loss)

is_correct = tf.equal(tf.argmax(S, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))

# one epoch = one forward pass and backward pass of all training instance
# batch size = the number of training examples in one f/b pass

#with tf.Session() as sess:
sess = tf.Session()
sess.run(tf.global_variables_initializer())
    
for epoch in range(training_epochs):
    avg_cost = 0
    total_batch = int(mnist.train.num_examples / batch_size)

    for i in range(total_batch):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        feed_dict_train = {X: batch_xs, Y: batch_ys, train_mode: True}
        feed_dict_cost  = {X: batch_xs, Y: batch_ys, train_mode: False}
        opt = sess.run(optimizer, feed_dict=feed_dict_train)
        c   = sess.run(loss, feed_dict=feed_dict_cost)
        avg_cost += c / total_batch

    print('Epoch: ', '%04d' % (epoch + 1), "cost = ", '{:.9f}'.format(avg_cost))

print("Accuracy: ", accuracy.eval(session=sess, 
                                  feed_dict={X: mnist.test.images, Y: mnist.test.labels, train_mode: False}))

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Epoch:  0001 cost =  1.671951395
Epoch:  0002 cost =  0.332086626
Epoch:  0003 cost =  0.321519279
Epoch:  0004 cost =  0.316064875
Epoch:  0005 cost =  0.315140411
Epoch:  0006 cost =  0.309960711
Epoch:  0007 cost =  0.308618021
Epoch:  0008 cost =  0.308396633
Epoch:  0009 cost =  0.305310422
Epoch:  0010 cost =  0.305289271
Epoch:  0011 cost =  0.305113808
Epoch:  0012 cost =  0.302933251
Epoch:  0013 cost =  0.304363705
Epoch:  0014 cost =  0.302565160
Epoch:  0015 cost =  0.303438830
Accuracy:  0.983
