# Multi-layer perceptron

In [1]:
import os

import tensorflow as tf
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline
# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
# Read the data set from the local `data/` directory, requesting one-hot labels.
mnist = input_data.read_data_sets('data/', one_hot=True)

Extracting data/train-images-idx3-ubyte.gz
Extracting data/train-labels-idx1-ubyte.gz
Extracting data/t10k-images-idx3-ubyte.gz
Extracting data/t10k-labels-idx1-ubyte.gz


---

## Define model

In [2]:
# training hyper-parameters
learning_rate = 1e-3    # step size handed to the optimizer
batch_size = 100        # number of examples fetched per training step
training_epochs = 15    # number of passes of the training loop
display_step = 1        # the cost is printed every `display_step` epochs

In [3]:
# network architecture
n_input = 28 * 28               # MNIST data input: one flattened 28x28 image (784 values)
n_hidden_1 = n_hidden_2 = 256   # number of features of the 1st and 2nd hidden layers
n_classes = 10                  # MNIST total classes (digits 0-9)

In [4]:
# graph inputs: placeholders that receive one batch through `feed_dict`
x = tf.placeholder(tf.float32, shape=[None, n_input])     # input images, one row per example
y = tf.placeholder(tf.float32, shape=[None, n_classes])   # labels, one row per example

In [5]:
# variables to optimize, every one of them initialised from tf.random_normal
weight_shapes = [
    ('h1', [n_input, n_hidden_1]),
    ('h2', [n_hidden_1, n_hidden_2]),
    ('out', [n_hidden_2, n_classes]),
]
bias_shapes = [
    ('b1', [n_hidden_1]),
    ('b2', [n_hidden_2]),
    ('out', [n_classes]),
]
# the variables are created in the order the tables list them: weights first, then biases
weights = dict((key, tf.Variable(tf.random_normal(shape))) for key, shape in weight_shapes)
biases = dict((key, tf.Variable(tf.random_normal(shape))) for key, shape in bias_shapes)

In [6]:
# define model: write a function to do so
def mlp(x, weights, biases):
    """Build a perceptron with two ReLU hidden layers and a linear output layer.

    Args:
        x: input tensor, multiplied by ``weights['h1']``.
        weights: dict holding the matrices under the keys 'h1', 'h2' and 'out'.
        biases: dict holding the bias vectors under the keys 'b1', 'b2' and 'out'.

    Returns:
        The output of the last layer, with no activation applied.
    """
    # tf.add and the operator + are exactly the same, so + is used throughout:
    # http://stackoverflow.com/questions/37900780/in-tensorflow-what-is-the-difference-between-tf-add-and-operator
    hidden_1 = tf.nn.relu(tf.matmul(x, weights['h1']) + biases['b1'])
    hidden_2 = tf.nn.relu(tf.matmul(hidden_1, weights['h2']) + biases['b2'])
    return tf.matmul(hidden_2, weights['out']) + biases['out']

# the model: pred holds the logits, a softmax still has to be applied on top of them
pred = mlp(x, weights, biases)

In [7]:
# cost function: same as logistic regression, use cross entropy (softmax applied to the
# logits `pred`, compared with the labels `y`, then averaged with reduce_mean).
# The two tensors are passed by keyword: newer TensorFlow releases refuse a positional
# call to this function, and naming the arguments rules out swapping logits and labels.
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))

# here use Adam optimizer, which is better than the GD optimizer, see:
# http://stats.stackexchange.com/questions/184448/difference-between-gradientdescentoptimizer-and-adamoptimizer-tensorflow
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)

In [8]:
# for evaluation: a prediction counts as correct when the arg-max of the logits and the
# arg-max of the label row pick the same index
predicted_class = tf.argmax(pred, 1)
true_class = tf.argmax(y, 1)
correct_pred = tf.equal(predicted_class, true_class)
# mean of the 0/1 correctness flags, i.e. the fraction of correct predictions
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [13]:
# ==> this init op must be defined AFTER all the other ops are defined!
# The function initializes the variables present in the graph **at the time it is called**,
# see http://stackoverflow.com/questions/33788989/tensorflow-using-adam-optimizer
init = tf.initialize_all_variables()

---

## Train and save model

In [14]:
# where the checkpoint is written, and the Saver object used to write and read it
MODEL_PATH = 'model/mlp.ckpt'
saver = tf.train.Saver(name='mlp')
print(saver)

<tensorflow.python.training.saver.Saver object at 0x7f70e81d2750>


In [15]:
# Train the model from freshly initialised variables, report the accuracy on the test
# set, then write a checkpoint to MODEL_PATH.
with tf.Session() as sess:
    sess.run(init)
    # number of mini-batches per epoch; it never changes, so compute it once
    total_batch = mnist.train.num_examples // batch_size
    for epoch in range(1, training_epochs + 1):
        avg_cost = 0.0
        for step in range(total_batch):
            xs, ys = mnist.train.next_batch(batch_size)
            # run one optimisation step and fetch the cost of this batch
            _, c = sess.run([optimizer, cost], feed_dict={x: xs, y: ys})
            avg_cost += c / total_batch
        if epoch % display_step == 0:
            print('epoch %2d, cost = %.9f' % (epoch, avg_cost))
    print('finish!')
    # equivalent: accuracy.eval({x: mnist.test.images, y: mnist.test.labels})
    test_accuracy = sess.run(accuracy,
                             {x: mnist.test.images, y: mnist.test.labels})
    print('accurracy = %.9f' % test_accuracy)

    # save the model using saver.save(); make sure the target directory exists first
    # instead of relying on the saver to create it
    model_dir = os.path.dirname(MODEL_PATH)
    if model_dir and not os.path.isdir(model_dir):
        os.makedirs(model_dir)
    save_path = saver.save(sess, MODEL_PATH)
    print('model is saved at %s' % save_path)

epoch  1, cost = 159.611814384
epoch  2, cost = 39.596413512
epoch  3, cost = 25.040420440
epoch  4, cost = 17.685205268
epoch  5, cost = 12.825376380
epoch  6, cost = 9.576980130
epoch  7, cost = 7.225939726
epoch  8, cost = 5.344362521
epoch  9, cost = 4.168451068
epoch 10, cost = 3.240559319
epoch 11, cost = 2.440037495
epoch 12, cost = 1.896532100
epoch 13, cost = 1.434195745
epoch 14, cost = 1.163079214
epoch 15, cost = 0.883589673
finish!
accurracy = 0.942499995
model is saved at model/mlp.ckpt


---

## Restore model

In [16]:
# Load the checkpoint into a new session and resume training for 5 more epochs.
with tf.Session() as sess2:
    # No sess2.run(init) here: the variables to restore do not have to have been
    # initialized, as restoring is itself a way to initialize variables.
    # The checkpoint is addressed through the MODEL_PATH constant rather than the
    # `save_path` variable left behind by the training cell, so this cell does not
    # need the training cell to have been executed in the current kernel.
    saver.restore(sess2, MODEL_PATH)
    print('model loaded from file: %s' % MODEL_PATH)

    # resume training
    # number of mini-batches per epoch; it never changes, so compute it once
    total_batch = mnist.train.num_examples // batch_size
    for epoch in range(5):
        avg_cost = 0.0
        for step in range(total_batch):
            xs, ys = mnist.train.next_batch(batch_size)
            # run one optimisation step and fetch the cost of this batch
            _, c = sess2.run([optimizer, cost], feed_dict={x: xs, y: ys})
            avg_cost += c / total_batch
        if epoch % display_step == 0:
            print('epoch %2d, cost = %.9f' % (epoch, avg_cost))
    print('second optimization finished !')
    test_accuracy = sess2.run(accuracy,
                              {x: mnist.test.images, y: mnist.test.labels})
    print('accurracy = %.9f' % test_accuracy)

model loaded from file: model/mlp.ckpt
epoch  0, cost = 0.755212577
epoch  1, cost = 0.556708338
epoch  2, cost = 0.546690165
epoch  3, cost = 0.472998166
epoch  4, cost = 0.412938899
second optimization finished !
accurracy = 0.944500029
