In [1]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.examples.tutorials.mnist import input_data
%matplotlib inline

from keras import backend as K

#Clear TF memory
cfg = K.tf.ConfigProto()
cfg.gpu_options.allow_growth = True
K.set_session(K.tf.Session(config=cfg))

mnist = input_data.read_data_sets('/Basic TF & thing/mnist/', one_hot=True)

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


Extracting /Basic TF & thing/mnist/train-images-idx3-ubyte.gz
Extracting /Basic TF & thing/mnist/train-labels-idx1-ubyte.gz
Extracting /Basic TF & thing/mnist/t10k-images-idx3-ubyte.gz
Extracting /Basic TF & thing/mnist/t10k-labels-idx1-ubyte.gz


# Xavier Init Function

In [2]:
def xavier_init(num_inputs, num_outputs, uniform=True):
    """ This method is designed to keep the scale of the gradients roughly the same in all layers """
    if uniform:
        #6 was used in paper
        init_range = tf.sqrt(6.0/(num_inputs + num_outputs))
        return tf.random_uniform_initializer(-init_range, init_range)
    else:
        #3 give us approximately the same limits as above since this repicks
        #values greater than 2 standard deviations from the mean
        stddev = tf.sqrt(3.0/(num_inputs + num_outputs))
        return tf.truncated_normal_initializer(stddev=stddev)

# Define model's parameters

In [3]:
#Params
learning_rate = 0.001
epochs = 50
batch_size = 100
display_step = 10

#Network Params
n_inputs = 784
n_hidden_1 = 256 #1st hidden layer num features
n_hidden_2 = 256 #2nd hidden layer num features
n_hidden_3 = 256 #3rd hidden layer num features
n_hidden_4 = 256 #4th hidden layer num features
n_classes = 10   #Digits(0-9)

#Placeholder for x and y
x = tf.placeholder(dtype=tf.float32, shape=[None, n_inputs])
y = tf.placeholder(dtype=tf.float32, shape=[None, n_classes])
dropout_keep_prob = tf.placeholder(dtype=tf.float32)

#Weights and Bias with Xavier Init
weights = {
    'h1': tf.get_variable(shape=[n_inputs, n_hidden_1], initializer=xavier_init(num_inputs=n_inputs, num_outputs=n_hidden_1), name='h1'),
    'h2': tf.get_variable(shape=[n_hidden_1, n_hidden_2], initializer=xavier_init(num_inputs=n_hidden_1, num_outputs=n_hidden_2), name='h2'),
    'h3': tf.get_variable(shape=[n_hidden_2, n_hidden_3], initializer=xavier_init(num_inputs=n_hidden_2, num_outputs=n_hidden_3), name='h3'),
    'h4': tf.get_variable(shape=[n_hidden_3, n_hidden_4], initializer=xavier_init(num_inputs=n_hidden_3, num_outputs=n_hidden_4), name='h4'),
    'out': tf.get_variable(shape=[n_hidden_4, n_classes], initializer=xavier_init(num_inputs=n_hidden_4, num_outputs=n_classes), name='out')
}

biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1])),
    'b2': tf.Variable(tf.random_normal([n_hidden_2])),
    'b3': tf.Variable(tf.random_normal([n_hidden_3])),
    'b4': tf.Variable(tf.random_normal([n_hidden_4])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}

# Create model function

In [4]:
def multilayer_perceptron(x, weights, biases, keep_prob):
    layer_1 = tf.nn.dropout(tf.nn.relu(tf.add(tf.matmul(x, weights['h1']), biases['b1'])), keep_prob)
    layer_2 = tf.nn.dropout(tf.nn.relu(tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])), keep_prob)
    layer_3 = tf.nn.dropout(tf.nn.relu(tf.add(tf.matmul(layer_2, weights['h3']), biases['b3'])), keep_prob)
    layer_4 = tf.nn.dropout(tf.nn.relu(tf.add(tf.matmul(layer_3, weights['h4']), biases['b4'])), keep_prob)
    return (tf.matmul(layer_4, weights['out']) + biases['out'])

# Define other functions for the training

In [5]:
#Construct model
y_pred = multilayer_perceptron(x, weights, biases, dropout_keep_prob)

#Defind Loss and optimizer
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y_pred, labels=y)) #Softmax loss
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss) # Adam optimizer

#Correct prediction & Accuracy function
correct_prediction = tf.equal(tf.argmax(y_pred,1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, dtype=tf.float32))

#Init Global varibale
init = tf.global_variables_initializer()

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See tf.nn.softmax_cross_entropy_with_logits_v2.



# Training phase 

In [6]:
#Launch the graph
sess = tf.Session()
sess.run(init)

#Training 
for epoch in range(epochs):
    avg_loss = 0
    total_batch = int(mnist.train.num_examples/batch_size)
    #Loop over all batches
    for i in range(total_batch):
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        #Fit training using batch data
        sess.run(optimizer, feed_dict={x: batch_x, y: batch_y, dropout_keep_prob: 0.7})
        
        #Compute avg loss
        avg_loss += sess.run(loss, feed_dict={x: batch_x, y: batch_y, dropout_keep_prob:1.0})/total_batch
    
    #Display logs per epoch steps
    if epoch % display_step == 0:
        print('Epoch: %03d/%03d, Loss: %.4f' % (epoch, epochs, avg_loss))
        train_accuracy = sess.run(accuracy, feed_dict={x: batch_x, y: batch_y, dropout_keep_prob: 1.0})
        test_accuracy = sess.run(accuracy, feed_dict={x: mnist.test.images, y: mnist.test.labels, dropout_keep_prob: 1.0})
        print('-->    Training accuracy: ', train_accuracy, '\n-->    Test accuracy: ', test_accuracy)

Epoch: 000/050, Loss: 0.3959
-->    Training accuracy:  0.97 
-->    Test accuracy:  0.9471
Epoch: 010/050, Loss: 0.0269
-->    Training accuracy:  0.98 
-->    Test accuracy:  0.9795
Epoch: 020/050, Loss: 0.0114
-->    Training accuracy:  1.0 
-->    Test accuracy:  0.9826
Epoch: 030/050, Loss: 0.0065
-->    Training accuracy:  1.0 
-->    Test accuracy:  0.9835
Epoch: 040/050, Loss: 0.0049
-->    Training accuracy:  1.0 
-->    Test accuracy:  0.9829
