In [1]:
from __future__ import division, print_function, absolute_import

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm

# Import MNIST data

from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('../mnist', one_hot=True)

from keras import backend as K
#Clear TF memory
cfg = K.tf.ConfigProto()
cfg.gpu_options.allow_growth = True
K.set_session(K.tf.Session(config=cfg))

  from ._conv import register_converters as _register_converters


Extracting ../mnist\train-images-idx3-ubyte.gz
Extracting ../mnist\train-labels-idx1-ubyte.gz
Extracting ../mnist\t10k-images-idx3-ubyte.gz
Extracting ../mnist\t10k-labels-idx1-ubyte.gz


Using TensorFlow backend.


In [2]:
#Parameters
learning_rate = 0.01
training_epochs = 25
batch_size = 100
display_step = 5
logs_path = 'log/tensorboard_logs/'

#Network Parameters
n_hidden_1 = 256 #1st layer number of features
n_hidden_2 = 256 #2nd layer number of features
n_input = 784
n_classes = 10

#TF graph input
x = tf.placeholder(dtype=tf.float32, shape=[None, n_input], name='InputData')
y = tf.placeholder(dtype=tf.float32, shape=[None, n_classes], name='LabelData')

#Weights and Biases
weights = {
    'w1': tf.Variable(tf.random_normal(shape=[n_input, n_hidden_1]), name='W1'),
    'w2': tf.Variable(tf.random_normal(shape=[n_hidden_1, n_hidden_2]), name='W2'),
    'out': tf.Variable(tf.random_normal(shape=[n_hidden_2, n_classes]), name='W_out')
}

biases = {
    'b1': tf.Variable(tf.random_normal(shape=[n_hidden_1]), name='b1'),
    'b2': tf.Variable(tf.random_normal(shape=[n_hidden_2]), name='b2'),
    'out': tf.Variable(tf.random_normal(shape=[n_classes]), name='b_out')
}

In [3]:
def multilayer_perceptron(x, weights, biases):
    #Hidden layer 1 with relu activation
    layer_1 = tf.add(tf.matmul(x, weights['w1']), biases['b1'])
    layer_1 = tf.nn.relu(layer_1)
    #Create a summary to visualize the 1st layer relu activation
    tf.summary.histogram('relu1', layer_1)
    
    #Hidden layer 2 with relu activation
    layer_2 = tf.add(tf.matmul(layer_1, weights['w2']), biases['b2'])
    layer_2 = tf.nn.relu(layer_2)
    #Create a summary to visualize the 2nd layer relu activation
    tf.summary.histogram('relu2', layer_2)
    
    #Output layer
    out_layer = tf.add(tf.matmul(layer_2, weights['out']), biases['out'])
    return out_layer

In [4]:
#Encapsulating all the ops into scopes, making tensorboard's graph
#visualization more convenient
with tf.name_scope('Model'):
    #Build model
    y_pred = multilayer_perceptron(x, weights, biases)
    
with tf.name_scope('loss'):
    #Softmax Cross entropy(loss function)
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y_pred, labels=y))
    
with tf.name_scope('SGD'):
    #Gradient descent
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    #op to calculate every variable gradient
    grads = tf.gradients(loss, tf.trainable_variables())
    grads = list(zip(grads, tf.trainable_variables()))
    #Op to update all variables according to their gradient
    apply_grads = optimizer.apply_gradients(grads_and_vars=grads)
    
with tf.name_scope('accuracy'):
    #Accuracy
    acc = tf.equal(tf.argmax(y_pred, 1), tf.argmax(y, 1))
    acc = tf.reduce_mean(tf.cast(acc, dtype=tf.float32))

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See tf.nn.softmax_cross_entropy_with_logits_v2.



In [5]:
#Init global variables
init = tf.global_variables_initializer()

#Create a summary to monitor loss tensor
tf.summary.scalar('loss', loss)
#Create a summary to monitor accuracy tensor
tf.summary.scalar('accuracy', acc)
#Create summaries to visalize weights
for var in tf.trainable_variables():
    tf.summary.histogram(var.name, var)
#Summarize all gradients
for grad, var in grads:
    tf.summary.histogram(var.name + '/gradient', grad)

#Merge all summaries into a single op
merged_summary_op = tf.summary.merge_all()

INFO:tensorflow:Summary name W1:0 is illegal; using W1_0 instead.
INFO:tensorflow:Summary name W2:0 is illegal; using W2_0 instead.
INFO:tensorflow:Summary name W_out:0 is illegal; using W_out_0 instead.
INFO:tensorflow:Summary name b1:0 is illegal; using b1_0 instead.
INFO:tensorflow:Summary name b2:0 is illegal; using b2_0 instead.
INFO:tensorflow:Summary name b_out:0 is illegal; using b_out_0 instead.
INFO:tensorflow:Summary name W1:0/gradient is illegal; using W1_0/gradient instead.
INFO:tensorflow:Summary name W2:0/gradient is illegal; using W2_0/gradient instead.
INFO:tensorflow:Summary name W_out:0/gradient is illegal; using W_out_0/gradient instead.
INFO:tensorflow:Summary name b1:0/gradient is illegal; using b1_0/gradient instead.
INFO:tensorflow:Summary name b2:0/gradient is illegal; using b2_0/gradient instead.
INFO:tensorflow:Summary name b_out:0/gradient is illegal; using b_out_0/gradient instead.


In [6]:
#Start training
with tf.Session() as sess:
    #Run the init
    sess.run(init)
    
    #Op to write logs to tensorboard
    summary_writer = tf.summary.FileWriter(logdir=logs_path, graph=tf.get_default_graph())
    
    #Training
    for epoch in range(training_epochs):
        avg_loss = 0
        total_batch = int(mnist.train.num_examples/batch_size)
        
        #Loop over all batches
        for i in range(total_batch):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            #Run optimization, loss op, and summary nodes
            _, l, summary = sess.run([apply_grads, loss, merged_summary_op], feed_dict={x: batch_x, y: batch_y})
            
            #Write logs at every iteration
            summary_writer.add_summary(summary=summary, global_step=epoch*total_batch+i)
            
            #Compute avg loss
            avg_loss = l / total_batch
            
        #display logs per epoch step
        if (epoch+1) % display_step == 0:
            print('Epoch: %03d, loss= %.4f' % (epoch+1, avg_loss))
            
    print('Training completed!!!')
    
    #Test model
    #Calculate accuracy
    print('Accuracy: ', acc.eval(feed_dict={x: mnist.test.images, y: mnist.test.labels}))
    
    print('Run the command line: \n --> tensorboard --logdir=log/tensorboard_logs/')

Epoch: 005, loss= 0.0021
Epoch: 010, loss= 0.0011
Epoch: 015, loss= 0.0015
Epoch: 020, loss= 0.0000
Epoch: 025, loss= 0.0001
Training completed!!!
Accuracy:  0.9253
Run the command line: 
 --> tensorboard --logdir=log/tensorboard_logs/
