In [None]:
import tensorflow as tf
import datetime

<img src="optimizer_graph.png" width="200px">

## Reading Data

We are reading MNIST data from http://yann.lecun.com/exdb/mnist/

In [None]:
from tensorflow.examples.tutorials.mnist import input_data
# Read the MNIST data sets from /tmp/data/ with the labels one-hot encoded
# (presumably the files are downloaded first when missing -- TODO confirm).
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

## Setting hyper-parameters

In [None]:
# ---- Optimisation settings ----
learningrate = 1e-3    # learning rate handed to the optimizer
nsteps = 100000        # number of mini-batch training iterations
batchsize = 128        # examples requested per training batch
displaystep = 100      # interval, in steps, meant for progress reporting

# ---- Layer widths, listed in network order ----
ninput = 784           # features per example (presumably a flattened 28x28 image)
nhidden1 = nhidden2 = 256   # units in the first and second hidden layers
noutput = 10           # output units, matching the width of the one-hot labels

## Defining X and Y as placeholders

In [None]:
# Graph inputs fed at run time; the leading None leaves the batch size open.
X = tf.placeholder(dtype=tf.float32, shape=[None, ninput])    # input rows, ninput features each
Y = tf.placeholder(dtype=tf.float32, shape=[None, noutput])   # label rows, noutput entries each

## Initializing Weights and Biases

In [None]:
# Trainable parameters, each initialised from tf.random_normal.
# Every weight matrix is shaped [inputs_to_layer, units_in_layer] so it can
# right-multiply the activations coming from the previous layer.
weights = dict(
    h1=tf.Variable(tf.random_normal(shape=[ninput, nhidden1])),
    h2=tf.Variable(tf.random_normal(shape=[nhidden1, nhidden2])),
    out=tf.Variable(tf.random_normal(shape=[nhidden2, noutput])),
)

# One bias vector per layer, with one entry per unit of that layer.
biases = dict(
    b1=tf.Variable(tf.random_normal(shape=[nhidden1])),
    b2=tf.Variable(tf.random_normal(shape=[nhidden2])),
    out=tf.Variable(tf.random_normal(shape=[noutput])),
)

## Defining model using sigmoid activation function

In [None]:
def multiperceptron(x):
    """Two-hidden-layer perceptron that returns unscaled class scores (logits).

    Both hidden layers use a sigmoid activation. The output layer is kept
    linear on purpose: its result is passed as ``logits`` to
    ``tf.nn.softmax_cross_entropy_with_logits``, which applies the softmax
    itself. Squashing the scores through a sigmoid first would confine them
    to (0, 1), flatten the resulting softmax distribution and weaken the
    training signal.

    Args:
        x: Input tensor whose last dimension matches the first dimension of
            ``weights['h1']``.

    Returns:
        Tensor of raw (unbounded) scores with one column per output unit,
        i.e. the second dimension of ``weights['out']``.
    """
    l1 = tf.nn.sigmoid(tf.add(tf.matmul(x, weights['h1']), biases['b1']))
    l2 = tf.nn.sigmoid(tf.add(tf.matmul(l1, weights['h2']), biases['b2']))
    # No activation on the output layer: the loss expects logits. Taking the
    # arg-max of these scores gives the same class as after a sigmoid,
    # because the sigmoid is monotonic.
    outl = tf.add(tf.matmul(l2, weights['out']), biases['out'])
    return outl
    
# Build the network's output tensor for the input placeholder X.
model = multiperceptron(X)

## Define cost, optimizer and accuracy

In [None]:
# Loss: mean over the batch of the softmax cross-entropy between `model`
# (passed as the logits argument) and the one-hot labels Y.
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = model, labels=Y))
# Adam optimizer with the configured learning rate; running `train_min`
# performs one minimisation step on `loss`.
optimizer = tf.train.AdamOptimizer(learning_rate=learningrate)
train_min = optimizer.minimize(loss)

# A prediction counts as correct when the arg-max of the model output matches
# the arg-max of the label row; accuracy is the mean of those 0/1 outcomes.
correct_prediction = tf.equal(tf.argmax(model, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Summaries: histograms of the two hidden-layer weight matrices and the scalar
# loss, merged into a single op so one run call evaluates all of them.
tf.summary.histogram("weight_1",weights['h1'])
tf.summary.histogram("weight_2",weights['h2'])
tf.summary.scalar("loss", loss)
merged_summary_op = tf.summary.merge_all()
# Each run logs to its own directory, named with the current timestamp.
filename="./summary_log/run"+datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
# The writer is created with the default graph, so the graph is logged too.
writer = tf.summary.FileWriter(filename, tf.get_default_graph())

# Op that initialises all global variables; it must run before training.
init = tf.global_variables_initializer()

## Running the session 
This runs the graph inside a session, using the tensors and operations defined in the cells above.

In [None]:
# Train for `nsteps` mini-batches, reporting every `displaystep` steps.
with tf.Session() as sess:
    sess.run(init)

    for steps in range(1, nsteps+1):
        batch_x, batch_y = mnist.train.next_batch(batchsize)
        # Build the feed once so the training step and the metrics below are
        # guaranteed to see the same batch.
        feed = {X: batch_x, Y: batch_y}
        sess.run(train_min, feed_dict=feed)

        # Honour the configured reporting interval instead of a hard-coded
        # 100, so changing `displaystep` actually takes effect.
        if steps % displaystep == 0:
            # Metrics are evaluated on the batch just trained on, so these
            # are training figures rather than held-out ones.
            losscalc, accuracycalc, merged_summary = sess.run(
                [loss, accuracy, merged_summary_op], feed_dict=feed)
            print("Training Loss: %0.4f, Accuracy: %0.4f"%(losscalc, accuracycalc))
            writer.add_summary(merged_summary, steps)

# Close the summary writer once training has finished.
writer.close()

### Loss:
In the figure below, you will notice that the loss decreases rapidly at first and then stabilizes. The accuracy (measured on the current training batch) is around 95%.

<img src="results/loss_mnist.png" height="200px">

### Weights: 
If you compare the histograms of the weights, you will see that both `weight_1` and `weight_2` have changed over time, indicating that both hidden layers learned from the data during training.

<img src="results/weight1_mnist.png" height="200px">
<img src="results/weight2_mnist.png" height="200px">