# MNIST: a DNN built with the low-level TensorFlow API

In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

## Construction phase: create the TensorFlow graph

In [2]:
# Layer sizes of the network.
n_inputs = 28 * 28  # MNIST: one input per pixel of a 28x28 image
n_hidden1, n_hidden2 = 300, 100  # units in the two hidden layers
n_output = 10  # number of output classes

In [3]:
# Placeholders are the graph's inputs; their values are supplied through
# feed_dict when the graph is run.
# X: one row of n_inputs features per instance. The first dimension is None
# so that batches of any size can be fed.
X = tf.placeholder(dtype=np.float32, shape=(None, n_inputs), name="X")
# y: one integer class label per instance. The shape has to be the 1-tuple
# (None,): a bare (None) is simply None, which leaves the rank unspecified
# instead of declaring a 1-D tensor.
y = tf.placeholder(dtype=np.int64, shape=(None,), name="y")

Define a reusable `neuron_layer` function

In [4]:
def neuron_layer(X, n_neurons, name, activation=None):
    """Build one fully connected layer and return its output tensor.

    Args:
        X: input tensor; the size of its second dimension is taken as the
            number of inputs feeding each neuron.
        n_neurons: number of neurons (output columns) in the layer.
        name: name scope under which the layer's ops are created.
        activation: optional callable applied to the linear output. When it
            is None the linear output is returned unchanged.

    Returns:
        activation(X @ W + b) when an activation is given, else X @ W + b.
    """
    with tf.name_scope(name):
        fan_in = int(X.get_shape()[1])
        # Random initial weights whose spread shrinks as the layer grows.
        weight_stddev = 2 / np.sqrt(fan_in + n_neurons)
        initial_weights = tf.truncated_normal((fan_in, n_neurons),
                                              stddev=weight_stddev)
        W = tf.Variable(initial_weights, name="kernel")
        b = tf.Variable(tf.zeros([n_neurons]), name="bias")
        Z = tf.matmul(X, W) + b

        return Z if activation is None else activation(Z)

Create the Deep neural network

In [5]:
# Two ReLU hidden layers followed by a linear output layer, all built with
# the hand-written neuron_layer helper.
with tf.name_scope("dnn"):
    hidden1 = neuron_layer(
        X, n_hidden1, name="hidden1", activation=tf.nn.relu
    )
    hidden2 = neuron_layer(
        hidden1, n_hidden2, name="hidden2", activation=tf.nn.relu
    )
    # No activation on the last layer: it outputs the raw logits.
    logits = neuron_layer(hidden2, n_output, name="outputs")

`logits` is the output of the DNN *before* the softmax activation. The softmax is handled later, together with the loss, for optimization reasons.

TensorFlow provides ready-made layers, so we don't need to define our own as above (`neuron_layer`). Let's use `tf.layers.dense` instead.

In [6]:
# Same architecture, this time built with tf.layers.dense. This rebinds
# hidden1, hidden2 and logits, so everything defined after this cell
# (loss, training op, evaluation) uses this version of the network.
with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(
        X, n_hidden1, name="hidden1", activation=tf.nn.relu
    )
    hidden2 = tf.layers.dense(
        hidden1, n_hidden2, name="hidden2", activation=tf.nn.relu
    )
    # No activation on the last layer: it outputs the raw logits.
    logits = tf.layers.dense(hidden2, n_output, name="outputs")

Specify the loss function

In [7]:
with tf.name_scope("loss"):
    # Per-instance cross entropy, computed from the raw logits and the
    # integer labels in y.
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=y, logits=logits
    )
    # The scalar that gets minimized: the mean cross entropy over the batch.
    loss = tf.reduce_mean(xentropy, name="loss")

`sparse_softmax_cross_entropy_with_logits` expects float logits of shape (num_instances, num_classes). The ground-truth labels must be integers in the range [0, num_classes - 1].

It is equivalent to applying the softmax and then computing the cross entropy, but it also takes care of edge cases such as large logit values that would otherwise result in a log(0).

Specify the optimizer used to minimize the loss

In [8]:
learning_rate = 0.01  # step size of each gradient descent update

with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    # Running this op performs one gradient descent step on `loss`.
    training_op = optimizer.minimize(loss)
    


Specify how to evaluate the model. We'll choose the top logit value and see if it corresponds to the right label.

In [9]:
with tf.name_scope("eval"):
    # True for each instance whose target class has the highest logit (k=1).
    correct = tf.nn.in_top_k(predictions=logits, targets=y, k=1)
    # Casting the booleans to floats makes the mean equal to
    # (correctly predicted instances) / (all instances), i.e. the accuracy.
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    

In [10]:
# Op that initializes the graph's variables; it is run once at the start of
# the training session.
init = tf.global_variables_initializer()
# Saver used to write the checkpoint during training and to restore it
# before making predictions.
saver = tf.train.Saver()

## Execution Phase

In [11]:
# NOTE: this loader is deprecated; the warnings printed by this cell point
# to tf.data and to official/mnist/dataset.py as alternatives.
from tensorflow.examples.tutorials.mnist import input_data
# Load MNIST from /tmp/data/. The returned object exposes the
# mnist.train, mnist.validation and mnist.test splits used in later cells.
mnist = input_data.read_data_sets("/tmp/data/")

Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting /tmp/data/train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.


In [12]:
# Training schedule: number of passes over the training set, and the number
# of instances fed per gradient step.
n_epochs, batch_size = 40, 50

Train the model

In [13]:
with tf.Session() as sess:
    sess.run(init)
    # Number of full mini-batches that make up one epoch.
    n_batches = mnist.train.num_examples // batch_size

    for epoch in range(n_epochs):
        for iteration in range(n_batches):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})

        # "Train Acc" is measured on the last mini-batch of the epoch only
        # (batch_size instances), so it is a noisy estimate rather than the
        # accuracy over the whole training set.
        acc_train = sess.run(accuracy, feed_dict={X: X_batch, y: y_batch})
        # Validation accuracy is measured on the full validation split.
        acc_val = sess.run(
            accuracy,
            feed_dict={X: mnist.validation.images, y: mnist.validation.labels},
        )

        print(epoch, "Train Acc:", acc_train, " Val Acc:", acc_val)

        # Checkpoint at the end of every epoch, always to the same path.
        save_path = saver.save(sess, "./my_model_final.ckpt")
        

0 Train Acc: 0.94  Val Acc: 0.9038
1 Train Acc: 0.84  Val Acc: 0.923
2 Train Acc: 0.96  Val Acc: 0.9354
3 Train Acc: 0.94  Val Acc: 0.9416
4 Train Acc: 0.94  Val Acc: 0.9482
5 Train Acc: 0.88  Val Acc: 0.9524
6 Train Acc: 0.94  Val Acc: 0.9558
7 Train Acc: 0.94  Val Acc: 0.9602
8 Train Acc: 0.96  Val Acc: 0.9628
9 Train Acc: 0.98  Val Acc: 0.9648
10 Train Acc: 1.0  Val Acc: 0.9664
11 Train Acc: 1.0  Val Acc: 0.9682
12 Train Acc: 0.98  Val Acc: 0.9688
13 Train Acc: 0.98  Val Acc: 0.9706
14 Train Acc: 0.98  Val Acc: 0.9722
15 Train Acc: 0.98  Val Acc: 0.9716
16 Train Acc: 0.96  Val Acc: 0.9736
17 Train Acc: 1.0  Val Acc: 0.9752
18 Train Acc: 0.98  Val Acc: 0.9732
19 Train Acc: 0.98  Val Acc: 0.9746
20 Train Acc: 1.0  Val Acc: 0.9756
21 Train Acc: 1.0  Val Acc: 0.9768
22 Train Acc: 0.98  Val Acc: 0.976
23 Train Acc: 1.0  Val Acc: 0.978
24 Train Acc: 1.0  Val Acc: 0.9776
25 Train Acc: 0.98  Val Acc: 0.976
26 Train Acc: 0.96  Val Acc: 0.9776
27 Train Acc: 1.0  Val Acc: 0.977
28 Train Acc: 1

# Using the Neural Network

In [14]:
with tf.Session() as sess:
    # Load the trained parameters from the checkpoint written during training.
    saver.restore(sess, "./my_model_final.ckpt")
    X_new_scaled = mnist.test.images
    # Raw network outputs: one row of logits per test image.
    Z = sess.run(logits, feed_dict={X: X_new_scaled})
    # The predicted class is the index of the largest logit in each row.
    y_pred = Z.argmax(axis=1)

INFO:tensorflow:Restoring parameters from ./my_model_final.ckpt


In [15]:
# Predicted class index for each test image.
y_pred

array([7, 2, 1, ..., 4, 5, 6])