In [1]:
import tensorflow as tf
import numpy.random as rnd

In [2]:
#CONSTRUCTION PHASE
n_steps = 28
n_inputs = 28 #28 features, 1 per pixel
n_neurons = 150
n_outputs = 10
n_layers = 3

learning_rate = 0.001

#create input and output placeholders
#X has shape [    t = 0              t = 1          t = 28 (n_steps)
#             [[x1,x2,...,x28],[x1,x2,...,x28],...,[x1,x2,...,x28]]  instance 1
#             [[x1,x2,...,x28],[x1,x2,...,x28],...,[x1,x2,...,x28]]  instance 2
#             [[x1,x2,...,x28],[x1,x2,...,x28],...,[x1,x2,...,x28]]  instance 3
#             ...
#             [[x1,x2,...,x28],[x1,x2,...,x28],...,[x1,x2,...,x28]]  total instances in batch (None | Undefined)
#            ]
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])

#Y is an integer 0 - 9
y = tf.placeholder(tf.int32, [None])

#create the cells and the layers of the network
layers = [tf.contrib.rnn.BasicRNNCell(num_units=n_neurons) for layer in range(n_layers)]
multi_layer_cell = tf.contrib.rnn.MultiRNNCell(layers, state_is_tuple=True)
outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)

#For classification, we only want the output activation at the last time step so we use 'outputs' 
#We transpose so that the time axis is first and use tf.gather() for selecting the last index, x28. 

#X has shape [    instance 1       instance 2      instance n (150)
#             [[x1,x2,...,x28],[x1,x2,...,x28],...,[x1,x2,...,x28]]  t = 0
#             [[x1,x2,...,x28],[x1,x2,...,x28],...,[x1,x2,...,x28]]  t = 1
#             [[x1,x2,...,x28],[x1,x2,...,x28],...,[x1,x2,...,x28]]  t = 2
#             ...
#             [[x1,x2,...,x28],[x1,x2,...,x28],...,[x1,x2,...,x28]]  t = 28
#            ]
outputs = tf.transpose(outputs, [1, 0, 2])

#gather the final values across t = 28
last = tf.gather(outputs, int(outputs.get_shape()[0]) - 1)

#Create Softmax layer with XEntropy 
logits = tf.layers.dense(last, n_outputs)
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,logits=logits)

#Compute the loss, mean xentropy score
loss = tf.reduce_mean(xentropy)

#Set the optimizer
#Here we use Adaptive Momement Estimation which keeps track of an exponentially decaying average of past gradients
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)

#Compute accuracy
#Does the highest logit correspond to the target class?
#in_top_k returns a 1D tensor full of boolean values
correct = tf.nn.in_top_k(logits, y, 1)
#We cast to floating point numbers and then take the mean to compute an accuracy score
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

#Adds an op to initialize all variables in the model
init = tf.global_variables_initializer()

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


In [None]:
#IMPORT MNIST DATASET
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("tmp/data/")
X_test = mnist.test.images.reshape((-1, n_steps, n_inputs))
y_test = mnist.test.labels

Extracting tmp/data/train-images-idx3-ubyte.gz
Extracting tmp/data/train-labels-idx1-ubyte.gz
Extracting tmp/data/t10k-images-idx3-ubyte.gz
Extracting tmp/data/t10k-labels-idx1-ubyte.gz


In [None]:
#TRAINING PHASE
n_epochs = 100
batch_size = 150

#open a TensorFlow Session
with tf.Session() as sess:
    init.run()
    
    #Epoch is a single pass through the entire training set, followed by testing of the verification set.
    for epoch in range(n_epochs):
        #Number of batches, here we exhaust the training set
        for iteration in range(mnist.train.num_examples // batch_size):   

            #obtain batch of specified batch size
            X_batch, y_batch = mnist.train.next_batch(batch_size) 
            
            #reshape the array to size n_steps * n_inputs
            X_batch = X_batch.reshape((-1, n_steps, n_inputs)) 
            
            #feed in the batch
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        
        #compute accuracy of RNN against Training Set and Test Set
        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
        print(epoch, "Train accuracy:", acc_train, "Test accuracy:", acc_test)

0 Train accuracy: 0.966667 Test accuracy: 0.9499
1 Train accuracy: 0.986667 Test accuracy: 0.9585
2 Train accuracy: 0.96 Test accuracy: 0.965
3 Train accuracy: 0.986667 Test accuracy: 0.968
4 Train accuracy: 0.96 Test accuracy: 0.9752
5 Train accuracy: 0.973333 Test accuracy: 0.9727
6 Train accuracy: 0.973333 Test accuracy: 0.9716
7 Train accuracy: 0.96 Test accuracy: 0.9745
8 Train accuracy: 1.0 Test accuracy: 0.971
9 Train accuracy: 0.986667 Test accuracy: 0.9738
10 Train accuracy: 0.973333 Test accuracy: 0.9753
11 Train accuracy: 0.993333 Test accuracy: 0.9723
12 Train accuracy: 0.986667 Test accuracy: 0.9716
13 Train accuracy: 0.993333 Test accuracy: 0.9743
