In [1]:
#Rnn on mnist dataset

In [2]:
import tensorflow as tf  #For ML
from tensorflow.examples.tutorials.mnist import input_data#Import MNIST data
import numpy as np #For Mathematical Operations
import time #For calculating running time
import os  #For fetching from path

In [3]:
#Resetting tensorflow graph
tf.reset_default_graph()

In [4]:
#Setting Session
sess = tf.InteractiveSession()

In [5]:
#Load Dataset
MNIST = input_data.read_data_sets('MNIST_data/', one_hot = True)    #Load MNIST dataset

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [6]:
#Defining parameters for RNN
n_epochs = 4 #Number of epochs
seq_size = 28 #Input sequence size
n_seqs = 28  #Number of sequences per example
batch_size = 128 #size of batch
hidden_size = 100 #Hidden units size of RNN
n_classes = 10 #Number of output classes

In [7]:
#Defining Input Placeholders
with tf.variable_scope('placeholders') as scope:
    X = tf.placeholder(shape=[None,n_seqs,seq_size],dtype=tf.float32,
                      name='X_placeholder')
    Y = tf.placeholder(shape=(None,10),dtype=tf.float32,name='Y_placeholder')

In [8]:
#Defining RNN architecture
with tf.name_scope("rnn"):
    lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(hidden_size)
    initial_state = lstm_cell.zero_state(batch_size, dtype=tf.float32)
    sess.run(tf.shape(initial_state))
    outputs, states = tf.nn.dynamic_rnn(lstm_cell,inputs=X,
                                        initial_state=initial_state,
                                        dtype=tf.float32,
                                       )

In [9]:
#Output layer weights and biases for softmax
with tf.variable_scope('output_layer_weights') as scope:
    #outputs shape is (batch_size,n_seqs,hidden_size)
    #For each batch we form a matrix of shape (batch_size,n_seqs*hidden_size)
    #Each example is a vector (1,n_seqs*hidden_size) given by n_inputs
    input_features = tf.reshape(outputs,(batch_size,-1))
    n_inputs = sess.run(tf.shape(input_features))[1]
    w = tf.Variable(tf.random_normal(shape=(n_inputs,n_classes)),
                   dtype=tf.float32,name='weights')
    b = tf.Variable(tf.random_normal(shape=(1,n_classes)),dtype=tf.float32,
                   name='biases')
    

In [10]:
#Defining Loss
with tf.name_scope('loss'):
    #Logits shape = (batch_size,n_classes)
    logits = tf.matmul(input_features,w) + b #scores
    #Softmax entropy for each score
    entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                     labels=Y)
    loss = tf.reduce_mean(entropy)

In [11]:
#Optimizer for RNN with default learning_rate = 0.01 
optimizer = tf.train.AdamOptimizer().minimize(loss)

In [12]:
#Creating summaries for tensorboard
with tf.name_scope('summaries'):
    tf.summary.scalar('loss',loss)
    tf.summary.histogram('loss', loss)
    summary_op = tf.summary.merge_all()

In [13]:
#Initialize variables
sess.run(tf.global_variables_initializer())
#Create object to save/restore model
saver = tf.train.Saver()
#For tensorboard visualizations
writer = tf.summary.FileWriter('/graphs/rnnmnist', sess.graph)
#Check if checkpoint present 
ckpt = tf.train.get_checkpoint_state(os.path.dirname('/checkpoints/rnn_mnist/checkpoint'))
#Restore the latest checkpoint if present
if ckpt and ckpt.model_checkpoint_path:
    saver.restore(sess, ckpt.model_checkpoint_path)

#Start Training
n_batches = int(MNIST.train.num_examples/batch_size)
loss_sum = 0.0
start_time = time.time()
for i in range(n_epochs*n_batches):
    X_batch,Y_batch = MNIST.train.next_batch(batch_size)
    #X_batch of shape (batch_size,784)
    X_batch = np.reshape(X_batch,(batch_size,n_seqs,seq_size))
    opt, loss_batch, summary = sess.run([optimizer, loss, summary_op], feed_dict = {X: X_batch,Y: Y_batch})

    loss_sum += loss_batch
    #Adding data to tensorboard summary
    writer.add_summary(summary, global_step=i)
    if (i+1)%n_batches == 0:
        print( "Loss at step {}: {:5.1f} ".format(i+1, loss_sum))
        saver.save(sess, '/checkpoints/rnn_mnist/mnist-rnn', i)
        loss_sum = 0.0
print('Optimization finished!')
print('Time taken: {}'.format(time.time() - start_time))

#Start testing
n_batches = int(MNIST.test.num_examples/batch_size)
total_correct_preds = 0
for _ in range(n_batches):
    X_batch,Y_batch = MNIST.test.next_batch(batch_size)
    X_batch = np.reshape(X_batch,(batch_size,n_seqs,seq_size))
    opt, loss_batch, logits_batch = sess.run([optimizer, loss, logits], feed_dict = {X: X_batch,Y: Y_batch})
    preds = tf.nn.softmax(logits_batch)
    correct_preds = tf.equal(tf.argmax(preds,1), tf.argmax(Y_batch,1))
    accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32))
    total_correct_preds += sess.run(accuracy)
print("Accuracy = {}". format(total_correct_preds/MNIST.test.num_examples))
        
    
writer.close()  
    

Loss at step 429: 220.5 
Loss at step 858:  75.5 
Loss at step 1287:  52.5 
Loss at step 1716:  38.0 
Optimization finished!
Time taken: 84.44583010673523
Accuracy = 0.9746
