In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.contrib import rnn
import matplotlib.pyplot as plt
from tensorflow.examples.tutorials.mnist import input_data
%matplotlib inline

from keras import backend as K

# Ask TensorFlow to grow its GPU memory allocation on demand rather than
# reserving the whole device up front, and register that session with Keras.
# `tf` is used directly (it is imported above) instead of going through the
# incidental `K.tf` attribute of the Keras backend module.
cfg = tf.ConfigProto()
cfg.gpu_options.allow_growth = True
K.set_session(tf.Session(config=cfg))

# Load the MNIST data sets rooted at MNIST_DIR, with one-hot encoded labels.
MNIST_DIR = '../Basic TF & things/mnist'
mnist = input_data.read_data_sets(MNIST_DIR, one_hot=True)

  from ._conv import register_converters as _register_converters


AttributeError: module 'tensorflow.python.ops.rnn_cell_impl' has no attribute '_Linear'

# The simple RNN consists of
## 1. One input layer, which maps each 28-dimensional input row to a 128-dimensional hidden vector
## 2. One intermediate RNN (LSTM)
## 3. One output layer, which maps the 128-dimensional LSTM output to a 10-dimensional output indicating a class label

![title](images/rnn_input.jpg)

# Construct a RNN

In [2]:
# --- Training hyper-parameters -------------------------------------------
n_classes = mnist.train.labels.shape[1]   # width of the one-hot label vectors
learning_rate = 0.001
epochs = 20
batch_size = 128
display_step = 2                          # print metrics every `display_step` epochs

# --- Network dimensions --------------------------------------------------
dim_input = 28        # features per time step
dim_hidden = 128      # width of the hidden projection / LSTM
dim_output = n_classes
n_steps = 28          # time steps per sequence

# --- Trainable parameters (randomly initialised) -------------------------
# 'hidden' projects each input step to dim_hidden; 'out' projects the LSTM
# output to dim_output.
weights = dict(
    hidden=tf.Variable(tf.random_normal([dim_input, dim_hidden])),
    out=tf.Variable(tf.random_normal([dim_hidden, dim_output])),
)

biases = dict(
    hidden=tf.Variable(tf.random_normal([dim_hidden])),
    out=tf.Variable(tf.random_normal([dim_output])),
)

# --- Graph inputs --------------------------------------------------------
x = tf.placeholder(tf.float32, [None, n_steps, dim_input])
y = tf.placeholder(tf.float32, [None, dim_output])
# Initial LSTM state: state & cell side by side => 2 * dim_hidden columns.
istate = tf.placeholder(tf.float32, [None, 2 * dim_hidden])

## RNN function

In [3]:
def RNN(x, istate, weights, biases, nsteps, name):
    """Build an input-projection -> LSTM -> output-projection graph.

    Args:
        x: Input tensor of shape [batch_size, nsteps, dim_input]. The last
            dimension must be statically known.
        istate: Initial LSTM state of shape [batch_size, 2 * dim_hidden].
            It is split in half along axis 1 into (cell state, hidden
            state). Pass None to start from an all-zero state.
        weights: Mapping with a 'hidden' matrix [dim_input, dim_hidden] and
            an 'out' matrix [dim_hidden, dim_output].
        biases: Mapping with a 'hidden' vector [dim_hidden] and an 'out'
            vector [dim_output].
        nsteps: Number of time steps; the projected input is split into
            this many equally sized chunks.
        name: Variable scope under which the LSTM cell is created.

    Returns:
        A dict of graph nodes:
            'x':       input reshaped to [nsteps * batch_size, dim_input]
            'H':       hidden projection of 'x'
            'H_split': list of `nsteps` tensors, one per time step
            'LSTM_O':  list of per-step LSTM outputs
            'LSTM_S':  final LSTM state
            'O':       output projection of the last LSTM output
    """
    # Permute input from [batch_size, nsteps, dim_input] to the time-major
    # layout [nsteps, batch_size, dim_input].
    x = tf.transpose(a=x, perm=[1, 0, 2])

    # Take the feature width from the tensor itself rather than from a
    # module-level global, then flatten to [nsteps * batch_size, in_dim].
    in_dim = x.get_shape().as_list()[-1]
    x = tf.reshape(tensor=x, shape=[-1, in_dim])

    # Input layer -> hidden layer.
    H = tf.matmul(x, weights['hidden']) + biases['hidden']
    hidden_dim = H.get_shape().as_list()[-1]

    # Split into `nsteps` chunks along axis 0. Because the data is
    # time-major, the i-th chunk holds time step i for the whole batch.
    H_split = tf.split(value=H, num_or_size_splits=nsteps, axis=0)

    # Run the LSTM over the sequence. Only the per-step outputs (LSTM_O) are
    # used for prediction; the final state (LSTM_S) is returned for inspection.
    with tf.variable_scope(name):
        lstm_cell = rnn.BasicLSTMCell(hidden_dim, forget_bias=1.0)
        if istate is None:
            LSTM_O, LSTM_S = rnn.static_rnn(lstm_cell, H_split, dtype=tf.float32)
        else:
            # Honour the caller-supplied initial state instead of silently
            # ignoring it: first half is the cell state, second half the
            # hidden state.
            c0, h0 = tf.split(value=istate, num_or_size_splits=2, axis=1)
            LSTM_O, LSTM_S = rnn.static_rnn(
                lstm_cell, H_split, initial_state=rnn.LSTMStateTuple(c0, h0))

    # Output layer, applied to the LSTM output of the last time step.
    O = tf.matmul(LSTM_O[-1], weights['out']) + biases['out']

    return {
        'x': x,
        'H': H,
        'H_split': H_split,
        'LSTM_O': LSTM_O,
        'LSTM_S': LSTM_S,
        'O': O
    }

![title](images/rnn_mnist_look.jpg)

## Define other functions

In [4]:
# Build the network graph; 'O' holds the unnormalised class scores (logits).
my_rnn = RNN(x, istate, weights, biases, n_steps, 'basic')
y_pred = my_rnn['O']

# Mean softmax cross-entropy over the batch. The *_v2 op replaces the
# deprecated tf.nn.softmax_cross_entropy_with_logits; stop_gradient keeps the
# labels constant during backprop, matching the behaviour of the old op.
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=y_pred, labels=tf.stop_gradient(y)))
# NOTE: `optimizer` is the training op returned by minimize(), not the
# optimizer object itself.
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)

# Fraction of samples whose arg-max prediction matches the arg-max label.
correct_pred = tf.equal(tf.argmax(y_pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, dtype=tf.float32))

init = tf.global_variables_initializer()

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See tf.nn.softmax_cross_entropy_with_logits_v2.



# Training

In [5]:
sess = tf.Session()
sess.run(init)

# Number of full mini-batches per epoch (loop-invariant, so computed once).
total_batch = mnist.train.num_examples // batch_size

# The test set never changes, so reshape it and build its feed dict once.
test_images = mnist.test.images.reshape((-1, n_steps, dim_input))
test_feed = {
    x: test_images,
    y: mnist.test.labels,
    istate: np.zeros((test_images.shape[0], 2*dim_hidden)),
}

for epoch in range(epochs):
    avg_loss = 0.0

    # Loop over all batches
    for _ in range(total_batch):
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        # Reshape the flat images into [batch, n_steps, dim_input] sequences
        batch_x = batch_x.reshape((-1, n_steps, dim_input))
        train_feed = {
            x: batch_x,
            y: batch_y,
            istate: np.zeros((batch_x.shape[0], 2*dim_hidden)),
        }

        # Run one optimisation step and fetch the batch loss in the same
        # call, avoiding a second forward pass. The loss is therefore the
        # value the update was computed from.
        _, batch_loss = sess.run([optimizer, loss], feed_dict=train_feed)

        # Accumulate the epoch-average loss
        avg_loss += batch_loss / total_batch

    # Display logs per epoch step
    if epoch % display_step == 0:
        print('Epoch %03d/%03d,    Loss: %.4f' % (epoch, epochs, avg_loss))
        # NOTE: "train accuracy" is measured on the last mini-batch of the
        # epoch only, not on the whole training set.
        train_accuracy = sess.run(accuracy, feed_dict=train_feed)
        test_accuracy = sess.run(accuracy, feed_dict=test_feed)
        print('-->    Train Accuracy:', train_accuracy)
        print('-->    Test Accuracy:', test_accuracy)

Epoch 000/020,    Loss: 0.5199
-->    Train Accuracy: 0.9921875
-->    Test Accuracy: 0.9299
Epoch 002/020,    Loss: 0.0883
-->    Train Accuracy: 0.984375
-->    Test Accuracy: 0.9734
Epoch 004/020,    Loss: 0.0565
-->    Train Accuracy: 1.0
-->    Test Accuracy: 0.9796
Epoch 006/020,    Loss: 0.0387
-->    Train Accuracy: 1.0
-->    Test Accuracy: 0.9772
Epoch 008/020,    Loss: 0.0273
-->    Train Accuracy: 0.984375
-->    Test Accuracy: 0.9786
Epoch 010/020,    Loss: 0.0225
-->    Train Accuracy: 0.9921875
-->    Test Accuracy: 0.978
Epoch 012/020,    Loss: 0.0181
-->    Train Accuracy: 0.9921875
-->    Test Accuracy: 0.9794
Epoch 014/020,    Loss: 0.0109
-->    Train Accuracy: 1.0
-->    Test Accuracy: 0.9826
Epoch 016/020,    Loss: 0.0107
-->    Train Accuracy: 1.0
-->    Test Accuracy: 0.9805
Epoch 018/020,    Loss: 0.0085
-->    Train Accuracy: 1.0
-->    Test Accuracy: 0.9829


## What happens if we feed only the first 25 steps of x?

In [24]:
n_steps2 = 25

# Test with truncated inputs: keep only the first `n_steps2` steps of every
# test sequence and right-align them in an all-zero sequence of the full
# length, so the leading (n_steps - n_steps2) steps are zero padding.
test_img = mnist.test.images.reshape((-1, n_steps, dim_input))
test_img_truncated = np.zeros(test_img.shape)
# Use n_steps rather than a hard-coded 28 so the offset follows the
# configured sequence length.
test_img_truncated[:, n_steps - n_steps2:] = test_img[:, :n_steps2, :]

test_accuracy = sess.run(accuracy, feed_dict={x: test_img_truncated, y: mnist.test.labels, istate: np.zeros((test_img.shape[0], 2*dim_hidden))})
print('If we use %d seqs, Test accuracy becomes %.3f' % (n_steps2, test_accuracy))

If we use 25 seqs, Test accuracy becomes 0.859


## What is going on inside the RNN?

### Input to the RNN

In [28]:
# Draw a handful of test images to push through the graph.
# NOTE: this rebinds the `batch_size` used by the training cell; re-running
# training afterwards would use 5 unless the parameter cell is run again.
batch_size = 5
x_test = mnist.test.next_batch(batch_size)[0]
print('Shape of x_test is ', x_test.shape)

Shape of x_test is  (5, 784)


### Reshaped inputs

In [29]:
#Reshape (this will go into the network)
x_test1 = x_test.reshape((batch_size, n_steps, dim_input))
print('Shape of x_test1 is ', x_test1.shape)

Shape of x_test1 is  (5, 28, 28)


### Feeds: inputs and initial states

In [30]:
# Feed the reshaped samples plus an all-zero initial state with one row per
# sample; the row count is taken from x_test1 so the two always agree.
feeds = {x: x_test1, istate: np.zeros((x_test1.shape[0], 2*dim_hidden))}

### Each individual input to the LSTM

In [35]:
# Evaluate the reshaped (time-major, flattened) input node returned by RNN().
flat_input_node = my_rnn['x']
rnn_out_x = sess.run(flat_input_node, feed_dict=feeds)
print('Shape of rnn_out_x is ', rnn_out_x.shape)

Shape of rnn_out_x is  (140, 28)


### Each individual intermediate state 

In [36]:
# Evaluate the hidden projection of the flattened input.
hidden_node = my_rnn['H']
rnn_out_H = sess.run(hidden_node, feed_dict=feeds)
print('Shape of rnn_out_H is ', rnn_out_H.shape)

Shape of rnn_out_H is  (140, 128)


### Actual input to the LSTM (list)

In [40]:
# Evaluate the per-time-step chunks that are handed to the LSTM.
rnn_out_Hsplit = sess.run(my_rnn['H_split'], feed_dict=feeds)
n_chunks = len(rnn_out_Hsplit)
chunk_shape = rnn_out_Hsplit[0].shape
print('Type of rnn_out_Hsplit is', type(rnn_out_Hsplit))
print('Length of rnn_out_Hsplit is %s and the shape of each item is %s' % (n_chunks, chunk_shape))

Type of rnn_out_Hsplit is <class 'list'>
Length of rnn_out_Hsplit is 28 and the shape of each item is (5, 128)


### Output from the LSTM (list) 

In [41]:
# Evaluate the list of per-step LSTM outputs.
rnn_out_LSTM_O = sess.run(my_rnn['LSTM_O'], feed_dict=feeds)
n_outputs = len(rnn_out_LSTM_O)
output_shape = rnn_out_LSTM_O[0].shape
print('Type of rnn_out_LSTM_O is', type(rnn_out_LSTM_O))
print('Length of rnn_out_LSTM_O is %s and the shape of each item is %s' % (n_outputs, output_shape))

Type of rnn_out_LSTM_O is <class 'list'>
Length of rnn_out_LSTM_O is 28 and the shape of each item is (5, 128)


### State from the LSTM (LSTMStateTuple)

In [42]:
# Evaluate the final LSTM state returned by static_rnn.
rnn_out_LSTM_S = sess.run(my_rnn['LSTM_S'], feed_dict=feeds)
n_state_parts = len(rnn_out_LSTM_S)
state_part_shape = rnn_out_LSTM_S[0].shape
print('Type of rnn_out_LSTM_S is', type(rnn_out_LSTM_S))
print('Length of rnn_out_LSTM_S is %s and the shape of each item is %s' % (n_state_parts, state_part_shape))

Type of rnn_out_LSTM_S is <class 'tensorflow.python.ops.rnn_cell_impl.LSTMStateTuple'>
Length of rnn_out_LSTM_S is 2 and the shape of each item is (5, 128)


### Final Prediction

In [45]:
# Evaluate the output projection of the last LSTM output (the class scores).
logits_node = my_rnn['O']
rnn_out_O = sess.run(logits_node, feed_dict=feeds)
print('Type of rnn_out_O is', type(rnn_out_O))
print('Shape of rnn_out_O is', rnn_out_O.shape)

Type of rnn_out_O is <class 'numpy.ndarray'>
Shape of rnn_out_O is (5, 10)
