# LSTM on MNIST: static, dynamic and bidirectional RNN

Classify MNIST digits with a stacked (multi-layer) LSTM, feeding each 28×28 image as a sequence of 28 rows of 28 pixels.

In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.contrib.rnn import BasicRNNCell, BasicLSTMCell
from tensorflow.contrib import rnn 

### Read MNIST dataset

In [2]:
from tensorflow.examples.tutorials.mnist import input_data
# Load MNIST from ../MNIST_data/ with labels returned as one-hot vectors.
mnist = input_data.read_data_sets(train_dir="../MNIST_data/", one_hot=True)

Extracting ../MNIST_data/train-images-idx3-ubyte.gz
Extracting ../MNIST_data/train-labels-idx1-ubyte.gz
Extracting ../MNIST_data/t10k-images-idx3-ubyte.gz
Extracting ../MNIST_data/t10k-labels-idx1-ubyte.gz


### Define Hyperparameters

In [14]:
# --- optimisation ---
batch_size = 100        # examples per mini-batch (also fixes the LSTM zero-state batch dimension)
learning_rate = 0.001   # step size handed to the Adam optimizer

# --- model / problem dimensions ---
n_classes = 10          # width of the one-hot label vectors and of the output layer
lstm_size = 32          # number of units in each LSTM cell
num_layers = 3          # how many LSTM cells are stacked

### Define placeholders 

In [15]:
# Images are fed as (batch, 28, 28); the batch dimension is left open.
x = tf.placeholder(dtype=tf.float32, shape=[None, 28, 28])
# One-hot labels, one row of n_classes entries per image.
y_ = tf.placeholder(dtype=tf.float32, shape=[None, n_classes])
# Scalar dropout keep probability, supplied through feed_dict on every run.
keep_prob = tf.placeholder(dtype=tf.float32, shape=())

### Define LSTM cell 

In [16]:
def build_cell(lstm_size, keep_prob):
    """Create one LSTM cell whose outputs pass through dropout.

    Args:
        lstm_size: number of units in the BasicLSTMCell.
        keep_prob: output keep probability forwarded to DropoutWrapper.

    Returns:
        The BasicLSTMCell wrapped in a DropoutWrapper.
    """
    base_cell = BasicLSTMCell(num_units=lstm_size)
    # Dropout is applied to the cell's outputs only (output_keep_prob).
    return tf.contrib.rnn.DropoutWrapper(base_cell, output_keep_prob=keep_prob)

### Define Multi-cell lstm

In [17]:
def build_multi_layer_lstm(lstm_size, num_layers, batch_size, keep_prob):
    """Stack `num_layers` dropout-wrapped LSTM cells into a single cell.

    Args:
        lstm_size: number of units in every layer's LSTM cell.
        num_layers: how many cells to stack.
        batch_size: batch dimension used to build the zero initial state.
        keep_prob: dropout output keep probability for every layer.

    Returns:
        A tuple (multi_cell, initial_state): the MultiRNNCell and its
        float32 zero state for `batch_size` examples.
    """
    layer_cells = []
    for _ in range(num_layers):
        # A fresh cell object per layer.
        layer_cells.append(build_cell(lstm_size, keep_prob))

    stacked = tf.contrib.rnn.MultiRNNCell(cells=layer_cells)
    zero_state = stacked.zero_state(batch_size, tf.float32)
    return stacked, zero_state

### Define Network

In [18]:
# Build the stacked LSTM cell and its zero initial state from the hyperparameters.
lstm, init_state = build_multi_layer_lstm(
    lstm_size=lstm_size,
    num_layers=num_layers,
    batch_size=batch_size,
    keep_prob=keep_prob,
)

In [19]:
# Output layer: maps an lstm_size-wide LSTM output to n_classes logits.
# Both parameters start from tf.random_normal samples.
out_weights = tf.Variable(tf.random_normal(shape=[lstm_size, n_classes]))
out_bias = tf.Variable(tf.random_normal(shape=[n_classes]))

### static rnn

In [None]:
# static_rnn expects a Python list with one tensor per time step, so split the
# (batch, 28, 28) input along axis 1 into 28 tensors.
x_unstack = tf.unstack(x, num=28, axis=1)

# static_rnn returns (list of per-step outputs, final state). The final state
# is not used below; it is bound to `_static`, matching `_dynamic` in the
# dynamic-rnn section.
outputs_static, _static = rnn.static_rnn(
    cell=lstm, inputs=x_unstack, initial_state=init_state, dtype=tf.float32)

In [None]:
# Show the list of per-time-step output tensors.
outputs_static

In [None]:
# Logits: project the output of the last time step through the output layer.
y_static = tf.matmul(a=outputs_static[-1], b=out_weights) + out_bias

In [None]:
# Inspect the static-rnn logits tensor (bare last expression, shown by the notebook).
y_static

In [None]:
# Loss: softmax cross-entropy between the static-rnn logits and the one-hot
# labels, averaged over the batch.
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_static))

# Optimisation: one Adam step on the loss.
opt = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)

# Evaluation: fraction of examples whose arg-max logit matches the arg-max label.
correct_prediction = tf.equal(tf.argmax(y_static, axis=1), tf.argmax(y_, axis=1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

### Training static rnn

In [None]:
# Train the static-rnn model, logging accuracy on the current training batch,
# then report accuracy averaged over the test set.
num_iterations = 1500    # number of mini-batch updates
log_every = 50           # log training accuracy every `log_every` iterations
train_keep_prob = 0.5    # dropout keep probability used for the update step

#initialize variables
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    for i in range(num_iterations):

        batch_x, batch_y = mnist.train.next_batch(batch_size)
        # Flat pixel rows -> (batch, 28 time steps, 28 features per step).
        batch_x = batch_x.reshape((-1, 28, 28))
        sess.run(opt, feed_dict={x: batch_x, y_: batch_y, keep_prob: train_keep_prob})

        if (i + 1) % log_every == 0 or i == 0:
            # Dropout is disabled (keep_prob = 1) for reporting. Accuracy and
            # loss are fetched in a single forward pass instead of two.
            acc, los = sess.run(
                [accuracy, loss],
                feed_dict={x: batch_x, y_: batch_y, keep_prob: 1.0})
            print("For iter {0}, Accuracy: {1}".format(i+1, acc))

    #calculating test accuracy
    # The LSTM initial state was built for exactly `batch_size` rows, so the
    # test set is evaluated in full batches of that size and the per-batch
    # accuracies are averaged (a trailing partial batch, if any, is skipped).
    test_images = mnist.test.images.reshape((-1, 28, 28))
    test_labels = mnist.test.labels
    batch_accuracies = []
    for start in range(0, len(test_images) - batch_size + 1, batch_size):
        test_x = test_images[start:start + batch_size]
        test_y = test_labels[start:start + batch_size]
        batch_accuracies.append(
            sess.run(accuracy, feed_dict={x: test_x, y_: test_y, keep_prob: 1.0}))
    test_acc = float(np.mean(batch_accuracies))
    print("Testing Accuracy: {0:.4}".format(test_acc))

### dynamic rnn 

In [37]:
# dynamic_rnn consumes the whole (batch, time, features) tensor directly, so
# no unstacking is needed. It returns (outputs, final state); the final state
# is not used below.
outputs_dynamic, _dynamic = tf.nn.dynamic_rnn(
    cell=lstm,
    inputs=x,
    initial_state=init_state,
    dtype=tf.float32,
)

In [38]:
# Keep only the last time step of every sequence: (batch, time, units) -> (batch, units).
final_output = outputs_dynamic[:, -1]

In [39]:
# Inspect the dynamic-rnn output tensor and its static shape.
outputs_dynamic

<tf.Tensor 'rnn_2/transpose:0' shape=(100, 28, 32) dtype=float32>

In [22]:
# Logits: project the last-step LSTM output through the output layer.
y_dynamic = tf.matmul(a=final_output, b=out_weights) + out_bias

In [23]:
# Inspect the dynamic-rnn logits tensor and its static shape.
y_dynamic

<tf.Tensor 'add:0' shape=(100, 10) dtype=float32>

In [24]:
# Loss: softmax cross-entropy between the dynamic-rnn logits and the one-hot
# labels, averaged over the batch.
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_dynamic))

# Optimisation: one Adam step on the loss.
opt = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)

# Evaluation: fraction of examples whose arg-max logit matches the arg-max label.
correct_prediction = tf.equal(tf.argmax(y_dynamic, axis=1), tf.argmax(y_, axis=1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [30]:
# Train the dynamic-rnn model, logging accuracy on the current training batch,
# then report accuracy averaged over the test set.
num_iterations = 1500    # number of mini-batch updates
log_every = 50           # log training accuracy every `log_every` iterations
train_keep_prob = 0.5    # dropout keep probability used for the update step

#initialize variables
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    for i in range(num_iterations):

        batch_x, batch_y = mnist.train.next_batch(batch_size)
        # Flat pixel rows -> (batch, 28 time steps, 28 features per step);
        # same reshape as the one applied to the test images below.
        batch_x = batch_x.reshape((-1, 28, 28))
        sess.run(opt, feed_dict={x: batch_x, y_: batch_y, keep_prob: train_keep_prob})

        if (i + 1) % log_every == 0 or i == 0:
            # Dropout is disabled (keep_prob = 1) for reporting. Accuracy and
            # loss are fetched in a single forward pass instead of two.
            acc, los = sess.run(
                [accuracy, loss],
                feed_dict={x: batch_x, y_: batch_y, keep_prob: 1.0})
            print("For iter {0}, Accuracy: {1}".format(i+1, acc))

    #calculating test accuracy
    # The LSTM initial state was built for exactly `batch_size` rows, so the
    # test set is evaluated in full batches of that size and the per-batch
    # accuracies are averaged (a trailing partial batch, if any, is skipped).
    test_images = mnist.test.images.reshape((-1, 28, 28))
    test_labels = mnist.test.labels
    batch_accuracies = []
    for start in range(0, len(test_images) - batch_size + 1, batch_size):
        test_x = test_images[start:start + batch_size]
        test_y = test_labels[start:start + batch_size]
        batch_accuracies.append(
            sess.run(accuracy, feed_dict={x: test_x, y_: test_y, keep_prob: 1.0}))
    test_acc = float(np.mean(batch_accuracies))
    print("Testing Accuracy: {0:.4}".format(test_acc))

For iter 1, Accuracy: 0.1599999964237213
For iter 50, Accuracy: 0.3499999940395355
For iter 100, Accuracy: 0.5199999809265137
For iter 150, Accuracy: 0.6800000071525574
For iter 200, Accuracy: 0.699999988079071
For iter 250, Accuracy: 0.8199999928474426
For iter 300, Accuracy: 0.8299999833106995
For iter 350, Accuracy: 0.8799999952316284
For iter 400, Accuracy: 0.9200000166893005
For iter 450, Accuracy: 0.8600000143051147
For iter 500, Accuracy: 0.8999999761581421
For iter 550, Accuracy: 0.8899999856948853
For iter 600, Accuracy: 0.8799999952316284
For iter 650, Accuracy: 0.9200000166893005
For iter 700, Accuracy: 0.9200000166893005
For iter 750, Accuracy: 0.9100000262260437
For iter 800, Accuracy: 0.9300000071525574
For iter 850, Accuracy: 0.8899999856948853
For iter 900, Accuracy: 0.949999988079071
For iter 950, Accuracy: 0.9599999785423279
For iter 1000, Accuracy: 0.9300000071525574
For iter 1050, Accuracy: 0.949999988079071
For iter 1100, Accuracy: 0.8999999761581421
For iter 1150,

### Bidirectional lstm

In [31]:
# Two independent stacked LSTM cells (each with its own zero state): one for
# the forward pass and one for the backward pass.
lstm_fw, init_state_fw = build_multi_layer_lstm(
    lstm_size=lstm_size, num_layers=num_layers,
    batch_size=batch_size, keep_prob=keep_prob)
lstm_bw, init_state_bw = build_multi_layer_lstm(
    lstm_size=lstm_size, num_layers=num_layers,
    batch_size=batch_size, keep_prob=keep_prob)

In [40]:
# Fresh output-layer parameters for the bidirectional model; this rebinds the
# out_weights / out_bias names to new variables.
out_weights = tf.Variable(tf.random_normal(shape=[lstm_size, n_classes]))
out_bias = tf.Variable(tf.random_normal(shape=[n_classes]))

In [41]:
# Run the forward and backward cells over the same input. Here
# `outputs_dynamic` is a (forward_outputs, backward_outputs) pair rather than
# a single tensor.
outputs_dynamic, _dynamic = tf.nn.bidirectional_dynamic_rnn(
    cell_fw=lstm_fw,
    cell_bw=lstm_bw,
    inputs=x,
    initial_state_fw=init_state_fw,
    initial_state_bw=init_state_bw,
    dtype=tf.float32,
)

In [42]:
# Inspect the (forward, backward) output tensors and their static shapes.
outputs_dynamic

(<tf.Tensor 'bidirectional_rnn_1/fw/fw/transpose:0' shape=(100, 28, 32) dtype=float32>,
 <tf.Tensor 'ReverseV2_1:0' shape=(100, 28, 32) dtype=float32>)

In [43]:
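# TODO: decide how to combine the forward and backward outputs (the two
# (100, 28, 32) tensors shown above) before applying the output layer.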