# LSTM 

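Solve the MNIST handwritten-digit classification problem using a single-layer LSTM. Each 28×28 image is fed to the network as a sequence of 28 rows, one row per time step.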

In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.contrib.rnn import BasicRNNCell, BasicLSTMCell
from tensorflow.contrib import rnn 

### Read MNIST dataset

In [2]:
from tensorflow.examples.tutorials.mnist import input_data

# Directory holding the MNIST archives, relative to this notebook.
mnist_dir = "../MNIST_data/"
# one_hot=True makes every label a one-hot vector instead of a class index.
mnist = input_data.read_data_sets(mnist_dir, one_hot=True)

Extracting ../MNIST_data/train-images-idx3-ubyte.gz
Extracting ../MNIST_data/train-labels-idx1-ubyte.gz
Extracting ../MNIST_data/t10k-images-idx3-ubyte.gz
Extracting ../MNIST_data/t10k-labels-idx1-ubyte.gz


### Define Hyperparameters

In [3]:
# Training hyperparameters, in order: optimiser step size, number of output
# classes (digits 0-9), and number of examples per mini-batch.
learning_rate, n_classes, batch_size = 0.001, 10, 100

### Define placeholders 

In [4]:
# Each image is presented as a sequence: 28 time steps of 28 values each.
n_steps = n_inputs = 28

# Batch of input images, shape (batch, n_steps, n_inputs); batch size left open.
x = tf.placeholder(dtype=tf.float32, shape=[None, n_steps, n_inputs])
# Split along the time axis into a Python list of n_steps tensors, which is the
# input format static_rnn expects.
x_unstack = tf.unstack(x, num=n_steps, axis=1)

# Target labels, one row of n_classes values per example.
y_ = tf.placeholder(dtype=tf.float32, shape=[None, n_classes])

# Scalar keep probability fed to the dropout wrapper at run time.
keep_prob = tf.placeholder(dtype=tf.float32, shape=())

### Define LSTM cell 

In [5]:
# Number of hidden units in the LSTM cell.
lstm_size = 128

# Build the plain cell first, then wrap it so that dropout (controlled by the
# keep_prob placeholder) is applied to the cell's outputs.
base_cell = BasicLSTMCell(lstm_size)
lstm = rnn.DropoutWrapper(base_cell, output_keep_prob=keep_prob)

### Define Network

In [6]:
# All-zero starting state (cell state c and hidden state h) for the LSTM.
# batch_size is a Python int here, so both tensors get a static first
# dimension of batch_size; any graph that takes this as its initial state
# must be fed exactly batch_size examples.
init_state = lstm.zero_state(batch_size=batch_size, dtype=tf.float32)
print(init_state)

LSTMStateTuple(c=<tf.Tensor 'DropoutWrapperZeroState/BasicLSTMCellZeroState/zeros:0' shape=(100, 128) dtype=float32>, h=<tf.Tensor 'DropoutWrapperZeroState/BasicLSTMCellZeroState/zeros_1:0' shape=(100, 128) dtype=float32>)


In [7]:
# Output projection from the LSTM hidden vector (lstm_size) to class scores
# (n_classes); both parameters start from standard-normal random values.
out_weights = tf.Variable(initial_value=tf.random_normal(shape=[lstm_size, n_classes]))
out_bias = tf.Variable(initial_value=tf.random_normal(shape=[n_classes]))

In [8]:
# Unroll the LSTM over the list of per-time-step tensors in x_unstack.
# No explicit initial_state is passed: when only dtype is given, static_rnn
# creates the zero state itself from the batch dimension of the inputs. The
# starting state is still all zeros, but the graph is no longer tied to a
# fixed number of examples per feed, so evaluation can use any batch size.
# `outputs` is a list with one output tensor per time step; the final state
# is not needed and is discarded.
outputs, _ = rnn.static_rnn(cell=lstm, inputs=x_unstack, dtype=tf.float32)

In [9]:
# Only the output at the final time step is used for classification.
last_output = outputs[-1]
# Linear projection of that output to unnormalised class scores (logits).
y = tf.add(tf.matmul(last_output, out_weights), out_bias)

In [10]:
# Loss: softmax cross-entropy between the logits and the target labels,
# computed per example and then averaged over the batch.
per_example_loss = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y)
loss = tf.reduce_mean(per_example_loss)

# Optimisation: a single Adam update step that minimises the mean loss.
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
opt = optimizer.minimize(loss)

# Evaluation: a prediction is correct when the highest-scoring class matches
# the position of the largest entry in the target label row.
predicted_class = tf.argmax(y, 1)
target_class = tf.argmax(y_, 1)
correct_prediction = tf.equal(predicted_class, target_class)
# Fraction of correct predictions in the fed batch.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

### Training 

In [14]:
#initialize variables
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    for i in range(1500):
        # Draw the next mini-batch and reshape the flat image vectors into
        # (batch, 28, 28) sequences to match the input placeholder.
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        batch_x = batch_x.reshape((-1, 28, 28))
        # One optimisation step with dropout active (keep probability 0.8).
        sess.run(opt, feed_dict={x: batch_x, y_: batch_y, keep_prob: 0.8})

        # Report progress on the first iteration and every 50th thereafter.
        # Note: this is accuracy on the training batch that was just used for
        # the update, with dropout disabled (keep_prob=1), not held-out data.
        if (i+1)%50 == 0 or i==0:
            acc = sess.run(accuracy, feed_dict={x: batch_x, y_: batch_y, keep_prob: 1})
            print("For iter {0}, Accuracy: {1}".format(i+1, acc))

    # Test accuracy over the whole test split rather than a single random
    # batch. The split is evaluated in chunks of batch_size so every feed has
    # the same size as the training batches. Because all chunks are the same
    # size, the mean of the per-chunk accuracies equals the accuracy over all
    # evaluated examples. Any trailing examples that do not fill a complete
    # chunk are skipped.
    test_images = mnist.test.images.reshape((-1, 28, 28))
    test_labels = mnist.test.labels
    n_evaluated = (len(test_images) // batch_size) * batch_size
    chunk_accuracies = []
    for start in range(0, n_evaluated, batch_size):
        stop = start + batch_size
        chunk_accuracies.append(
            sess.run(accuracy, feed_dict={x: test_images[start:stop],
                                          y_: test_labels[start:stop],
                                          keep_prob: 1}))
    test_acc = np.mean(chunk_accuracies)
    print("Testing Accuracy: {0:.4}".format(test_acc))

For iter 1, Accuracy: 0.1599999964237213
For iter 50, Accuracy: 0.5799999833106995
For iter 100, Accuracy: 0.7300000190734863
For iter 150, Accuracy: 0.8799999952316284
For iter 200, Accuracy: 0.9100000262260437
For iter 250, Accuracy: 0.9399999976158142
For iter 300, Accuracy: 0.949999988079071
For iter 350, Accuracy: 0.9300000071525574
For iter 400, Accuracy: 0.9800000190734863
For iter 450, Accuracy: 0.949999988079071
For iter 500, Accuracy: 0.949999988079071
For iter 550, Accuracy: 0.9800000190734863
For iter 600, Accuracy: 0.9800000190734863
For iter 650, Accuracy: 0.9800000190734863
For iter 700, Accuracy: 0.9599999785423279
For iter 750, Accuracy: 0.949999988079071
For iter 800, Accuracy: 0.9599999785423279
For iter 850, Accuracy: 0.9900000095367432
For iter 900, Accuracy: 0.9700000286102295
For iter 950, Accuracy: 0.949999988079071
For iter 1000, Accuracy: 0.949999988079071
For iter 1050, Accuracy: 0.9700000286102295
For iter 1100, Accuracy: 0.9800000190734863
For iter 1150, Ac