# Listing 4-3. TensorFlow Implementation of Recurrent Neural Network using LSTM for Classification

In [1]:
##################################
#Import the Required Libraries
##################################

import tensorflow as tf
from tensorflow.contrib import rnn
import numpy as np

# Import MNIST data from the MNIST_data/ directory; labels are one-hot encoded
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# Training parameters
learning_rate = 0.001    # step size handed to the optimizer
training_iters = 100000  # not referenced by this listing; training is driven by `epochs`
batch_size = 128         # examples per mini-batch
display_step = 50        # report loss/accuracy every `display_step` batches

# Network parameters
n_input = 28    # MNIST data input per timestep (img shape: 28*28)
n_steps = 28    # timesteps
n_hidden = 128  # hidden layer num of features
n_classes = 10  # MNIST total classes (0-9 digits)
epochs = 2      # number of passes over the training set
num_train = mnist.train.num_examples
# Ceiling division: enough batches to cover every training example once per epoch,
# without scheduling a surplus batch when num_train is an exact multiple of batch_size.
num_batches = (num_train + batch_size - 1) // batch_size


def RNN(x, weights, biases):
    """Build a single-layer LSTM classifier and return its output logits.

    x is unstacked along axis 1 into n_steps tensors, which are fed through one
    BasicLSTMCell of n_hidden units. The output of the last timestep is then
    projected with weights['out'] and biases['out'].
    """
    # static_rnn takes a Python list holding one tensor per timestep.
    step_inputs = tf.unstack(x, n_steps, 1)

    cell = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)

    # Only the per-step outputs are used; the final state is discarded.
    step_outputs, _ = rnn.static_rnn(cell, step_inputs, dtype=tf.float32)

    # Linear activation on the last output of the recurrent loop.
    last_output = step_outputs[-1]
    logits = tf.matmul(last_output, weights['out']) + biases['out']
    return logits

# tf Graph input
# x: batch of sequences with n_steps timesteps of n_input features each.
# y: batch of label vectors with n_classes entries (fed with one-hot labels below).
x = tf.placeholder("float", [None, n_steps, n_input])
y = tf.placeholder("float", [None, n_classes])

# Define weights
# Output projection from the LSTM output (n_hidden) to the class scores (n_classes),
# initialised from a normal distribution.
weights = {
    'out': tf.Variable(tf.random_normal([n_hidden, n_classes]))
}
biases = {
    'out': tf.Variable(tf.random_normal([n_classes]))
}


# Unnormalised class scores (logits) produced by the recurrent network
pred = RNN(x, weights, biases)

# Define loss and optimizer
# Mean softmax cross-entropy between the logits and the labels, minimised with Adam.
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Evaluate model
# A prediction counts as correct when the arg-max of the logits equals the arg-max of the label.
correct_pred = tf.equal(tf.argmax(pred,1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Initializing the variables
init = tf.global_variables_initializer()



# Train for `epochs` passes over the data, reporting mini-batch metrics every
# `display_step` batches, then evaluate on a slice of the test set.
# Every print call takes a single pre-built string, so the output is the same
# under Python 2 and Python 3.
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(epochs):
        for step in range(num_batches):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            # Turn each flat image into n_steps rows of n_input pixels; -1 keeps the
            # reshape valid for however many examples the batch actually contains.
            batch_x = batch_x.reshape((-1, n_steps, n_input))
            # Run optimization op (backprop)
            sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
            if (step + 1) % display_step == 0:
                # Fetch batch accuracy and batch loss in a single forward pass
                acc, loss = sess.run([accuracy, cost], feed_dict={x: batch_x, y: batch_y})
                print("Epoch: " + str(epoch + 1) + ",step:" + str(step + 1) +
                      ", Minibatch Loss= " + "{:.6f}".format(loss) +
                      ", Training Accuracy= " + "{:.5f}".format(acc))
    print("Optimization Finished!")

    # Calculate accuracy on the first `test_len` MNIST test images
    test_len = 1024
    test_data = mnist.test.images[:test_len].reshape((-1, n_steps, n_input))
    test_label = mnist.test.labels[:test_len]
    test_acc = sess.run(accuracy, feed_dict={x: test_data, y: test_label})
    print("Testing Accuracy: " + str(test_acc))

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Epoch: 1,step:50, Minibatch Loss= 0.874432, Training Accuracy= 0.67188
Epoch: 1,step:100, Minibatch Loss= 0.837254, Training Accuracy= 0.77344
Epoch: 1,step:150, Minibatch Loss= 0.261696, Training Accuracy= 0.94531
Epoch: 1,step:200, Minibatch Loss= 0.406846, Training Accuracy= 0.88281
Epoch: 1,step:250, Minibatch Loss= 0.227278, Training Accuracy= 0.93750
Epoch: 1,step:300, Minibatch Loss= 0.159254, Training Accuracy= 0.92969
Epoch: 1,step:350, Minibatch Loss= 0.260627, Training Accuracy= 0.91406
Epoch: 1,step:400, Minibatch Loss= 0.130052, Training Accuracy= 0.94531
Epoch: 2,step:50, Minibatch Loss= 0.252877, Training Accuracy= 0.92188
Epoch: 2,step:100, Minibatch Loss= 0.096116, Training Accuracy= 0.97656
Epoch: 2,step:150, Minibatch Loss= 0.099564, Training Accuracy= 0.97656
Epoch: 2,step:20

# Listing 4-4. Next word Prediction and sentence completion in TensorFlow using Recurrent Neural Network 

In [3]:
import numpy as np
import tensorflow as tf
from tensorflow.contrib import rnn
import random
import collections
import time
# Parameters
learning_rate = 0.001   # step size handed to the optimizer
training_iters = 50000  # number of training steps (one word window per step)
display_step = 500      # print averaged loss/accuracy every display_step steps
n_input = 3             # number of consecutive words fed in to predict the next one

# number of units in RNN cell
n_hidden = 512

# Start from an empty default graph so ops built earlier in the same kernel are dropped
tf.reset_default_graph()

def read_data(fname):
    """Read a text file and return its words as a flat 1-D NumPy array.

    Every line is lower-cased and split on whitespace; the words of all lines
    are concatenated in file order. Blank lines contribute nothing.

    Args:
        fname: path of the text file to read.

    Returns:
        1-D numpy array with one lower-cased token per element.
    """
    with open(fname) as f:
        # Flatten while reading. Collecting one word list per line first hands
        # np.array a ragged nested list whenever lines differ in length, which
        # does not yield the flat word array callers index into.
        words = [word for line in f for word in line.lower().split()]
    return np.array(words)

# Corpus file (relative path, so it is resolved against the working directory)
# and the tokenised corpus that read_data builds from it.
train_file = 'alice in wonderland.txt'
train_data = read_data(train_file)


def build_dataset(train_data):
    """Build word -> id and id -> word lookup tables for a sequence of words.

    Ids follow the order returned by Counter.most_common(), so the most
    frequent word receives id 0.

    Returns:
        (dictionary, reverse_dictionary): word -> id and id -> word mappings.
    """
    ranked = collections.Counter(train_data).most_common()
    dictionary = {word: idx for idx, (word, _) in enumerate(ranked)}
    reverse_dictionary = {idx: word for word, idx in dictionary.items()}
    return dictionary, reverse_dictionary

# Word <-> id lookup tables; the vocabulary size fixes the one-hot width used below
dictionary, reverse_dictionary = build_dataset(train_data)
vocab_size = len(dictionary)


# Place holder for Mini batch input output
# The last dimension of x is the one-hot width, so it has to equal vocab_size.
# Taking it from the data instead of a hard-coded 174 keeps the graph valid for any corpus.
x = tf.placeholder("float", [None, n_input, vocab_size])
y = tf.placeholder("float", [None, vocab_size])

# RNN output node weights and biases
weights = {
    'out': tf.Variable(tf.random_normal([n_hidden, vocab_size]))
}
biases = {
    'out': tf.Variable(tf.random_normal([vocab_size]))
}

def input_one_hot(num):
    """Return the one-hot encoding of word id `num` as a list of vocab_size floats."""
    encoding = np.zeros(vocab_size)
    encoding[num] = 1
    return encoding.tolist()

def RNN(x, weights, biases):
    """Build a two-layer LSTM next-word predictor and return its output logits.

    x is unstacked along axis 1 into n_input tensors (one per input word) and
    run through two stacked BasicLSTMCells of n_hidden units each. The output
    of the last step is projected with weights['out'] and biases['out'].
    """
    step_inputs = tf.unstack(x, n_input, 1)
    print(np.shape(step_inputs))

    # 2 layered LSTM
    layers = [rnn.BasicLSTMCell(n_hidden) for _ in range(2)]
    stacked_cell = rnn.MultiRNNCell(layers)

    # generate prediction; the final state is not needed
    step_outputs, _ = rnn.static_rnn(stacked_cell, step_inputs, dtype=tf.float32)

    # there are n_input outputs but we only require the last output
    last_output = step_outputs[-1]
    return tf.matmul(last_output, weights['out']) + biases['out']

# Unnormalised scores (logits) over the vocabulary for the word following the input window
pred = RNN(x, weights, biases)

# Loss and optimizer
# Mean softmax cross-entropy between the logits and the target vector, minimised with RMSProp.
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate).minimize(cost)

# Model evaluation
# A prediction counts as correct when the arg-max of the logits equals the arg-max of the target.
correct_pred = tf.equal(tf.argmax(pred,1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Initializing the variables
init = tf.global_variables_initializer()

# Launch the graph
# Train on sliding windows of n_input words (target = the word that follows),
# then extend a prompt word by word with the trained model.
with tf.Session() as session:
    session.run(init)
    step = 0
    offset = random.randint(0,n_input+1)
    end_offset = n_input + 1
    acc_total = 0
    loss_total = 0

    while step < training_iters:
        # Wrap around once a full window (n_input words plus the target) no longer fits
        if offset > (len(train_data)-end_offset):
            offset = random.randint(0, n_input+1)

        # One-hot encode the n_input context words and the target word
        symbols_in_keys = [ input_one_hot(dictionary[ str(train_data[i])]) for i in range(offset, offset+n_input) ]
        symbols_in_keys = np.reshape(np.array(symbols_in_keys), [-1, n_input,vocab_size])
        symbols_out_onehot = np.zeros([vocab_size], dtype=float)
        symbols_out_onehot[dictionary[str(train_data[offset+n_input])]] = 1.0
        symbols_out_onehot = np.reshape(symbols_out_onehot,[1,-1])

        _, acc, loss, onehot_pred = session.run([optimizer, accuracy, cost, pred], \
                                                feed_dict={x: symbols_in_keys, y: symbols_out_onehot})
        loss_total += loss
        acc_total += acc

        if (step+1) % display_step == 0:
            print("Iter= " + str(step+1) + ", Average Loss= " + \
                  "{:.6f}".format(loss_total/display_step) + ", Average Accuracy= " + \
                  "{:.2f}%".format(100*acc_total/display_step))
            acc_total = 0
            loss_total = 0
            symbols_in = [train_data[i] for i in range(offset, offset + n_input)]
            symbols_out = train_data[offset + n_input]
            # onehot_pred is already a NumPy array, so take the arg-max with NumPy:
            # tf.argmax(...).eval() would add a new op to the graph on every call.
            symbols_out_pred = reverse_dictionary[int(np.argmax(onehot_pred, axis=1)[0])]
            print("%s - Actual word:[%s] vs Predicted word:[%s]" % (symbols_in,symbols_out,symbols_out_pred))
        step += 1
        offset += (n_input+1)
    print("Optimization Finished!")

    sentence = 'i only wish'
    words = sentence.split(' ')
    try:
        # Seed the generator with the prompt itself (its last n_input words), so the
        # continuation actually follows the text that is printed in front of it.
        symbols_in_keys = [ input_one_hot(dictionary[word]) for word in words[-n_input:] ]
    except KeyError:
        # Only a prompt word missing from the vocabulary is reported this way;
        # any other failure is left to surface normally.
        print("Word not in dictionary")
    else:
        for i in range(28):
            keys = np.reshape(np.array(symbols_in_keys), [-1, n_input,vocab_size])
            onehot_pred = session.run(pred, feed_dict={x: keys})
            onehot_pred_index = int(np.argmax(onehot_pred, axis=1)[0])
            sentence = "%s %s" % (sentence,reverse_dictionary[onehot_pred_index])
            # Slide the window: drop the oldest word, append the one just predicted
            symbols_in_keys = symbols_in_keys[1:]
            symbols_in_keys.append(input_one_hot(onehot_pred_index))
        print(sentence)

(3,)
Iter= 500, Average Loss= 4.242353, Average Accuracy= 16.60%
['so', 'stingy', 'about'] - Actual word:[it] vs Predicted word:[it]
Iter= 1000, Average Loss= 3.598423, Average Accuracy= 29.20%
['.', 'however,', 'she'] - Actual word:[did] vs Predicted word:[she]
Iter= 1500, Average Loss= 2.073462, Average Accuracy= 46.20%
['barley-sugar', 'and', 'such'] - Actual word:[things] vs Predicted word:[things]
Iter= 2000, Average Loss= 0.798790, Average Accuracy= 77.80%
[',', 'because', 'she'] - Actual word:[was] vs Predicted word:[was]
Iter= 2500, Average Loss= 0.338358, Average Accuracy= 91.60%
[',', "'", 'and'] - Actual word:[vinegar] vs Predicted word:[won't]
Iter= 3000, Average Loss= 0.366451, Average Accuracy= 90.40%
['so', 'close', 'to'] - Actual word:[her] vs Predicted word:[her]
Iter= 3500, Average Loss= 1.110132, Average Accuracy= 74.20%
['hot-tempered', ',', "'"] - Actual word:[she] vs Predicted word:[she]
Iter= 4000, Average Loss= 0.522139, Average Accuracy= 88.00%
['squeezed', 'he

# Listing 4-5. Text used as Corpus in Listing 4-4

' You can't think how glad I am to see you again , you dear old thing ! ' said the Duchess , as she tucked her arm affectionately into Alice's , and they walked off together . Alice was very glad to find her in such a pleasant temper , and thought to herself that perhaps it was only the pepper that had made her so savage when they met in the kitchen . ' When I'm a Duchess , ' she said to herself , ( not in a very hopeful tone though ) , ' I won't have any pepper in my kitchen at all . Soup does very well without — Maybe it's always pepper that makes people hot-tempered , ' she went on , very much pleased at having found out a new kind of rule , ' and vinegar that makes them sour — and camomile that makes them bitter — and — and barley-sugar and such things that make children sweet-tempered . I only wish people knew that : then they wouldn't be so stingy about it , you know — 'She had quite forgotten the Duchess by this time , and was a little startled when she heard her voice close to her

ear . ' You're thinking about something , my dear , and that makes you forget to talk . I can't tell you just now what the moral of that is , but I shall remember it in a bit . ' ' Perhaps it hasn't one , ' Alice ventured to remark . ' Tut , tut , child ! ' said the Duchess . ' Everything's got a moral , if only you can find it . ' And she squeezed herself up closer to Alice's side as she spoke . Alice did not much like keeping so close to her : first , because the Duchess was very ugly ; and secondly , because she was exactly the right height to rest her chin upon Alice's shoulder , and it was an uncomfortably sharp chin . However, she did not like to be rude , so she bore it as well as she could . 

## Code improved for Python 3.x

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.contrib import rnn
import random
import collections
import time
# Parameters
learning_rate = 0.001   # step size handed to the optimizer
# Earlier setting, kept for reference (presumably reduced below to shorten the run):
#training_iters = 50000
training_iters = 2000   # number of training steps (one word window per step)
display_step = 500      # print averaged loss/accuracy every display_step steps
n_input = 3             # number of consecutive words fed in to predict the next one

# number of units in RNN cell
n_hidden = 512

# Start from an empty default graph so ops built earlier in the same kernel are dropped
tf.reset_default_graph()

def read_data(fname):
    """Read a text file and return its tokenised lines as a 1-D NumPy array.

    Each line is stripped, lower-cased and split on whitespace; the resulting
    list of per-line word lists is converted with np.array and reshaped to one
    dimension.

    NOTE(review): when the lines differ in length the nested list is ragged, so
    the elements are presumably whole per-line word lists rather than single
    words (the rest of this listing indexes train_data[0][i]); confirm against
    the NumPy version in use.
    """
    with open(fname) as f:
        raw_lines = f.readlines()
    tokenized = [line.strip().lower().split() for line in raw_lines]
    return np.reshape(np.array(tokenized), [-1, ])

# Corpus file (relative path, so it is resolved against the working directory)
# and the tokenised corpus that read_data builds from it.
train_file = 'alice in wonderland.txt'
train_data = read_data(train_file)


def build_dataset(train_data):
    """Build word -> id and id -> word lookup tables from train_data[0].

    Only the first element of train_data is counted. Ids follow the order
    returned by Counter.most_common(), so the most frequent word gets id 0.

    Returns:
        (dictionary, reverse_dictionary): word -> id and id -> word mappings.
    """
    dictionary = {}
    for rank, (word, _) in enumerate(collections.Counter(train_data[0]).most_common()):
        dictionary[word] = rank
    reverse_dictionary = {rank: word for word, rank in dictionary.items()}
    return dictionary, reverse_dictionary

# Word <-> id lookup tables; the vocabulary size fixes the one-hot width used below
dictionary, reverse_dictionary = build_dataset(train_data)
vocab_size = len(dictionary)


# Place holder for Mini batch input output
# The one-hot width is taken from vocab_size; the earlier hard-coded version was:
#x = tf.placeholder("float", [None, n_input, 174])
x = tf.placeholder("float", [None, n_input, vocab_size])
y = tf.placeholder("float", [None, vocab_size])

# RNN output node weights and biases
# Projection from the LSTM output (n_hidden) to one score per vocabulary entry,
# initialised from a normal distribution.
weights = {
    'out': tf.Variable(tf.random_normal([n_hidden, vocab_size]))
}
biases = {
    'out': tf.Variable(tf.random_normal([vocab_size]))
}

def input_one_hot(num):
    """One-hot encode word id `num`: a list of vocab_size floats, 1.0 at `num`, 0.0 elsewhere."""
    one_hot = np.zeros(vocab_size)
    one_hot[num] = 1
    as_list = one_hot.tolist()
    return as_list

def RNN(x, weights, biases):
    """Return next-word logits from a two-layer LSTM run over the input window.

    The input is split along axis 1 into n_input per-word tensors, passed
    through two stacked BasicLSTMCells (n_hidden units each), and the output
    of the final step is mapped through weights['out'] and biases['out'].
    """
    word_steps = tf.unstack(x, n_input, 1)
    print(np.shape(word_steps))

    # 2 layered LSTM
    first_layer = rnn.BasicLSTMCell(n_hidden)
    second_layer = rnn.BasicLSTMCell(n_hidden)
    rnn_cell = rnn.MultiRNNCell([first_layer, second_layer])

    # generate prediction; the returned state is unused
    outputs, _ = rnn.static_rnn(rnn_cell, word_steps, dtype=tf.float32)

    # there are n_input outputs but we only require the last output
    projected = tf.matmul(outputs[-1], weights['out'])
    return projected + biases['out']

# Unnormalised scores (logits) over the vocabulary for the word following the input window
pred = RNN(x, weights, biases)

# Loss and optimizer
# Mean softmax cross-entropy between the logits and the target vector, minimised with RMSProp.
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate).minimize(cost)

# Model evaluation
# A prediction counts as correct when the arg-max of the logits equals the arg-max of the target.
correct_pred = tf.equal(tf.argmax(pred,1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Initializing the variables
init = tf.global_variables_initializer()

# Launch the graph
# Train on sliding windows of n_input words (target = the word that follows),
# then extend a prompt word by word with the trained model.
with tf.Session() as session:
    session.run(init)

    # Every lookup below goes through the word sequence train_data[0], so the
    # window bounds must be measured against that sequence. Comparing against
    # len(train_data) instead re-randomises the offset on nearly every step and
    # confines training to the first few windows.
    # NOTE(review): only the first element of train_data is used for training.
    corpus_words = train_data[0]

    step = 0
    offset = random.randint(0,n_input+1)
    end_offset = n_input + 1
    acc_total = 0
    loss_total = 0

    while step < training_iters:
        # Wrap around once a full window (n_input words plus the target) no longer fits
        if offset > (len(corpus_words)-end_offset):
            offset = random.randint(0, n_input+1)

        # One-hot encode the n_input context words and the target word
        symbols_in_keys = [ input_one_hot(dictionary[ str(corpus_words[i])]) for i in range(offset, offset+n_input) ]
        symbols_in_keys = np.reshape(np.array(symbols_in_keys), [-1, n_input,vocab_size])
        symbols_out_onehot = np.zeros([vocab_size], dtype=float)
        symbols_out_onehot[dictionary[str(corpus_words[offset+n_input])]] = 1.0
        symbols_out_onehot = np.reshape(symbols_out_onehot,[1,-1])

        _, acc, loss, onehot_pred = session.run([optimizer, accuracy, cost, pred], \
                                                feed_dict={x: symbols_in_keys, y: symbols_out_onehot})
        loss_total += loss
        acc_total += acc

        if (step+1) % display_step == 0:
            print("Iter= " + str(step+1) + ", Average Loss= " + \
                  "{:.6f}".format(loss_total/display_step) + ", Average Accuracy= " + \
                  "{:.2f}%".format(100*acc_total/display_step))
            acc_total = 0
            loss_total = 0
            symbols_in = [corpus_words[i] for i in range(offset, offset + n_input)]
            symbols_out = corpus_words[offset + n_input]
            # onehot_pred is already a NumPy array, so take the arg-max with NumPy:
            # tf.argmax(...).eval() would add a new op to the graph on every call.
            symbols_out_pred = reverse_dictionary[int(np.argmax(onehot_pred, axis=1)[0])]
            print("%s - Actual word:[%s] vs Predicted word:[%s]" % (symbols_in,symbols_out,symbols_out_pred))
        step += 1
        offset += (n_input+1)
    print("Optimization Finished!")

    sentence = 'i only wish'
    words = sentence.split(' ')
    # The model consumes exactly n_input words, so seed it with the last n_input
    # words of the prompt instead of a hard-coded word list and count.
    sentence_data = words[-n_input:]

    try:
        symbols_in_keys = [ input_one_hot(dictionary[ str(word)]) for word in sentence_data ]
    except KeyError:
        # Only a prompt word missing from the vocabulary is reported this way;
        # any other failure is left to surface normally.
        print("Word not in dictionary")
    else:
        for i in range(28):
            keys = np.reshape(np.array(symbols_in_keys), [-1, n_input,vocab_size])
            onehot_pred = session.run(pred, feed_dict={x: keys})
            onehot_pred_index = int(np.argmax(onehot_pred, axis=1)[0])
            sentence = "%s %s" % (sentence,reverse_dictionary[onehot_pred_index])
            # Slide the window: drop the oldest word, append the one just predicted
            symbols_in_keys = symbols_in_keys[1:]
            symbols_in_keys.append(input_one_hot(onehot_pred_index))
        print(sentence)