### LSTM by Example

https://towardsdatascience.com/lstm-by-example-using-tensorflow-feb0c1968537 <br> <br>
GitHub code: <br>

https://github.com/roatienza/Deep-Learning-Experiments/blob/master/Experiments/Tensorflow/RNN/rnn_words.py

In [1]:
from __future__ import print_function

import numpy as np
import tensorflow as tf
from tensorflow.contrib import rnn
import random
import collections
import time

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

In [2]:
# Reference point used to report the total run time after training.
start_time = time.time()


def elapsed(sec):
    """Format a duration given in seconds as a human-readable string.

    Durations under a minute are reported in seconds, durations under an
    hour in minutes, and anything longer in hours. The number is rendered
    with ``str`` and is not rounded.

    Args:
        sec: Duration in seconds.

    Returns:
        The value followed by " sec", " min" or " hr".
    """
    if sec < 60:
        amount, unit = sec, " sec"
    elif sec < (60 * 60):
        amount, unit = sec / 60, " min"
    else:
        amount, unit = sec / (60 * 60), " hr"
    return str(amount) + unit

In [3]:
# Directory that receives the TensorBoard event files for this run.
logs_path = '/tmp/tensorflow/rnn_words'
# Summary writer bound to logs_path; the training cell uses it to export the graph.
writer = tf.summary.FileWriter(logs_path)

In [4]:
# Plain-text corpus the model is trained on; the path is relative to the working directory.
training_file = 'belling_the_cat.txt'

In [5]:
def read_data(fname):
    """Read a text file and return its whitespace-separated tokens.

    Args:
        fname: Path of the text file to read.

    Returns:
        1-D NumPy array of strings with one entry per token, in file order.
        An empty or whitespace-only file yields an empty string array.
    """
    with open(fname) as f:
        # split() without arguments discards leading/trailing whitespace and
        # collapses runs of whitespace, so no separate strip() is needed.
        words = [word for line in f for word in line.split()]
    # dtype=str keeps the result a string array even when no token was found
    # (np.array([]) would otherwise default to float64).
    return np.array(words, dtype=str)

In [6]:
# Tokenize the corpus once; training_data holds the whitespace-separated tokens in file order.
training_data = read_data(training_file)
print("Loaded training data...")

Loaded training data...


In [7]:
def build_dataset(words):
    """Map every distinct word to an integer id and build the inverse map.

    Ids are assigned in the order returned by ``Counter.most_common()``, so
    the most frequent word gets id 0.

    Args:
        words: Iterable of hashable tokens.

    Returns:
        Tuple ``(dictionary, reverse_dictionary)`` where ``dictionary`` maps
        word -> id and ``reverse_dictionary`` maps id -> word.
    """
    ranked = collections.Counter(words).most_common()
    dictionary = {word: rank for rank, (word, _) in enumerate(ranked)}
    reverse_dictionary = {rank: word for word, rank in dictionary.items()}
    return dictionary, reverse_dictionary

In [8]:
# Build the word -> id lookup and its inverse (id -> word) from the corpus tokens.
dictionary, reverse_dictionary = build_dataset(training_data)

In [9]:
# Number of distinct tokens; sets the width of the one-hot targets and of the output layer.
vocab_size = len(dictionary)

In [10]:
# Display the vocabulary size as the cell output.
vocab_size

112

In [11]:
learning_rate = 0.001   # step size passed to the Adam optimizer
training_iters = 50000  # number of training steps; each step feeds one sample
display_step = 1000     # print averaged loss/accuracy every this many steps
n_input = 3             # number of consecutive words fed in to predict the next word

# in RNN cell
n_hidden = 512          # number of units of the LSTM cell

In [12]:
# tf graph input
# x: batch of n_input word ids, one id per time step, with a trailing axis of size 1.
x = tf.placeholder(tf.float32, [None, n_input, 1])
# y: one-hot encoded target word, one row of vocab_size entries per sample.
y = tf.placeholder(tf.float32, [None, vocab_size])

In [13]:
# RNN output node weights and biases
# Randomly initialised [n_hidden, vocab_size] matrix of the output layer.
weights = {
    'out' : tf.Variable(tf.random_normal([n_hidden, vocab_size]))
}

# Randomly initialised bias vector of the output layer, one entry per vocabulary word.
biases = {
    'out' : tf.Variable(tf.random_normal([vocab_size]))
}

In [14]:
def RNN(x, weights, biases):
    """Build a single-layer LSTM that scores the next word from ``n_input`` words.

    Args:
        x: Tensor that can be reshaped to ``[-1, n_input]``; the notebook
            passes the ``[None, n_input, 1]`` placeholder of word ids.
        weights: Dict whose ``'out'`` entry is the ``[n_hidden, vocab_size]``
            output projection matrix.
        biases: Dict whose ``'out'`` entry is the ``[vocab_size]`` output bias.

    Returns:
        Tensor of unnormalised scores (logits), one row per sample and one
        column per vocabulary word, computed from the LSTM output at the
        last time step.
    """
    # Drop the trailing axis: [batch, n_input, 1] -> [batch, n_input].
    x = tf.reshape(x, [-1, n_input])

    # Split along axis 1 into a list of n_input tensors (one per time step),
    # which is the list-of-inputs format static_rnn consumes.
    x = tf.split(x, n_input, 1)

    # Alternative: 2-layer LSTM, each layer has n_hidden units.
    #     rnn_cell = rnn.MultiRNNCell([rnn.BasicLSTMCell(n_hidden),
    #                                  rnn.BasicLSTMCell(n_hidden)])

    # 1 layer
    rnn_cell = tf.nn.rnn_cell.LSTMCell(n_hidden, name='basic_lstm_cell')

    # Unroll the cell over the time steps; `outputs` holds one tensor per step.
    # The final cell state is not needed here.
    outputs, _ = rnn.static_rnn(rnn_cell, x, dtype=tf.float32)

    # Output layer applied to the last step only. The bias must be added to
    # the matmul result; adding it to the weight matrix first (inside the
    # matmul argument) would multiply it by the LSTM activations instead of
    # acting as a per-word offset.
    return tf.matmul(outputs[-1], weights['out']) + biases['out']

In [15]:
# Build the network once; pred is used below as the logits over the vocabulary.
pred = RNN(x, weights, biases)

In [16]:
# loss and optimizer

# Mean softmax cross-entropy between the logits in pred and the one-hot targets in y.
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=pred,
                                                                labels=y))
# Adam training op that minimises the cost using the configured learning rate.
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

In [17]:
# Model evaluation
# A prediction is correct when the highest-scoring index of pred equals the index set in y.
correct_pred = tf.equal(tf.argmax(pred,1), tf.argmax(y,1))
# Fraction of correct predictions among the fed samples.
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [18]:
# Op that initialises all global variables; it is run first thing inside the session.
init = tf.global_variables_initializer()

In [None]:
# Train the model on sliding windows of n_input words, then enter an
# interactive loop that extends a user-supplied n_input-word prompt by 32
# predicted words. Everything runs inside one session so the interactive part
# uses the freshly trained variables.
with tf.Session() as sess:
    sess.run(init)
    step = 0
    # A training window needs n_input context words plus one target word.
    end_offset = n_input + 1
    # Start at a small random position so runs do not all see the same windows.
    offset = random.randint(0, n_input + 1)
    acc_total = 0
    loss_total = 0

    # Export the graph for TensorBoard. The writer is never closed in this
    # notebook, so flush explicitly to make sure the event file is written.
    writer.add_graph(sess.graph)
    writer.flush()

    while step < training_iters:
        # Wrap around once the window (context + target) would run past the
        # end of the corpus.
        if offset > (len(training_data) - end_offset):
            offset = random.randint(0, n_input + 1)

        # Context: ids of the n_input words starting at offset, shaped
        # [1, n_input, 1] to match placeholder x.
        symbols_in_keys = [[dictionary[str(training_data[i])]]
                           for i in range(offset, offset + n_input)]
        symbols_in_keys = np.reshape(np.array(symbols_in_keys), [-1, n_input, 1])

        # Target: one-hot vector of the word that follows the context,
        # shaped [1, vocab_size] to match placeholder y.
        symbols_out_onehot = np.zeros([vocab_size], dtype=float)
        symbols_out_onehot[dictionary[str(training_data[offset + n_input])]] = 1.0
        symbols_out_onehot = np.reshape(symbols_out_onehot, [1, -1])

        _, acc, loss, onehot_pred = sess.run(
            [optimizer, accuracy, cost, pred],
            feed_dict={x: symbols_in_keys, y: symbols_out_onehot})

        loss_total += loss
        acc_total += acc

        if (step + 1) % display_step == 0:
            print("Iter= " + str(step + 1) + ", Average Loss= " +
                  "{:.6f}".format(loss_total / display_step) + ", Average Accuracy= " +
                  "{:.2f}%".format(100 * acc_total / display_step))

            acc_total = 0
            loss_total = 0
            symbols_in = [training_data[i] for i in range(offset, offset + n_input)]
            symbols_out = training_data[offset + n_input]
            # onehot_pred is already a NumPy array, so take the argmax with
            # NumPy. Calling tf.argmax(...).eval() here would add a new op to
            # the graph on every report.
            symbols_out_pred = reverse_dictionary[int(np.argmax(onehot_pred, 1)[0])]
            print("%s - [%s] vs [%s]" % (symbols_in, symbols_out, symbols_out_pred))
        step += 1
        offset += (n_input + 1)

    print("Optimization Finished!")
    print("Elapsed time: ", elapsed(time.time() - start_time))
    print("Run on command line.")

    print("\ttensorboard --logdir=%s" % (logs_path))
    print("Point your web browser to: http://localhost:6006/")

    # Interactive generation: read n_input words, then repeatedly predict the
    # next word and slide the context window forward by one.
    while True:
        prompt = "%s words: " % n_input
        try:
            sentence = input(prompt)
        except (EOFError, KeyboardInterrupt):
            # End of input or a user interrupt is the only way out of this
            # loop; leave it cleanly so the session is closed.
            break
        sentence = sentence.strip()
        # split() without arguments tolerates repeated spaces between words.
        words = sentence.split()

        if len(words) != n_input:
            continue

        # Only an unknown word is an expected failure; any other error (for
        # example from sess.run) should surface instead of being reported as
        # a vocabulary problem.
        try:
            symbols_in_keys = [dictionary[word] for word in words]
        except KeyError:
            print("Word not in dictionary")
            continue

        for _ in range(32):
            keys = np.reshape(np.array(symbols_in_keys), [-1, n_input, 1])
            onehot_pred = sess.run(pred, feed_dict={x: keys})
            # NumPy argmax: no new graph ops are created inside the loop.
            onehot_pred_index = int(np.argmax(onehot_pred, 1)[0])
            sentence = "%s %s" % (sentence, reverse_dictionary[onehot_pred_index])
            # Slide the window: drop the oldest id, append the predicted one.
            symbols_in_keys = symbols_in_keys[1:]
            symbols_in_keys.append(onehot_pred_index)
        print(sentence)

Iter= 1000, Average Loss= 6.974286, Average Accuracy= 6.50%
['to', 'bell', 'the'] - [cat] vs [this]
Iter= 2000, Average Loss= 3.392263, Average Accuracy= 22.20%
['easily', 'retire', 'while'] - [she] vs [and]
Iter= 3000, Average Loss= 3.328163, Average Accuracy= 20.10%
['.', 'by', 'this'] - [means] vs [a]
Iter= 4000, Average Loss= 2.702323, Average Accuracy= 27.90%
['be', 'procured', ','] - [and] vs [then]
Iter= 5000, Average Loss= 2.209367, Average Accuracy= 36.90%
['therefore', ',', 'to'] - [propose] vs [could]
Iter= 6000, Average Loss= 1.995959, Average Accuracy= 38.90%
['enemy', 'approaches', 'us'] - [.] vs [.]
Iter= 7000, Average Loss= 1.564586, Average Accuracy= 45.90%
['chief', 'danger', 'consists'] - [in] vs [when]
Iter= 8000, Average Loss= 1.438136, Average Accuracy= 51.10%
['our', 'chief', 'danger'] - [consists] vs [,]
Iter= 9000, Average Loss= 1.063982, Average Accuracy= 63.80%
['would', 'meet', 'the'] - [case] vs [case]
Iter= 10000, Average Loss= 0.953809, Average Accuracy= 