# Tensorflow and Recurrent Neural Networks
In this Notebook, you will learn how to train a language model to generate a sentence given a few words (very similar to the mighty GPT-2 model. It's cool, right? :D).

## There are some TODOs in this Notebook:
- TODO#1: (as always) read the codes and comments from the begining to the end.
- TODO#2: train the dataset to a bigger one, with more documents.
- TODO#3: add Tensorboard to visualize the graph and see training parameters.

In [1]:
%load_ext autoreload
%autoreload 2
import tensorflow as tf
import collections
import random
import numpy as np
from tensorflow.contrib import rnn
import sys
import time
import os

  from ._conv import register_converters as _register_converters


In [2]:
# Include *.py files from other folders
module_path = os.path.abspath(os.path.join('../../'))
if module_path not in sys.path:
    sys.path.append(module_path)

In [3]:
from pythonlibs.rnn.data import dataset_lm

In [4]:
training_data = dataset_lm()

In [5]:
training_data

array(['All', 'good', 'so', 'far.', 'I', 'was', 'incredibly', 'hesitant',
       'to', 'buy', 'a', 'refurbished', 'phone', 'on', 'Amazon', 'after',
       'reading', 'so', 'many', 'negative', 'reviews', 'on', 'various',
       'products.', 'I', 'spent', 'a', 'lot', 'of', 'time', 'researching',
       'my', 'options', 'and', 'figured', 'this', 'one', 'was',
       'probably', 'my', 'safest', 'bet.', 'Phone', 'came', 'in',
       'perfect', 'condition', '(seriously,', 'it', 'looks', 'brand',
       'new)', 'and', 'so', 'far', 'all', 'the', 'functions', 'seem',
       'great.', 'Set', 'up', 'was', 'easy,', 'unlocked,', 'ready', 'to',
       'set', 'my', 'fingerprint', 'and', 'everything.', "It's", 'fast,',
       'sleek,', 'and', 'beautiful.', 'Camera', 'and', 'audio', 'are',
       'also', 'great.', '1.', 'DEFECTIVE', 'BATTERY', '-', 'The',
       'phone', 'was', 'defective', 'the', 'day', 'it', 'arrived.', 'The',
       'battery', 'does', 'not', 'hold', 'a', 'charge', 'consistently',
  

In [6]:
def build_dataset(words):
    count = collections.Counter(words).most_common()
    dictionary = dict()
    for word, _ in count:
        dictionary[word] = len(dictionary)
    reverse_dictionary = dict(zip(dictionary.values(), dictionary.keys()))
    return dictionary, reverse_dictionary

In [7]:
dictionary, reverse_dictionary = build_dataset(training_data)

In [8]:
# Location to write log files.
logs_path = '../../my_data/tf_rnn_logs'

In [9]:
tf.reset_default_graph()
n_input = 3 # 3 symbols are retrieved from the text
n_hidden = 100 # 10
vocab_size = len(dictionary)

In [10]:
# Constructing the RNN graph
x = tf.placeholder("float", [None, n_input, 1])
y = tf.placeholder("float", [None, vocab_size])

# RNN output node weights and biases
weights = {
    "out" : tf.Variable(tf.random_normal([n_hidden, vocab_size]))
}

biases = {
    "out" : tf.Variable(tf.random_normal([vocab_size]))
}

def RNN(x, weights, biases):
    x = tf.reshape(x, [-1, n_input])
    x = tf.split(x, n_input, 1)
    
    rnn_cell = rnn.MultiRNNCell([rnn.BasicLSTMCell(n_hidden),rnn.BasicLSTMCell(n_hidden)])
    # rnn_cell = rnn.BasicLSTMCell(n_hidden)
    
    # generate prediction
    outputs, states = rnn.static_rnn(rnn_cell, x, dtype=tf.float32)
    
    # there are n_input outputs but
    # we only want the last output
    return tf.matmul(outputs[-1], weights['out']) + biases['out']

In [11]:
# tf.reset_default_graph()

In [12]:
training_iters = 200
pred = RNN(x, weights, biases)
offset = random.randint(0,n_input+1)
end_offset = n_input + 1
learning_rate = 0.001

Instructions for updating:
This class is deprecated, please use tf.nn.rnn_cell.LSTMCell, which supports all the feature this cell currently has. Please replace the existing code with tf.nn.rnn_cell.LSTMCell(name='basic_lstm_cell').


In [13]:
# Loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate).minimize(cost)
# Model evaluator
correct_pred = tf.equal(tf.argmax(pred,1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.



In [14]:
start_time = time.time()
def elapsed(sec):
    if sec<60:
        return str(sec) + " sec"
    elif sec<(60*60):
        return str(sec/60) + " min"
    else:
        return str(sec/(60*60)) + " hr"

In [15]:
writer = tf.summary.FileWriter(logs_path)

In [16]:
session = tf.InteractiveSession()

In [17]:
# After reshaping to fit the feed dictionary, we run the optimization:
init = tf.global_variables_initializer()
display_step = 5

session.run(init)
step = 0
offset = random.randint(0,n_input+1)
end_offset = n_input + 1
acc_total = 0
loss_total = 0

writer.add_graph(session.graph)

while step < training_iters:
    # Generate a minibatch. Add some randomness on selection process.
    if offset > (len(training_data)-end_offset):
        offset = random.randint(0, n_input+1)

    symbols_in_keys = [ [dictionary[ str(training_data[i])]] for i in range(offset, offset+n_input) ]
    symbols_in_keys = np.reshape(np.array(symbols_in_keys), [-1, n_input, 1])

    symbols_out_onehot = np.zeros([vocab_size], dtype=float)
    symbols_out_onehot[dictionary[str(training_data[offset+n_input])]] = 1.0
    symbols_out_onehot = np.reshape(symbols_out_onehot,[1,-1])

    _, acc, loss, onehot_pred = session.run([optimizer, accuracy, cost, pred], \
                                            feed_dict={x: symbols_in_keys, y: symbols_out_onehot})
    loss_total += loss
    acc_total += acc
    if (step+1) % display_step == 0:
        print("Iter= " + str(step+1) + ", Average Loss= " + \
              "{:.6f}".format(loss_total/display_step) + ", Average Accuracy= " + \
              "{:.2f}%".format(100*acc_total/display_step))
        acc_total = 0
        loss_total = 0
        symbols_in = [training_data[i] for i in range(offset, offset + n_input)]
        symbols_out = training_data[offset + n_input]
        symbols_out_pred = reverse_dictionary[int(tf.argmax(onehot_pred, 1).eval())]
        print("%s - [%s] vs [%s]" % (symbols_in,symbols_out,symbols_out_pred))
    step += 1
    offset += (n_input+1)

Iter= 5, Average Loss= 6.418518, Average Accuracy= 0.00%
['negative', 'reviews', 'on'] - [various] vs [fault]
Iter= 10, Average Loss= 5.216174, Average Accuracy= 0.00%
['my', 'safest', 'bet.'] - [Phone] vs [researching]
Iter= 15, Average Loss= 6.223140, Average Accuracy= 0.00%
['great.', 'Set', 'up'] - [was] vs [researching]
Iter= 20, Average Loss= 6.167353, Average Accuracy= 0.00%
['audio', 'are', 'also'] - [great.] vs [researching]
Iter= 25, Average Loss= 5.631999, Average Accuracy= 0.00%
['hold', 'a', 'charge'] - [consistently] vs [researching]
Iter= 30, Average Loss= 6.369317, Average Accuracy= 0.00%
['sometimes', 'it', "doesn't."] - [I] vs [defective]
Iter= 35, Average Loss= 5.349265, Average Accuracy= 0.00%
['did', 'a', 'test'] - [to] vs [defective]
Iter= 40, Average Loss= 5.121295, Average Accuracy= 0.00%
['my', 'fault', 'or'] - [a] vs [defective]
Iter= 45, Average Loss= 4.834794, Average Accuracy= 0.00%
['I', 'was', 'incredibly'] - [hesitant] vs [defective]
Iter= 50, Average Lo

In [18]:
# Play with trained model:
sentence = ""
while sentence != "exit":
    prompt = "%s words: " % n_input
    sentence = input(prompt)
    sentence = sentence.strip()
    words = sentence.split(' ')
    if len(words) != n_input:
        continue
    try:
        symbols_in_keys = [dictionary[str(words[i])] for i in range(len(words))]
        for i in range(32):
            keys = np.reshape(np.array(symbols_in_keys), [-1, n_input, 1])
            onehot_pred = session.run(pred, feed_dict={x: keys})
            onehot_pred_index = int(tf.argmax(onehot_pred, 1).eval())
            sentence = "%s %s" % (sentence,reverse_dictionary[onehot_pred_index])
            symbols_in_keys = symbols_in_keys[1:]
            symbols_in_keys.append(onehot_pred_index)
        print(sentence)
    except Exception as ex:
        print("Exception: ", ex)
        print("Word not in dictionary")

# Close Tensorflow session.
session.close()

3 words:  I was good


I was good defective long long defective defective long long defective defective long long defective defective long long defective defective long long defective defective long long defective defective long long defective defective long long defective


3 words:  a long iPhone


Exception:  'iPhone'
Word not in dictionary


3 words:  a long word


Exception:  'word'
Word not in dictionary


3 words:  a a long


a a long defective defective long long defective defective long long defective defective long long defective defective long long defective defective long long defective defective long long defective defective long long defective defective long long


3 words:  exit


# TODOs
- TODO#2: train the dataset to a bigger one, with more documents.

In [2]:
# Please go to bbc.com or any other website to copy and 
# paste here a piece of text (not too long, not too short). 
text_longer = """
<REPLACE WITH YOUR TEXT HERE>
"""
# Next: please replicate the whole process again with this new text.

# TODOs
- TODO#3: add Tensorboard to visualize the graph and see training parameters.<br>
(You are recommended to hook the Tensorboard codes back into the above codes)

# Conclusions:
After this Notebook, you should know:
- How to build an RNN network for a language modelling task.
- How to stack multiple RNN layers.
- How to use an interactive session instead of the traditional session of Tensorflow.