In [1]:
import tensorflow as tf
import numpy as np
%matplotlib inline
from tensorflow.models.rnn.ptb import reader
import time
import os
import urllib.request

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
"download dataset"
# Download the tiny-shakespeare corpus once; later runs reuse the local copy.
file_url = 'https://raw.githubusercontent.com/jcjohnson/torch-rnn/master/data/tiny-shakespeare.txt'
file_name = 'tinyshakespeare.txt'
if not os.path.isfile(file_name):
    # Fetch into a temporary name and rename only after the download succeeded,
    # so an interrupted transfer cannot leave a truncated file that the
    # isfile() check above would later mistake for the complete corpus.
    partial_name = file_name + '.part'
    urllib.request.urlretrieve(file_url, partial_name)
    os.replace(partial_name, file_name)

In [4]:
# Read the whole corpus into memory as one string.  The context manager
# closes the file handle as soon as the read is finished.
with open(file_name) as corpus_file:
    data = corpus_file.read()
print("the size of the data is {}".format(len(data)))

# Character-level vocabulary; upper-case and lower-case characters are different.
vocab = set(data)
vocab_size = len(vocab)
# Enumerate the characters in sorted order so the id <-> character mapping is
# the same in every kernel session; iterating the bare set of strings can
# produce a different order each time the interpreter is started.
idx_to_vocab = dict(enumerate(sorted(vocab)))
vocab_to_idx = {char: idx for idx, char in idx_to_vocab.items()}

the size of the data is 1115394


In [5]:
# Encode the corpus as a list of integer character ids, then release the raw
# text, which is not used again below.
numeric_data = [vocab_to_idx[char] for char in data]
del data

In [6]:
def gen_epochs(num_epochs, batch_size, num_steps):
    """Lazily yield one batch iterator per training epoch.

    Every yielded item is the result of ``reader.ptb_iterator`` called on the
    notebook-level ``numeric_data`` with the given ``batch_size`` and
    ``num_steps``.  A new iterator is created for each of the ``num_epochs``
    epochs, and only when the caller asks for it.
    """
    yield from (
        reader.ptb_iterator(numeric_data, batch_size, num_steps)
        for _epoch in range(num_epochs)
    )

In [7]:
# Using the previously written code to build a language model
from basic_rnn_using_tensorflow_api import BasicRNN
from basic_lstm_using_tensorflow_api import BasicLSTM
from basic_lstm_using_dynamicRNN import DynamicLSTM
from basic_lstm_using_tfScan import DynamicScannedLSTM

In [49]:
# Hyper-parameters passed to the model constructors in the cells below.
batch_size, num_steps = 32, 200   # also forwarded to gen_epochs for batching
state_size = 100                  # `state_size` argument of each model
num_classes = vocab_size          # one class per distinct character in the corpus
inlayer_dropout = 0.6             # NOTE(review): keep- or drop-probability? decided by the model classes
learning_rate = 0.0001

### Basic RNN with one layer

In [50]:
# Build the single-layer RNN on a fresh default graph and time the construction.
tf.reset_default_graph()
tic = time.time()
language_model = BasicRNN(
    state_size=state_size,
    num_steps=num_steps,
    batch_size=batch_size,
    num_classes=num_classes,
    num_layers=1,
    inlayer_dropout=inlayer_dropout,
    learning_rate=learning_rate,
)
toc = time.time()
print("The time took to build the basic RNN model from list is ", toc - tic)

The time took to build the basic RNN model from list is  28.253008604049683


In [51]:
# Train the single-layer RNN for three epochs and report the mean per-batch
# loss of every epoch, followed by the total wall-clock time.
tic = time.time()
for n_epoch, epoch in enumerate(gen_epochs(3, batch_size, num_steps)):
    loss, step = 0, 0
    for batch in epoch:
        # presumably one optimisation step returning the batch loss; the
        # method is defined in the imported model class
        loss += language_model.update_params(batch)
        step += 1
    print("loss after {0} epoch is {1:0.2f}".format(n_epoch, loss / step))
toc = time.time()
# time.time() differences are in seconds; dividing by 60 reports minutes.
print("time taken to finish this simulation is {0:0.2f} minutes".format((toc - tic) / 60))

loss after 0 epoch is 3.80
loss after 1 epoch is 3.37
loss after 2 epoch is 3.24
time taken to finish this simulatin is 0.86 minutes


### Basic RNN with 3-layers

In [28]:
# Build the 3-layer RNN on a fresh default graph and time the construction.
tf.reset_default_graph()
rnn_3_layer_kwargs = dict(
    state_size=state_size,
    num_steps=num_steps,
    num_layers=3,
    batch_size=batch_size,
    inlayer_dropout=inlayer_dropout,
    num_classes=num_classes,
    learning_rate=learning_rate,
)
tic = time.time()
language_model_3_layer_rnn = BasicRNN(**rnn_3_layer_kwargs)
toc = time.time()
print("The time took to build the basic RNN model with 3 layers from list is ", toc - tic)

The time took to build the basic RNN model with 3 layers from list is  41.43037939071655


In [29]:
tic = time.time()
for n_epoch, epoch in enumerate(gen_epochs(3, batch_size, num_steps)):
    loss, step = 0, 0
    for batch in epoch:
        loss += language_model_3_layer_rnn.update_params(batch)
        step += 1
    print("loss after {0} epoch is {1:0.2f}".format(n_epoch, loss / step))
toc = time.time()
print("time taken to finish this simulatin is {0:0.2f}".format((toc - tic) / 60))

loss after 0 epoch is 3.52
loss after 1 epoch is 3.20
loss after 2 epoch is 2.93
time taken to finish this simulatin is 1.17


### Basic LSTM with 3 layers using tf.rnn api 

In [53]:
# Build the 3-layer LSTM on a fresh default graph and time the construction.
tf.reset_default_graph()
tic = time.time()
language_model_3_layer_lstm = BasicLSTM(
    state_size=state_size,
    num_steps=num_steps,
    num_layers=3,
    batch_size=batch_size,
    inlayer_dropout=inlayer_dropout,
    num_classes=num_classes,
    learning_rate=learning_rate,
)
toc = time.time()
print("The time took to build the basic LSTM model with 3 layers from list is ", toc - tic)

The time took to build the basic LSTM model with 3 layers from list is  152.77650666236877


In [31]:
# Train the 3-layer LSTM for three epochs and report the mean per-batch loss
# of every epoch, followed by the total wall-clock time.
tic = time.time()
for n_epoch, epoch in enumerate(gen_epochs(3, batch_size, num_steps)):
    loss, step = 0, 0
    for batch in epoch:
        # presumably one optimisation step returning the batch loss; the
        # method is defined in the imported model class
        loss += language_model_3_layer_lstm.update_params(batch)
        step += 1
    print("loss after {0} epoch is {1:0.2f}".format(n_epoch, loss / step))
toc = time.time()
# time.time() differences are in seconds; dividing by 60 reports minutes,
# so the message names the unit explicitly.
print("time taken to finish this simulation is {0:0.2f} minutes".format((toc - tic) / 60))

loss after 0 epoch is 3.62
loss after 1 epoch is 3.35
loss after 2 epoch is 3.34
time taken to finish this simulatin is 4.62


Just building the model takes a lot of time. This is not a problem during training, because we only have to build the model once. But it could be a problem at test time, where we may have to build the model multiple times. We can use the `Tensorflow` API `tf.nn.dynamic_rnn`, which delays the creation of the unrolled graph until run time.

### Basic LSTM using the `tf.nn.dynamic_rnn` API

In [32]:
# Build the dynamic_rnn-based 3-layer LSTM on a fresh default graph and time
# the construction.  No in-layer dropout argument is passed to this model.
tf.reset_default_graph()
dynamic_lstm_kwargs = dict(
    state_size=state_size,
    num_steps=num_steps,
    num_layers=3,
    batch_size=batch_size,
    num_classes=num_classes,
    learning_rate=learning_rate,
)
tic = time.time()
language_model_dynamic_lstm = DynamicLSTM(**dynamic_lstm_kwargs)
toc = time.time()
print("The time took to build the dynamic LSTM model with 3 layers from list is ", toc - tic)

The time took to build the dynamic LSTM model with 3 layers from list is  1.6617040634155273


In [33]:
# Train the dynamic_rnn-based LSTM for three epochs and report the mean
# per-batch loss of every epoch, followed by the total wall-clock time.
tic = time.time()
for n_epoch, epoch in enumerate(gen_epochs(3, batch_size, num_steps)):
    loss, step = 0, 0
    for batch in epoch:
        # presumably one optimisation step returning the batch loss; the
        # method is defined in the imported model class
        loss += language_model_dynamic_lstm.update_params(batch)
        step += 1
    print("loss after {0} epoch is {1:0.2f}".format(n_epoch, loss / step))
toc = time.time()
# time.time() differences are in seconds; dividing by 60 reports minutes,
# so the message names the unit explicitly.
print("time taken to finish this simulation is {0:0.2f} minutes".format((toc - tic) / 60))

loss after 0 epoch is 3.60
loss after 1 epoch is 3.35
loss after 2 epoch is 3.34
time taken to finish this simulatin is 4.63


### Basic LSTM using tf.scan api

In [54]:
# Build the tf.scan-based 3-layer LSTM on a fresh default graph and time the
# construction.
tf.reset_default_graph()
tic = time.time()
language_model_scanned_lstm = DynamicScannedLSTM(state_size=state_size, num_steps=num_steps, num_layers=3,
                                                 batch_size=batch_size, num_classes=num_classes,
                                                 # the comma after this argument was missing,
                                                 # which made the whole cell a SyntaxError
                                                 inlayer_dropout=inlayer_dropout,
                                                 learning_rate=learning_rate)
toc = time.time()
print("The time took to build the dynamic LSTM model with 3 layers from list is ", toc - tic)

The time took to build the dynamic LSTM model with 3 layers from list is  1.947620153427124


In [35]:
# Train the tf.scan-based LSTM for three epochs and report the mean per-batch
# loss of every epoch, followed by the total wall-clock time.
tic = time.time()
for n_epoch, epoch in enumerate(gen_epochs(3, batch_size, num_steps)):
    loss, step = 0, 0
    for batch in epoch:
        # presumably one optimisation step returning the batch loss; the
        # method is defined in the imported model class
        loss += language_model_scanned_lstm.update_params(batch)
        step += 1
    print("loss after {0} epoch is {1:0.2f}".format(n_epoch, loss / step))
toc = time.time()
# time.time() differences are in seconds; dividing by 60 reports minutes,
# so the message names the unit explicitly.
print("time taken to finish this simulation is {0:0.2f} minutes".format((toc - tic) / 60))

loss after 0 epoch is 3.60
loss after 1 epoch is 3.35
loss after 2 epoch is 3.33
time taken to finish this simulatin is 4.73


In [41]:
# Start a fresh interactive session for the experiments below.  On a freshly
# started kernel no `sess` exists yet (it is first assigned on the last line
# of this cell), so only close a session that is actually defined.
if "sess" in globals():
    sess.close()
sess = tf.InteractiveSession()

In [47]:
tf.nn.rnn_cell.DropoutWrapper?

In [46]:
# Apply dropout with 0.5 as the second argument to a small constant input.
# In the recorded output the surviving entries are doubled (2 -> 4) and the
# dropped ones are zero.
dropout_demo_input = [[1, 1, 1, 1.], [2, 2, 2, 2]]
sess.run(tf.nn.dropout(dropout_demo_input, 0.5))

array([[ 0.,  0.,  0.,  0.],
       [ 4.,  0.,  0.,  4.]], dtype=float32)

**A small experiment**

In [None]:
# Tiny dimensions for the toy graph built below.
# NOTE: these rebind the names used by the training cells above.
batch_size, num_steps = 4, 2
state_size = 3
vocab_size = 5

In [None]:
# Build a minimal graph: an embedding lookup feeding a 3-layer LSTM that is
# run by tf.nn.dynamic_rnn.
# Close the session left over from earlier cells before discarding its graph
# (requires `sess` to have been assigned by an earlier cell).
if sess: sess.close()
tf.reset_default_graph()
# Integer placeholders of shape (batch_size, num_steps); `y` is declared here
# but no op in this cell consumes it.
x = tf.placeholder(tf.int32, shape=(batch_size, num_steps), name="x")
y = tf.placeholder(tf.int32, shape=(batch_size, num_steps), name="y")


# Embedding matrix of shape (vocab_size, state_size), initialised with the constant 1.0
W_embeddings = tf.get_variable("embeddings", shape=(vocab_size, state_size), 
                               initializer=tf.constant_initializer(1.0))
# Look up the embedding row for every id in `x`
rnn_inputs = tf.nn.embedding_lookup(W_embeddings, x)

# One LSTMCell object repeated three times inside a MultiRNNCell; the name
# `lstm_cell` is rebound to the stacked cell.  init_state is its zero state.
lstm_cell = tf.nn.rnn_cell.LSTMCell(state_size, state_is_tuple=True)
lstm_cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * 3, state_is_tuple=True)
init_state = lstm_cell.zero_state(batch_size, tf.float32)

# Run the stacked cell over rnn_inputs with dynamic_rnn, starting from init_state
outputs, final_states = tf.nn.dynamic_rnn(lstm_cell, rnn_inputs, initial_state=init_state)

In [None]:
# Values fed to the placeholders: four rows, each repeating one id (1..4) twice.
x_ = [[token, token] for token in range(1, 5)]
y_ = [[token, token] for token in range(1, 5)]

In [None]:
# Feed dict binding the two placeholders to the toy id lists.
feed = {x: x_, y: y_}

In [None]:
# Swap the first two axes of rnn_inputs so the axis that follows the
# placeholder's num_steps dimension comes first; tf.scan iterates over the
# leading axis.
rnn_inputs_transposed = tf.transpose(rnn_inputs, [1, 0, 2])

In [None]:
# Step the stacked LSTM manually with tf.scan: for every slice `x` along the
# leading axis of rnn_inputs_transposed, call lstm_cell(x, a[1]), where `a` is
# the previous result and a[1] its state component.  The initializer supplies
# a zero tensor of shape (batch_size, state_size) in slot 0 and init_state in
# slot 1.  Inside the lambda, `x` is the lambda parameter, not the placeholder.
scanned_lstm = tf.scan(lambda a, x: lstm_cell(x, a[1]), rnn_inputs_transposed, 
                       initializer=(tf.zeros((batch_size, state_size)), init_state))

In [None]:
# Open a new interactive session; the .eval(feed) calls in the cells below
# are made without passing a session explicitly.
sess = tf.InteractiveSession()

In [None]:
# Run the initializer op for all variables of the current graph.
sess.run(tf.initialize_all_variables())

In [None]:
# Evaluate slot 0 of the tf.scan result (the slot seeded with the zero tensor).
t = scanned_lstm[0].eval(feed)

In [None]:
# Evaluate the outputs returned by tf.nn.dynamic_rnn on the same feed.
o = outputs.eval(feed)

In [None]:
# Display the transposed embedded inputs that tf.scan iterates over.
rnn_inputs_transposed.eval(feed)

In [None]:
# Evaluate the last element of the first entry of dynamic_rnn's final state;
# presumably the h component of the first layer's (c, h) tuple — TODO confirm.
final_states[0][-1].eval(feed)

In [None]:
# Evaluate element 0 of the last entry of the scan result's state slot;
# presumably the c component of the last layer, stacked over the scanned
# axis — TODO confirm.
scanned_lstm[1][-1][0].eval(feed)

In [None]:
tf.slice?

In [None]:
# Keep a short handle on the state slot of the tf.scan result.
f = scanned_lstm[1]

In [None]:
# Unpack the first entry of the scanned state along its first dimension
# (tf.unpack is the pre-1.0 name of tf.unstack).
tf.unpack(f[0])

In [None]:
# Index path: state slot -> entry 0 -> element 0 -> position num_steps - 1,
# i.e. the final position along the scanned axis.
scanned_lstm[1][0][0][num_steps - 1]

In [None]:
# Display the dynamic_rnn outputs evaluated into `o` earlier.
o

In [None]:
# Display the zero state created by lstm_cell.zero_state.
init_state