# LSTM Exploration

-------------------------------------------------------------------------------------------------------------------
# Technology used: Tensorflow
<br>
### The purpose of this notebook is to experiment with the lstm package in tensorflow and try to understand the intricate subtleties of LSTM networks 

I start with the usual cells for utility purposes.

In [1]:
# packages used for processing: 
import cPickle as pickle # for reading the data
import matplotlib.pyplot as plt # for visualization
import numpy as np

# for operating system related stuff
import os
import sys # for memory usage of objects
from subprocess import check_output

# the boss of frameworks
import tensorflow as tf

# for dataset building:
import collections

# for regex based preprocessing
import re

# to plot the images inline
%matplotlib inline

In [2]:
# Input data files are available in the "../Data/" directory.

def exec_command(cmd):
    '''
        function to execute a shell command and see it's 
        output in the python console
        @params
        cmd = the command to be executed along with the arguments
              ex: ['ls', '../input']
    '''
    print(check_output(cmd).decode("utf8"))

In [3]:
# check the structure of the project directory
exec_command(['ls', '..'])

Data
Models
Scripts



In [4]:
''' Set the constants for the script '''

# various paths of the files
data_path = "../Data/WikiSQL/data" # the data path

train_files = {
    "questions": os.path.join(data_path, "train.jsonl"),
    "tables": os.path.join(data_path, "train.tables.jsonl")
}

base_model_path = '../Models'

processed_data_file_path = os.path.join(data_path, "processed.pickle")
plug_and_play_data_file_path = os.path.join(data_path, "plug_and_play.pickle")

# constants:
matcher_regex = r"[\w']+|[.,!?;\"]"
vocab_size = 55000 # total words in our vocabulary
lstm_hidden_state_size = 128 # hidden state size
input_seqs_length = 45
output_seqs_length = 85
no_of_epochs = 500000
batch_size = 64 # we look at only 64 examples in a single batch            
list_length = 8 # we send in 8 such batches at a time:
checkpoint_factor = 5000 # save the model after every 5 epochs

In [5]:
# check the contents of the data path
exec_command(['ls', data_path])

dev.db
dev.jsonl
dev.tables.jsonl
plug_and_play.pickle
processed.pickle
test.db
test.jsonl
test.tables.jsonl
train.db
train.jsonl
train.tables.jsonl



In [6]:
# create a function to unpickle the data into a python object
def unpickle(pickle_file):
    '''
        function to unpickle the pickle file into a python compatible object
        @param
        pickle => the pickle file path
        @return => the unpickled object
    '''
    with open(pickle_file) as dumper:
        return pickle.load(dumper)

In [7]:
import numpy as np
from random import shuffle
 
train_input = ['{0:020b}'.format(i) for i in range(2**20)]
shuffle(train_input)
train_input = [map(int,i) for i in train_input]
ti  = []
for i in train_input:
    temp_list = []
    for j in i:
            temp_list.append([j])
    ti.append(np.array(temp_list))
train_input = ti

In [8]:
np.array(train_input).shape

(1048576, 20, 1)

In [9]:
type(train_input)

list

In [10]:

train_output = []
 
for i in train_input:
    count = 0
    for j in i:
        if j[0] == 1:
            count+=1
    temp_list = ([0]*21)
    temp_list[count]=1
    train_output.append(temp_list)

In [11]:
np.array(train_output).shape

(1048576, 21)

In [12]:
print train_output[0]

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0]


In [13]:
NUM_EXAMPLES = 10000
test_input = train_input[NUM_EXAMPLES:]
test_output = train_output[NUM_EXAMPLES:] #everything beyond 10,000
 
train_input = train_input[:NUM_EXAMPLES]
train_output = train_output[:NUM_EXAMPLES] #till 10,000

In [14]:
len(train_input), len(test_input)

(10000, 1038576)

In [15]:
data = tf.placeholder(tf.float32, [None, 20,1])
target = tf.placeholder(tf.float32, [None, 21])

In [16]:
num_hidden = 100
cell = tf.nn.rnn_cell.LSTMCell(num_hidden, state_is_tuple=True)

In [17]:
val, state = tf.nn.dynamic_rnn(cell, data, dtype=tf.float32)

In [18]:
val

<tf.Tensor 'rnn/transpose:0' shape=(?, 20, 100) dtype=float32>

In [19]:
c = state[0]; h = state[1]
c.shape, h.shape

(TensorShape([Dimension(None), Dimension(100)]),
 TensorShape([Dimension(None), Dimension(100)]))

In [20]:
# transpose the val tensor and carve out the last output as a slice
val = tf.transpose(val, [1, 0, 2])
last = val[val.shape[0] - 1, :, :]

In [21]:
val.shape

TensorShape([Dimension(20), Dimension(None), Dimension(100)])

In [22]:
last.shape

TensorShape([Dimension(None), Dimension(100)])

In [23]:
# define the weights and biases for projections
weight = tf.Variable(tf.truncated_normal((num_hidden, int(target.shape[1]))))
bias = tf.Variable(tf.constant(0.1, shape=[1, int(target.shape[1])]))

In [24]:
weight, bias

(<tf.Variable 'Variable:0' shape=(100, 21) dtype=float32_ref>,
 <tf.Variable 'Variable_1:0' shape=(1, 21) dtype=float32_ref>)

In [25]:
prediction = tf.nn.softmax(tf.matmul(last, weight) + bias)

In [26]:
prediction, target

(<tf.Tensor 'Softmax:0' shape=(?, 21) dtype=float32>,
 <tf.Tensor 'Placeholder_1:0' shape=(?, 21) dtype=float32>)

In [27]:
# define the loss function to be minimized
cross_entropy = -tf.reduce_sum(target * tf.log(tf.clip_by_value(prediction,1e-10,1.0)))

In [28]:
# define the training step
optimizer = tf.train.AdamOptimizer()
train_step = optimizer.minimize(cross_entropy)

In [29]:
# error calculation on the test dataset
mistakes = tf.not_equal(tf.argmax(target, 1), tf.argmax(prediction, 1))
error = tf.reduce_mean(tf.cast(mistakes, tf.float32))

In [31]:
# create and initialize the session:
init_op = tf.global_variables_initializer()
sess = tf.InteractiveSession()
sess.run(init_op)

In [36]:
# running the session:
batch_size = 1000
no_of_batches = int(len(train_input)/batch_size)
epoch = 200


In [37]:
np.array(train_input).shape, np.array(train_output).shape

((10000, 20, 1), (10000, 21))

In [38]:
loss_plot = []

In [66]:
for i in range(epoch):
    ptr = 0
    for j in range(no_of_batches):
        inp, out = train_input[ptr:ptr+batch_size], train_output[ptr:ptr+batch_size]
        ptr+=batch_size
        _, loss = sess.run((train_step, cross_entropy), feed_dict={data: inp, target: out})
    print "Epoch - ", str(i), "Loss - ", loss
    loss_plot.append(loss)

Epoch -  0 Loss -  11.2553
Epoch -  1 Loss -  11.1778
Epoch -  2 Loss -  11.1007
Epoch -  3 Loss -  11.0243
Epoch -  4 Loss -  10.9483
Epoch -  5 Loss -  10.8729
Epoch -  6 Loss -  10.7981
Epoch -  7 Loss -  10.7237
Epoch -  8 Loss -  10.6499
Epoch -  9 Loss -  10.5767
Epoch -  10 Loss -  10.504
Epoch -  11 Loss -  10.4317
Epoch -  12 Loss -  10.3599
Epoch -  13 Loss -  10.2885
Epoch -  14 Loss -  10.2175
Epoch -  15 Loss -  10.147
Epoch -  16 Loss -  10.0769
Epoch -  17 Loss -  10.0073
Epoch -  18 Loss -  9.93825
Epoch -  19 Loss -  9.86967
Epoch -  20 Loss -  9.80163
Epoch -  21 Loss -  9.73401
Epoch -  22 Loss -  9.66679
Epoch -  23 Loss -  9.5999
Epoch -  24 Loss -  9.53342
Epoch -  25 Loss -  9.46725
Epoch -  26 Loss -  9.40146
Epoch -  27 Loss -  9.33597
Epoch -  28 Loss -  9.27088
Epoch -  29 Loss -  9.20621
Epoch -  30 Loss -  9.14194
Epoch -  31 Loss -  9.07818
Epoch -  32 Loss -  9.01483
Epoch -  33 Loss -  8.95197
Epoch -  34 Loss -  8.88953
Epoch -  35 Loss -  8.82747
Epoch

In [60]:
incorrect = sess.run(error,{data: test_input[:1000], target: test_output[:1000]})
print('error {:3.1f}%'.format(i + 1, 100 * incorrect))

error 200.0%


In [67]:
state = h.eval(feed_dict={data: test_input[:2], target: test_output[:2]})

In [68]:
state.shape

(2, 100)

In [63]:
out = val.eval(feed_dict={data: test_input[:1], target: test_output[:1]})

In [64]:
lest = out[19]

In [65]:
lest.shape

(1, 100)

In [58]:
lest

array([[  6.08171761e-01,  -8.43231618e-01,  -2.61537850e-01,
         -8.70031893e-01,   3.23232532e-01,  -8.63342762e-01,
         -6.83092475e-01,   5.76106124e-02,  -2.69228853e-02,
          4.56251949e-01,   6.39560401e-01,  -5.44083714e-01,
          4.32975024e-01,  -1.99097767e-02,  -1.63227916e-01,
         -6.57440841e-01,   7.89873004e-01,  -4.85014290e-01,
          5.26652753e-01,  -9.70201194e-01,   9.07053173e-01,
         -7.56672323e-01,   3.65930170e-01,   8.61710496e-03,
         -1.55350432e-01,   9.30902362e-01,   7.94562697e-01,
         -6.50904417e-01,   9.37372625e-01,   7.20937625e-02,
          1.50268003e-01,   1.14286486e-02,   5.46257317e-01,
         -8.38700458e-02,  -5.87568693e-02,  -5.64977169e-01,
         -3.62886667e-01,  -9.52773094e-01,   9.22005415e-01,
         -9.20509875e-01,  -2.86670059e-01,   8.90155315e-01,
          8.74028921e-01,  -1.28616452e-01,   6.53341472e-01,
          9.86320078e-01,  -7.93899346e-07,  -9.22212005e-01,
        

In [59]:
state

array([[  6.08171761e-01,  -8.43231618e-01,  -2.61537850e-01,
         -8.70031893e-01,   3.23232532e-01,  -8.63342762e-01,
         -6.83092475e-01,   5.76106124e-02,  -2.69228853e-02,
          4.56251949e-01,   6.39560401e-01,  -5.44083714e-01,
          4.32975024e-01,  -1.99097767e-02,  -1.63227916e-01,
         -6.57440841e-01,   7.89873004e-01,  -4.85014290e-01,
          5.26652753e-01,  -9.70201194e-01,   9.07053173e-01,
         -7.56672323e-01,   3.65930170e-01,   8.61710496e-03,
         -1.55350432e-01,   9.30902362e-01,   7.94562697e-01,
         -6.50904417e-01,   9.37372625e-01,   7.20937625e-02,
          1.50268003e-01,   1.14286486e-02,   5.46257317e-01,
         -8.38700458e-02,  -5.87568693e-02,  -5.64977169e-01,
         -3.62886667e-01,  -9.52773094e-01,   9.22005415e-01,
         -9.20509875e-01,  -2.86670059e-01,   8.90155315e-01,
          8.74028921e-01,  -1.28616452e-01,   6.53341472e-01,
          9.86320078e-01,  -7.93899346e-07,  -9.22212005e-01,
        

In [57]:
state == lest

array([[ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True]], dtype=bool)

In [None]:
# close the session and let it rest.
sess.close()