In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from bs4 import BeautifulSoup
import os
import re
import pandas as pd
%matplotlib inline

In [2]:
def vocab2vec(vocab_size, vocab_length=10**7):
    """One-hot encode the leading characters of ``Combined_String.txt``.

    Reads the text file from the current working directory, lower-cases each
    of the first ``vocab_length`` characters and maps it to its position in a
    fixed 42-symbol alphabet.  Characters that are not in the alphabet, or
    whose alphabet position is ``>= vocab_size``, are skipped.

    Args:
        vocab_size: Width of every one-hot row.
        vocab_length: Maximum number of characters to read from the start of
            the file.  If the file is shorter, the whole file is used.

    Returns:
        A float ``numpy`` array with one row of width ``vocab_size`` per kept
        character (so at most ``vocab_length`` rows).  If no character is
        kept the array is empty with shape ``(0,)``.

    Side effects:
        Prints the shape of the returned array.
    """
    D = 'abcdefghijklmnopqrstuvwxyz .,\'1234567890";'

    # Context manager guarantees the handle is closed even if read() fails.
    with open("Combined_String.txt", "r") as f:
        s = f.read()

    # Never index past the end of the text (the default vocab_length is far
    # larger than most files) and treat a negative length as "nothing".
    limit = max(0, min(vocab_length, len(s)))

    res = []
    for ch in s[:limit]:
        idx = D.find(ch.lower())
        # Skip symbols outside the alphabet and symbols that do not fit in a
        # row of width vocab_size.
        if idx < 0 or idx >= vocab_size:
            continue
        v = np.zeros((vocab_size))
        v[idx] = 1
        res.append(v)

    ret = np.array(res)  # shape: (number of kept characters, vocab_size)
    print ("shape is: {}".format(ret.shape))
    return ret

#vocab2vec(40)

# What is LSTM? #  
- [Nico's blog on LSTM](http://nicodjimenez.github.io/2014/08/08/lstm.html)  
- [Colah's blog on LSTM](http://colah.github.io/posts/2015-08-Understanding-LSTMs)  
- Written in equation:
    Each LSTM cell has three inputs (character $x_t$, previous cell prediction $h_{t-1}$, and previous cell state $C_{t-1}$) and two outputs (cell state $C_t$ and cell prediction $h_t$).  
    Forget gate: 
    $$f_t = \sigma (W_f [h_{t-1}, x_t] + b_f)$$   
    Input (information) gate: 
    $$i_t = \sigma (W_i [h_{t-1}, x_t] + b_i)$$  
    Updates for cell state:
    $$D_t = \tanh (W_D [h_{t-1}, x_t] + b_D)$$  
    $$C_t = f_t * C_{t-1} + i_t * D_t$$
    Output layers:  
    $$o_t = \sigma (W_o [h_{t-1}, x_t] + b_o)$$
    $$h_t = o_t * \tanh(C_t)$$
    
- Training goal:  
    $\arg\min_W J$, where $J$ is the cross entropy between the target $y_t$ and the prediction $h_t$:
    $$J = -\sum_t y_t \log h_t$$

**LSTM used for reading paragraphs character by character**
- Cell prediction is the next (batch of) character, given the input and previous states.
- The first character in the prediction sequence is used to calculate the cross entropy.


**Existing code examples:**  
- [Aymeric Damien's TensorFlow-Examples](https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/3_NeuralNetworks/recurrent_network.py)  

In [None]:
# My implementation of LSTM on character reading - based on the equation above

class basicLSTM:
    """Single LSTM step over one-hot characters (TensorFlow 1.x graph API).

    The graph implements the equations from the notes above, with
    ``hx = [h_prev, x]`` concatenated along the feature axis:

        f = sigmoid(hx . Wf + bf)      # forget gate
        i = sigmoid(hx . Wi + bi)      # input gate
        D = tanh(hx . Wd + bd)         # candidate cell state
        C = f * C_prev + i * D         # new cell state
        o = sigmoid(hx . Wo + bo)      # output gate
        h = o * tanh(C)                # cell prediction

    After construction the graph handles are available as attributes:
    ``x``, ``y``, ``init_C``, ``init_h``, ``C``, ``h``, ``train_step``,
    ``saver`` and ``sess``.

    NOTE(review): ``h`` has shape ``[batch_size, cell_size]`` while ``y`` and
    ``init_h`` are declared as ``[batch_size, vocab_size]``, so the loss (and
    feeding ``h`` back in as ``init_h``) only type-checks when
    ``cell_size == vocab_size`` -- confirm that this is the intended setup.
    """

    def __init__(self, vocab_size, cell_size, batch_size, continue_training, global_step, grad_limit=1.0):
        """Build a fresh graph or restore one from a checkpoint.

        Args:
            vocab_size: Width of the one-hot character vectors.
            cell_size: Width of the cell state.
            batch_size: Number of rows in every placeholder.
            continue_training: If false, build and initialise a new graph and
                save it as checkpoint 0.  If true, import the meta graph
                saved at ``global_step`` and restore the weights.
            global_step: Checkpoint step used to locate the ``.meta`` file
                when ``continue_training`` is true.
            grad_limit: Gradients are clipped element-wise to
                ``[-grad_limit, grad_limit]``.  The default of 1.0 was picked
                during review because the original code referenced an
                undefined ``grad_limit`` -- tune as needed.
        """
        self.vocab_size = vocab_size
        self.cell_size = cell_size
        self.batch_size = batch_size
        self.grad_limit = grad_limit
        self.global_step = global_step
        # Must be set before _construct_networks, which uses it to build
        # checkpoint paths.
        self.MODEL_NAME = "LSTM"

        (self.x, self.y, self.init_C, self.init_h, self.C, self.h,
         self.train_step, self.saver, self.sess) = self._construct_networks(continue_training, global_step)

    def _weight(self, shape, dtype=tf.float32, name=None):
        """Return a variable initialised from N(0, 0.01^2) with the given shape."""
        m = 0.0
        s = 0.01
        initial = tf.random_normal(shape=shape, mean=m, stddev=s, dtype=dtype)
        return tf.Variable(initial, dtype=dtype, name=name)

    def _const(self, shape, dtype=tf.float32, name=None):
        """Return a 2-D constant that is 1 in its first column and 0 elsewhere.

        NOTE(review): this is used for the gate biases, which are therefore
        fixed rather than learned -- confirm this is intended.
        """
        tmp = np.zeros(shape=shape)
        tmp[:, 0] = 1
        return tf.constant(tmp, dtype=dtype, name=name)

    def _construct_networks(self, continue_training, global_step):
        """Create (or restore) the graph and return its handles.

        Args:
            continue_training: False to build a new graph, true to restore.
            global_step: Step of the ``.meta`` file to import when restoring.

        Returns:
            The tuple ``(x, y, init_C, init_h, C, h, train_step, saver, sess)``.
        """
        if not continue_training:
            vocab_size = self.vocab_size
            cell_size = self.cell_size
            batch_size = self.batch_size

            # Placeholders are only created for a fresh graph; when restoring
            # they come from the imported meta graph instead.
            x = tf.placeholder(tf.float32, [batch_size, vocab_size], name="x")
            y = tf.placeholder(tf.float32, [batch_size, vocab_size], name="y")
            init_C = tf.placeholder(tf.float32, [batch_size, cell_size], name="init_C")
            init_h = tf.placeholder(tf.float32, [batch_size, vocab_size], name="init_h")

            # Every gate reads the same concatenated input [h_prev, x].
            hx = tf.concat([init_h, x], axis=1)

            # Forget gate
            Wf = self._weight([2 * vocab_size, cell_size], name="Wf")
            bf = self._const([1, cell_size], name="bf")
            f = tf.sigmoid(tf.matmul(hx, Wf) + bf)

            # Input gate
            Wi = self._weight([2 * vocab_size, cell_size], name="Wi")
            bi = self._const([1, cell_size], name="bi")
            i = tf.sigmoid(tf.matmul(hx, Wi) + bi)

            # Candidate cell state
            Wd = self._weight([2 * vocab_size, cell_size], name="Wd")
            bd = self._const([1, cell_size], name="bd")
            D = tf.tanh(tf.matmul(hx, Wd) + bd)

            # Update cell state
            C = tf.add(f * init_C, i * D, name="C")

            # Output gate and cell prediction
            Wo = self._weight([2 * vocab_size, cell_size], name="Wo")
            bo = self._const([1, cell_size], name="bo")
            o = tf.sigmoid(tf.matmul(hx, Wo) + bo)
            h = tf.multiply(o, tf.tanh(C), name="h")

            # Loss: softmax_cross_entropy_with_logits applies the softmax
            # itself, so h is passed as raw logits (no extra softmax first).
            J = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=h),
                name="J")

            optimizer = tf.train.AdamOptimizer()
            grads_and_vars = optimizer.compute_gradients(J)
            limit = self.grad_limit
            clipped_grads_and_vars = [
                (tf.clip_by_value(grad, -limit, limit), var)
                for grad, var in grads_and_vars
                if grad is not None  # variables that do not affect J
            ]
            # Op names cannot contain ':'; the op is looked up again by this
            # name when a checkpoint is restored.
            train_step = optimizer.apply_gradients(clipped_grads_and_vars, name="train")

            # Session, Saver, etc
            saver = tf.train.Saver()
            sess = tf.Session()
            sess.run(tf.global_variables_initializer())
            saver.save(sess, self.MODEL_NAME, global_step=0)

        else:
            sess = tf.Session()

            saver = tf.train.import_meta_graph(self.MODEL_NAME + "-{}.meta".format(global_step))
            # Weights come from the most recent checkpoint in the working
            # directory, which is not necessarily the one at global_step.
            saver.restore(sess, tf.train.latest_checkpoint('./'))

            graph = tf.get_default_graph()

            x = graph.get_tensor_by_name("x:0")
            y = graph.get_tensor_by_name("y:0")
            init_C = graph.get_tensor_by_name("init_C:0")
            init_h = graph.get_tensor_by_name("init_h:0")
            C = graph.get_tensor_by_name("C:0")
            h = graph.get_tensor_by_name("h:0")

            # The train step is an operation without outputs, so it has to be
            # fetched as an operation rather than as a tensor.
            train_step = graph.get_operation_by_name("train")

        return x, y, init_C, init_h, C, h, train_step, saver, sess

    def train(self):
        """Run training steps (not implemented yet)."""
        stp = self.global_step
        #TODO

    def evaluate(self):
        """Evaluate the model (not implemented yet)."""
        pass
    
    