In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from bs4 import BeautifulSoup
import os
import re
import pandas as pd
%matplotlib inline

In [2]:
def vocab2vec(vocab_size, vocab_length=10**7):
    """One-hot encode the leading characters of ``Combined_String.txt``.

    At most ``vocab_length`` characters are read from the start of the file
    (fewer if the file is shorter). Each character is lower-cased and looked
    up in a fixed alphabet; characters that are not in the alphabet, or whose
    alphabet index does not fit into a ``vocab_size``-wide vector, are skipped.

    Args:
        vocab_size: Width of every one-hot row.
        vocab_length: Maximum number of characters to consume from the file.

    Returns:
        A float array of shape ``(n_kept, vocab_size)`` where ``n_kept`` is the
        number of characters that could be encoded (``n_kept <= vocab_length``).
        The shape is also printed.
    """
    alphabet = 'abcdefghijklmnopqrstuvwxyz .,\'1234567890";'
    # Constant-time lookup; indices that would overflow the one-hot row are
    # left out so that those characters are skipped like unknown ones.
    char_to_index = {ch: idx for idx, ch in enumerate(alphabet) if idx < vocab_size}

    # Read only the prefix that is needed; a negative length means "nothing".
    with open("Combined_String.txt", "r") as f:
        text = f.read(max(vocab_length, 0))

    res = []
    for ch in text:
        idx = char_to_index.get(ch.lower())
        if idx is None:
            continue
        v = np.zeros((vocab_size))
        v[idx] = 1
        res.append(v)

    # Keep the result two-dimensional even when nothing could be encoded.
    ret = np.array(res) if res else np.zeros((0, vocab_size))
    print ("shape is: {}".format(ret.shape))
    return ret

#vocab2vec(40)

## Gated Recurrent Unit ##

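- arXiv paper: [Learning Phrase Representations using RNN Encoder-Decoder for Statistical Machine Translation (Cho et al., 2014)](https://arxiv.org/pdf/1406.1078v3.pdf)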
- How the seq2seq NMT model is set up here

In [None]:
# My implementation of GRU on character reading - based on the equation above
# TODO - not implemented yet
class GRU:
    """Character-level recurrent model built with the TensorFlow 1.x graph API.

    The model consumes one-hot encoded characters and is trained to predict
    the next character. Checkpoints are written under ``self.MODEL_NAME``.

    NOTE(review): despite the class name, the cell wired up in
    ``_construct_networks`` keeps a separate cell state ``C`` next to the
    hidden state ``h`` (an LSTM-style layout) and applies ``softmax`` where
    gate equations normally use a sigmoid. The actual GRU equations are still
    to be implemented.

    Attributes set by the constructor:
        x, y: Input / target placeholders, ``[batch_size, vocab_size]``.
        init_C: Previous cell state placeholder, ``[batch_size, cell_size]``.
        init_h: Previous hidden state placeholder, ``[batch_size, vocab_size]``.
        C, h: New cell state and hidden state tensors.
        hs: Softmax of ``h``; used as the sampling distribution.
        loss: Mean softmax cross-entropy between ``y`` and ``h``.
        train_step: Operation applying the clipped Adam update.
        saver, sess: ``tf.train.Saver`` and ``tf.Session`` owning the graph.
    """

    def __init__(self, vocab_size, cell_size, batch_size, continue_training = False, global_step = -1):
        """Store the hyper-parameters and build (or restore) the graph.

        Args:
            vocab_size: Width of the one-hot character vectors.
            cell_size: Width of the cell state. The element-wise product
                ``o * tanh(C)`` requires it to be broadcast-compatible with
                ``vocab_size``.
            batch_size: Number of rows fed per training step.
            continue_training: If true, restore the checkpoint identified by
                ``global_step`` instead of building a fresh graph.
            global_step: Step counter; also the checkpoint suffix to restore.
        """
        self.batch_size = batch_size
        self.vocab_size = vocab_size
        self.cell_size = cell_size
        self.global_step = global_step
        self.MODEL_NAME = "./model/GRU"
        self.TEST_SAMPLE_SEQ_LENGTH = 100
        self._construct_networks(vocab_size, cell_size, batch_size, continue_training, global_step)

    def _weight(self, shape, dtype=tf.float32, name=None):
        """Create a trainable variable initialised from N(0, 0.01**2)."""
        m = 0
        s = 0.01
        initial = tf.random_normal(shape=shape, mean=m, stddev=s, dtype=dtype)
        # dtype must be passed by keyword: the second positional parameter of
        # tf.Variable is `trainable`, not `dtype`.
        return tf.Variable(initial, dtype=dtype, name=name)

    def _const(self, shape, name, dtype=tf.float32):
        """Create a 2-D constant that is 1 in its first column and 0 elsewhere.

        The result is a ``tf.constant``, so it is not updated by training.
        """
        values = np.zeros(shape=shape)
        # Scalar assignment broadcasts over any number of rows.
        values[:, 0] = 1
        return tf.constant(values, dtype=dtype, name=name)

    def _ohe2char(self, ohe_vec): # takes only the first row in ohe_vec
        """Sample one character from the distribution in the first row.

        Args:
            ohe_vec: 2-D array with ``vocab_size`` columns; row 0 holds
                non-negative weights over the character table.

        Returns:
            The sampled character. Indices beyond the built-in character
            table raise ``IndexError``.

        Raises:
            ValueError: If the first row does not have a positive sum.
        """
        assert ohe_vec.shape[1] == self.vocab_size
        chars = 'abcdefghijklmnopqrstuvwxyz .,\'1234567890";'
        probs = np.asarray(ohe_vec[0, :], dtype=np.float64).ravel()
        total = probs.sum()
        if not total > 0:
            raise ValueError("cannot sample from a distribution with non-positive sum")
        # float32 softmax output is often not within np.random.choice's
        # tolerance of summing to 1, so renormalise in float64 first.
        probs = probs / total
        choice_id = np.random.choice(self.vocab_size, p=probs)
        return chars[choice_id]

    def _construct_networks(self, vocab_size, cell_size, batch_size, continue_training, global_step):
        """Build a fresh graph, or restore one from a checkpoint.

        A fresh build initialises the variables and immediately writes a
        step-0 checkpoint. A restore imports
        ``<MODEL_NAME>-<global_step>.meta`` and loads the matching weights,
        then looks tensors up by the names assigned in the fresh build.
        """
        graph = tf.Graph()
        with graph.as_default():
            if not continue_training:
                # Placeholders are created only here; when restoring they come
                # from the imported meta graph (creating them first would make
                # the imported names collide).
                x = tf.placeholder(tf.float32, [batch_size, vocab_size], name="x")
                y = tf.placeholder(tf.float32, [batch_size, vocab_size], name="y")
                init_C = tf.placeholder(tf.float32, [batch_size, cell_size], name="init_C")
                init_h = tf.placeholder(tf.float32, [batch_size, vocab_size], name="init_h")

                # Every gate sees the previous hidden state joined with the input.
                hx = tf.concat([init_h, x], axis=1)

                # Update gate
                Wz = self._weight([2 * vocab_size, cell_size], name="Wz")
                bz = self._const([1, cell_size], name="bz")
                f = tf.nn.softmax(tf.matmul(hx, Wz) + bz, dim=1)

                # Reset gate
                Wi = self._weight([2 * vocab_size, cell_size], name="Wi")
                bi = self._const([1, cell_size], name="bi")
                i = tf.nn.softmax(tf.matmul(hx, Wi) + bi, dim=1)

                # Next cell state
                Wd = self._weight([2 * vocab_size, cell_size], name="Wd")
                bd = self._const([1, cell_size], name="bd")
                D = tf.tanh(tf.matmul(hx, Wd) + bd)

                # Update cell state
                C = tf.add(f * init_C, i * D, name="C")

                # Output layers
                Wo = self._weight([2 * vocab_size, vocab_size], name="Wo")
                bo = self._const([1, vocab_size], name="bo")
                o = tf.nn.softmax(tf.matmul(hx, Wo) + bo, dim=1)
                h = tf.multiply(o, tf.tanh(C), name="h")
                hs = tf.nn.softmax(h, name="hs")

                # Loss function, etc. The name goes on the reduced scalar so
                # that it can be found again as "loss:0" after a restore.
                loss = tf.reduce_mean(
                    tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=h),
                    name="loss")
                optimizer = tf.train.AdamOptimizer()
                grad_limit = tf.constant(5.0, dtype=tf.float32, name="grad_limit")
                clipped_grads_and_vars = []
                for grad, var in optimizer.compute_gradients(loss):
                    if grad is None:
                        # Variable does not influence the loss; nothing to clip.
                        continue
                    clipped_grad = tf.clip_by_value(grad, -grad_limit, grad_limit)
                    clipped_grads_and_vars.append((clipped_grad, var))
                train_step = optimizer.apply_gradients(clipped_grads_and_vars, name="train")

                # Session, Saver, etc.
                saver = tf.train.Saver()
                sess = tf.Session()
                sess.run(tf.global_variables_initializer())
                # Saver.save does not create the target directory itself.
                model_dir = os.path.dirname(self.MODEL_NAME)
                if model_dir and not os.path.isdir(model_dir):
                    os.makedirs(model_dir)
                saver.save(sess, self.MODEL_NAME, global_step=0)

            else:
                sess = tf.Session()

                # Restore exactly the checkpoint whose meta graph is imported;
                # checkpoints live next to MODEL_NAME, not in the working dir.
                checkpoint = "{}-{}".format(self.MODEL_NAME, global_step)
                saver = tf.train.import_meta_graph(checkpoint + ".meta")
                saver.restore(sess, checkpoint)

                x = graph.get_tensor_by_name("x:0")
                y = graph.get_tensor_by_name("y:0")
                init_C = graph.get_tensor_by_name("init_C:0")
                init_h = graph.get_tensor_by_name("init_h:0")
                C = graph.get_tensor_by_name("C:0")
                h = graph.get_tensor_by_name("h:0")
                hs = graph.get_tensor_by_name("hs:0")
                loss = graph.get_tensor_by_name("loss:0")

                # "train" is an Operation (it has no output tensor).
                train_step = graph.get_operation_by_name("train")

        # After creation, save to class variables
        self.x = x
        self.y = y
        self.init_C = init_C
        self.init_h = init_h
        self.C = C
        self.h = h
        self.hs = hs
        self.loss = loss
        self.train_step = train_step
        self.saver = saver
        self.sess = sess

    def train(self, steps, training_data, sample = True, sample_every = 200000, save_per_step = 1000):
        """Run ``steps`` passes over ``training_data``.

        Each pass starts from random states and walks the data in windows of
        ``batch_size`` rows; the target window is the input window shifted by
        one row. The states returned by one window are fed into the next.

        Args:
            steps: Number of passes over the data.
            training_data: 2-D array of one-hot rows (``vocab_size`` columns).
            sample: Whether to print sampled characters during training.
            sample_every: Sample when the data offset is a multiple of this
                value (and the pass index is a multiple of 100).
            save_per_step: Write a checkpoint whenever ``global_step`` is a
                multiple of this value; a falsy value disables saving.
        """
        batch_size = self.batch_size
        vocab_size = self.vocab_size
        cell_size = self.cell_size
        n_rows = len(training_data)

        for stp in range(steps):

            prev_C = np.random.rand(batch_size, cell_size)
            prev_h = np.random.rand(batch_size, vocab_size)
            p = 0
            while p < (n_rows - batch_size - 1):
                fdata = {self.init_C: prev_C,
                         self.init_h: prev_h,
                         self.x: training_data[p : p + batch_size],
                         self.y: training_data[p+1 : p+1+batch_size]
                         }
                _, prev_C, prev_h, loss = self.sess.run([self.train_step, self.C, self.h, self.loss], feed_dict = fdata)

                p += batch_size

                # After the increment the window may run past the end of the
                # data; the placeholders need a full batch, so skip sampling then.
                full_window = p + 1 + batch_size <= n_rows
                if sample and full_window and p % sample_every == 0 and stp % 100 == 0:
                    # Perform a trial of sample run
                    fdata = {self.init_C: prev_C,
                             self.init_h: prev_h,
                             self.x: training_data[p : p + batch_size],
                             self.y: training_data[p+1 : p+1+batch_size]
                             }
                    # The feeds do not change between draws, so evaluate the
                    # graph once and draw every character from that result.
                    vec_ohe, loss = self.sess.run([self.hs, self.loss], feed_dict = fdata)
                    words_outputs = "".join(
                        self._ohe2char(vec_ohe) for _ in range(self.TEST_SAMPLE_SEQ_LENGTH))

                    print ("--- n = {}, p = {}, loss = {} ---".format(self.global_step, p, loss))
                    print ("{}\n".format(words_outputs))

            if save_per_step and self.global_step % save_per_step == 0:
                # Make the model's graph the default while saving so that the
                # exported meta graph is this model's.
                with self.sess.graph.as_default():
                    self.saver.save(self.sess, self.MODEL_NAME, global_step = self.global_step)

            self.global_step += 1
    
    
# Script entry point: build a fresh model, encode the corpus and train on it.
if __name__ == "__main__":
    print ("Started!")
    # The only model class defined in this file is GRU.
    model = GRU(vocab_size = 40,
                cell_size = 40, # They have to be equal. GGWP
                batch_size = 1000,
                continue_training = False,
                global_step = -1)
    training_words = vocab2vec(40, 10 ** 6)
    model.train(steps = 10**5, training_data = training_words, sample = True, sample_every = 500000, save_per_step = 100000)