# GRU RNN
This notebook implements a GRU RNN from scratch, intended for use in the Yelp Dataset Challenge.

In [1]:
import pandas as pd
import sklearn as sk
import numpy as np
import tensorflow as tf
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn
import langdetect
import operator
import joblib
import itertools

# import utils

import sys
import argparse
import random

## GRU_rnn class and Computation Graph Building

__init__: this part of the code takes in parameters to be used for matrix dimensions later

__graph__: main part that constructs the computation graph. It first declares the placeholders and variables that will be used. The __step__ function computes the gates and the new state for every layer, collects the per-layer states, and returns them stacked

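tf.scan is used to apply the step function across the time steps of the input sequence inside the graph. (Open question: is this actually faster than the alternatives?)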

In [4]:
# Mini-batch size constant.
# NOTE(review): nothing in the visible code references this value — presumably
# it is meant to be passed to the batch generator; confirm against the caller.
BATCH_SIZE = 128

class GRU_rnn():
    """Stacked GRU network built by hand on top of ``tf.scan`` (TF 1.x graph API).

    The constructor builds the whole computation graph in the default graph
    (after resetting it) and exposes the tensors/ops needed by ``train`` and
    ``generate`` as attributes: ``X_input``, ``y_input``, ``init_state``,
    ``last_state``, ``predictions``, ``loss`` and ``train_op``.

    Args:
        state_size: Width of the hidden state; also used as the embedding size.
        num_classes: Vocabulary size (number of embedding rows and output logits).
        num_layers: Number of stacked GRU layers.
        ckpt_path: Directory prefix used when saving/restoring checkpoints.
        model_name: File-name prefix of the checkpoint inside ``ckpt_path``.
    """

    def __init__(self, state_size, num_classes, num_layers,
                 ckpt_path='ckpt/gru2/',
                 model_name='gru2'):
        # Hyper-parameters that fix the matrix dimensions of the graph.
        self.state_size = state_size
        self.num_classes = num_classes
        self.num_layers = num_layers
        self.ckpt_path = ckpt_path
        self.model_name = model_name

        def __graph__():
            """Build placeholders, parameters, the recurrent scan, loss and train op."""
            tf.reset_default_graph()  # Clears graph stack

            # Token ids: X is [batch, seq_len]; y is the flattened targets.
            X_input = tf.placeholder(shape=[None, None], dtype=tf.int32)
            y_input = tf.placeholder(shape=[None], dtype=tf.int32)

            # Embeddings: [batch, seq_len] ids -> [batch, seq_len, state_size].
            embs = tf.get_variable('emb', [num_classes, state_size])
            rnn_inputs = tf.nn.embedding_lookup(embs, X_input)

            # Initial hidden state, one [batch, state_size] slice per layer.
            init_state = tf.placeholder(shape=[num_layers, None, state_size],
                                        dtype=tf.float32, name='initial_state')

            xavier_init = tf.contrib.layers.xavier_initializer

            # Per-layer parameters; axis 1 indexes (update, reset, candidate).
            W = tf.get_variable('W',
                                shape=[num_layers, 3, self.state_size, self.state_size],
                                initializer=xavier_init())
            U = tf.get_variable('U',
                                shape=[num_layers, 3, self.state_size, self.state_size],
                                initializer=xavier_init())
            # NOTE(review): `b` is created but never used by `step` below. It is
            # kept so the set of graph variables (and therefore checkpoints)
            # stays unchanged; decide whether the gates should actually use it.
            b = tf.get_variable('b',
                                shape=[num_layers, self.state_size],
                                initializer=tf.constant_initializer(0.))

            def step(st_1, x):
                """Advance every layer by one time step.

                ``st_1`` holds the previous states stacked per layer and ``x``
                is the input for this time step. Each layer's new state is fed
                as the input of the next layer.
                """
                st = []
                x_in = x
                for i in range(num_layers):
                    # Update gate
                    z = tf.sigmoid(tf.matmul(x_in, U[i][0]) + tf.matmul(st_1[i], W[i][0]))
                    # Reset gate
                    r = tf.sigmoid(tf.matmul(x_in, U[i][1]) + tf.matmul(st_1[i], W[i][1]))
                    # Candidate hidden state
                    h = tf.tanh(tf.matmul(x_in, U[i][2]) + tf.matmul((r * st_1[i]), W[i][2]))
                    # New state: interpolate between candidate and previous state
                    st_i = (1 - z) * h + (z * st_1[i])
                    x_in = st_i
                    st.append(st_i)
                return tf.stack(st)

            # Scan over time: inputs are made time-major ([seq_len, batch, state_size]);
            # the result is [seq_len, num_layers, batch, state_size].
            states = tf.scan(step,
                             tf.transpose(rnn_inputs, [1, 0, 2]),
                             initializer=init_state)

            # State after the final time step (all layers); taken before the
            # reshape below so it can be fed back in as `init_state`.
            last_state = states[-1]

            # Output projection
            V = tf.get_variable('V', shape=[state_size, num_classes],
                                initializer=xavier_init())
            bo = tf.get_variable('bo', shape=[num_classes],
                                 initializer=tf.constant_initializer(0.))

            # -> [num_layers, batch, seq_len, state_size]; keep the top layer only.
            states = tf.transpose(states, [1, 2, 0, 3])[-1]
            # Flatten to 2d ([batch * seq_len, state_size]) for matmul with V
            states_flat = tf.reshape(states, [-1, state_size])
            logits = tf.matmul(states_flat, V) + bo
            predictions = tf.nn.softmax(logits)

            # TF 1.x requires keyword arguments for this op.
            losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=y_input, logits=logits)
            loss = tf.reduce_mean(losses)

            train_op = tf.train.AdagradOptimizer(learning_rate=0.1).minimize(loss)

            # Expose graph endpoints to train() / generate().
            self.X_input = X_input
            self.y_input = y_input
            self.loss = loss
            self.train_op = train_op
            self.predictions = predictions
            self.last_state = last_state
            self.init_state = init_state

        sys.stdout.write('\n<log> Building Graph...')
        __graph__()
        sys.stdout.write('</log>\n')

    def train(self, train_set, epochs=100, steps_per_epoch=300):
        """Train the model and save a checkpoint when training ends.

        Args:
            train_set: Iterator yielding ``(X, y)`` batches. ``X`` is fed to
                ``X_input`` and its first dimension is used as the batch size;
                ``y`` is flattened before being fed to ``y_input``.
            epochs: Number of epochs to run.
            steps_per_epoch: Batches drawn per epoch; the reported loss is the
                mean over these batches. Must be at least 1.

        Raises:
            ValueError: If ``steps_per_epoch`` is smaller than 1.

        A ``KeyboardInterrupt`` stops training early; the checkpoint is still
        written, tagged with the index of the epoch that was running.
        """
        if steps_per_epoch < 1:
            raise ValueError('steps_per_epoch must be >= 1')

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            # Defined up front so the checkpoint step is valid even when
            # `epochs` is 0 or the user interrupts immediately.
            epoch = 0
            try:
                for epoch in range(epochs):
                    train_loss = 0
                    for _ in range(steps_per_epoch):
                        X_input, y_input = next(train_set)
                        batch_size = X_input.shape[0]
                        _, train_loss_ = sess.run([self.train_op, self.loss], feed_dict={
                            self.X_input: X_input,
                            self.y_input: y_input.flatten(),
                            self.init_state: np.zeros(
                                [self.num_layers, batch_size, self.state_size])
                        })
                        train_loss += train_loss_
                    print('[{}] loss : {}'.format(epoch, train_loss / steps_per_epoch))
            except KeyboardInterrupt:
                print('interrupted by user at ' + str(epoch))

            # Training ends here, save checkpoint
            saver = tf.train.Saver()
            saver.save(sess, self.ckpt_path + self.model_name, global_step=epoch)

    def generate(self, idx2word, word2idx, num_words=100, div=' '):
        """Sample a sequence of words from the model.

        Args:
            idx2word: Index -> word lookup (indexable by integer id).
            word2idx: Word -> index lookup.
            num_words: Number of words to sample after the random seed word.
            div: Separator used to join the generated words.

        Returns:
            The seed word followed by ``num_words`` sampled words, joined by
            ``div``.
        """
        # Seed the sequence with a random vocabulary entry.
        random_init_word = random.choice(idx2word)
        current_word = word2idx[random_init_word]

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            # Restore the latest checkpoint, if there is one.
            ckpt = tf.train.get_checkpoint_state(self.ckpt_path)
            saver = tf.train.Saver()
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                sys.stdout.write('<log> No checkpoint found in {}; '
                                 'generating with untrained weights </log>\n'
                                 .format(self.ckpt_path))

            words = [current_word]
            state = None  # no recurrent state yet -> start from zeros
            for _ in range(num_words):
                if state is None:
                    state = np.zeros([self.num_layers, 1, self.state_size])
                feed_dict = {self.X_input: np.array([current_word]).reshape([1, 1]),
                             self.init_state: state}
                # Forward prop; carry the final state over to the next step.
                preds, state = sess.run([self.predictions, self.last_state],
                                        feed_dict=feed_dict)

                # float32 softmax output can miss np.random.choice's
                # sum-to-one tolerance, so renormalise in float64.
                probs = np.squeeze(preds).astype(np.float64)
                probs /= probs.sum()

                current_word = np.random.choice(preds.shape[-1], 1, p=probs)[0]
                words.append(current_word)

        return div.join([idx2word[w] for w in words])

## Parsing Arguments
The function below lets the user specify whether they want to continue training the network or generate words (and, if so, how many words to generate). Parsing arguments like this is generally a good habit, since it lets the code be run from the command line in multiple ways.

In [5]:
### 
# parse arguments
def parse_args(argv=None):
    """Parse the command-line options into a plain dict.

    Exactly one of ``-g/--generate`` or ``-t/--train`` must be given;
    ``-n/--num_words`` is optional.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) ``sys.argv[1:]`` is used, as before. Passing an
            explicit list makes the function usable from a notebook or a test.

    Returns:
        Dict with keys ``'generate'`` (bool), ``'train'`` (bool) and
        ``'num_words'`` (int, or ``None`` when ``-n`` was not supplied).
    """
    parser = argparse.ArgumentParser(
        description='Stacked Gated Recurrent Unit RNN for Text Hallucination, built with tf.scan')
    # The two run modes are mutually exclusive and one of them is mandatory.
    mode = parser.add_mutually_exclusive_group(required=True)
    mode.add_argument('-g', '--generate', action='store_true',
                      help='generate text')
    mode.add_argument('-t', '--train', action='store_true',
                      help='train model')
    parser.add_argument('-n', '--num_words', required=False, type=int,
                        help='number of words to generate')
    return vars(parser.parse_args(argv))

In [None]:
def rand_batch_gen(X_input, y_input, batch_size, seq_len):
    """Placeholder for the random batch generator (not implemented yet).

    Currently ignores all of its arguments and returns ``None``.

    NOTE(review): presumably this is meant to yield ``(X, y)`` batches for
    ``GRU_rnn.train`` — confirm the intended contract before implementing.
    """

In [None]:
###
# main function
if __name__ == '__main__':
    # parse arguments
    args = parse_args()
    #
    # fetch data
    ###
    # This part will be different depending how our data is loaded
    # Look into GLoVE embedding**
    ###
    #
    # create the model