# GRU LSTM
An attempt at building a stacked GRU recurrent model (called `gru_lstm` in the code) for the Yelp Dataset Challenge.
Based on the TensorFlow PTB tutorial: https://github.com/tensorflow/models/blob/master/tutorials/rnn/ptb/ptb_word_lm.py


In [None]:
%matplotlib inline

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import time

import pandas as pd
import sklearn as sk
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

import itertools

import utils
import util

import sys
import argparse
import random

## gru_lstm class
...

In [None]:
# Command-line flags for the experiment. The "(default: ...)" text in each
# help string is kept in sync with the actual default passed to DEFINE_*.
flags = tf.flags
flags.DEFINE_string("data_path", None, "Where the training/test data is stored.")
flags.DEFINE_string("save_path", None, "Model output directory.")
flags.DEFINE_string("training_files", "person_match.train2", "training file (default: person_match.train2)")  # for sentence semantic similarity use "train_snli.txt"

# Model parameters
flags.DEFINE_integer("embedding_dim", 300, "Dimensionality of character embedding (default: 300)")
flags.DEFINE_float("dropout_keep_prob", 0.8, "Dropout keep probability (default: 0.8)")
flags.DEFINE_integer("hidden_units", 100, "Number of hidden units (default: 100)")
flags.DEFINE_integer("num_layers", 2, "Number of stacked layers (default: 2)")

# Training parameters
flags.DEFINE_integer("batch_size", 128, "Batch Size (default: 128)")
flags.DEFINE_integer("num_epochs", 30, "Number of training epochs (default: 30)")
flags.DEFINE_integer("evaluate_every", 1000, "Evaluate model on dev set after this many steps (default: 1000)")
flags.DEFINE_integer("checkpoint_every", 1000, "Save model after this many steps (default: 1000)")

# Misc Parameters
flags.DEFINE_boolean("allow_soft_placement", True, "Allow soft device placement")
# run_epoch reads FLAGS.num_gpus when reporting words-per-second, so the flag
# has to exist; 1 leaves the reported speed unscaled.
flags.DEFINE_integer("num_gpus", 1, "Number of GPUs used to scale the reported speed (default: 1)")

FLAGS = flags.FLAGS

In [None]:
# Uses tf.contrib.rnn to build the GRU cells,
# which saves implementing a GRUCell from scratch.
# Based on the PTB example: https://github.com/tensorflow/models/blob/master/tutorials/rnn/ptb/ptb_word_lm.py

def data_type():
    """Return the floating-point dtype (``tf.float32``) used when building the model."""
    dtype = tf.float32
    return dtype

class gru_lstm(object):
    """Stacked-GRU sequence model adapted from the TensorFlow PTB tutorial.

    Constructing an instance adds to the current default graph: an embedding
    lookup, a stack of GRU cells unrolled with ``static_rnn``, a linear layer
    producing ``vocab_size`` logits, a sequence loss and, when
    ``is_training`` is true, an RMSProp update with global-norm clipping.
    """

    def __init__(self, X_inputs, y_labels, num_steps, batch_size, is_training,
                 vocab_size, embedding_size, state_size, num_layers, ckpt_path='ckpt/gru2/',
                 keep_prob=0.8, max_grad_norm=5.0, data_len=None):
        """Build the model graph.

        :param X_inputs: ids looked up in the embedding matrix; assumed to be
            an integer tensor of shape [batch_size, num_steps] -- TODO confirm
        :param y_labels: targets handed to ``sequence_loss``; assumed to be
            integer ids of shape [batch_size, num_steps] -- TODO confirm
        :param num_steps: number of time steps the RNN is unrolled for
        :param batch_size: number of sequences per batch
        :param is_training: bool; enables dropout and builds the training ops
        :param vocab_size: number of rows of the embedding / size of the logits
        :param embedding_size: stored on the instance only (see NOTE below)
        :param state_size: hidden size of every GRU cell
        :param num_layers: number of stacked GRU cells
        :param ckpt_path: checkpoint directory; stored, not used by this class
        :param keep_prob: dropout keep probability, a number in [0, 1]
        :param max_grad_norm: global-norm threshold for gradient clipping
        :param data_len: optional number of items in the data set; when given,
            ``epoch_size`` is derived from it, otherwise ``epoch_size`` is None
        """
        # stacked_gru_graph/gru_cell insist on a float, so accept e.g. an int 1.
        keep_prob = float(keep_prob)

        self.epoch_size = None
        if data_len is not None:
            self.epoch_size = ((data_len // batch_size) - 1) // num_steps
        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.state_size = state_size
        self.batch_size = batch_size
        self.num_steps = num_steps
        self.num_layers = num_layers
        self.ckpt_path = ckpt_path
        self._is_training = is_training

        self._cell = None
        # NOTE(review): this placeholder is created but nothing in the graph
        # consumes it; dropout below uses the Python float `keep_prob`.
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")

        # NOTE(review): the embedding width is `state_size`, not
        # `embedding_size` -- kept as in the original; confirm the intent.
        embedding = tf.get_variable("embedding", [vocab_size, state_size], dtype=data_type())

        inputs = tf.nn.embedding_lookup(embedding, X_inputs)
        if is_training and keep_prob < 1:
            inputs = tf.nn.dropout(inputs, keep_prob)

        # `output` is 2-D: one row per (sequence, time step) pair.
        output, state = self.stacked_gru_graph(inputs, state_size, keep_prob, num_layers, is_training)

        # The last cell projects its output, so take the width from the cell
        # stack instead of assuming it equals `state_size`.
        size = self._cell.output_size
        W = tf.get_variable("W", [size, vocab_size], dtype=data_type())
        b = tf.get_variable("b", [vocab_size], dtype=data_type())

        logits = tf.nn.xw_plus_b(output, W, b)
        logits = tf.reshape(logits, [self.batch_size, self.num_steps, vocab_size])

        loss = tf.contrib.seq2seq.sequence_loss(logits,
                                                y_labels,
                                                tf.ones([self.batch_size, self.num_steps], dtype=data_type()),
                                                average_across_timesteps=False,
                                                average_across_batch=True)

        self._cost = tf.reduce_sum(loss)
        self._final_state = state

        self._train_op = None
        self._learn_rate = None
        self._new_learn_rate = None
        self._learn_update = None
        if not is_training:
            # Evaluation models share variables through a reuse=True scope;
            # building an optimizer there would try to create new slot
            # variables and fail, so the training ops are skipped.
            return

        self._learn_rate = tf.Variable(0.0, trainable=False)  # Learning Rate
        train_vars = tf.trainable_variables()  # All variables with trainable=True
        gradients, _ = tf.clip_by_global_norm(tf.gradients(self._cost, train_vars),
                                              max_grad_norm)

        # Using RMSProp for RNN's rather than SGD as usual
        optimizer = tf.train.RMSPropOptimizer(self._learn_rate)
        self._train_op = optimizer.apply_gradients(zip(gradients, train_vars),
                                                   global_step=tf.train.get_or_create_global_step())

        self._new_learn_rate = tf.placeholder(tf.float32, shape=[], name="new_learning_rate")
        self._learn_update = tf.assign(self._learn_rate, self._new_learn_rate)

    @property
    def initial_state(self):
        """Zero state of the cell stack, created in ``stacked_gru_graph``."""
        return self._initial_state

    @property
    def final_state(self):
        """State returned by ``static_rnn`` after the last time step."""
        return self._final_state

    @property
    def cost(self):
        """Scalar cost tensor (sum of the per-time-step sequence loss)."""
        return self._cost

    @property
    def lr(self):
        """Learning-rate variable, or None for a non-training model."""
        return self._learn_rate

    @property
    def train_op(self):
        """Op applying the clipped gradients, or None for a non-training model."""
        return self._train_op

    def gru_cell(self, hidden_state_size, keep_prob, is_training, layer, num_layers):
        """Create the GRU cell for one layer of the stack.

        The last layer (``layer == num_layers - 1``) is wrapped in an
        ``OutputProjectionWrapper`` and never gets dropout; every other layer
        gets a ``DropoutWrapper`` when training with ``keep_prob < 1``.

        :param hidden_state_size: int, number of units in the GRU cell
        :param keep_prob: float, output keep probability for dropout
        :param is_training: bool, whether dropout may be applied
        :param layer: int, zero-based index of this layer
        :param num_layers: int, total number of layers (must be positive)
        :raises AssertionError: on a wrong argument type or num_layers <= 0
        """
        assert isinstance(keep_prob, float)
        assert isinstance(hidden_state_size, int)
        assert isinstance(is_training, bool)
        assert isinstance(layer, int)
        assert isinstance(num_layers, int)
        if num_layers <= 0:
            raise AssertionError("Need positive number of layers")

        cell = tf.contrib.rnn.GRUCell(hidden_state_size)

        if layer == num_layers - 1:
            # presumably 6 is the number of target classes -- TODO confirm
            return tf.contrib.rnn.OutputProjectionWrapper(cell,
                                                          output_size=6)
        elif is_training and keep_prob < 1:
            return tf.contrib.rnn.DropoutWrapper(cell,
                                                 output_keep_prob=keep_prob)
        else:
            return cell

    def stacked_gru_graph(self, X_inputs, state_size, keep_prob, num_layers, is_training):
        """Unroll a stack of GRU cells over the inputs.

        Reads ``self.num_steps`` and ``self.batch_size``; sets ``self._cell``
        and ``self._initial_state``.

        :param X_inputs: tensor unstacked along axis 1 into ``self.num_steps``
            per-step inputs (``__init__`` passes the embedded inputs)
        :param state_size: int, hidden size of each GRU cell
        :param keep_prob: float in [0, 1], dropout keep probability
        :param num_layers: int, number of stacked cells
        :param is_training: bool, forwarded to ``gru_cell``
        :return: ``(output, state)`` where ``output`` is the per-step outputs
            concatenated and reshaped to 2-D with the stack's output width as
            the last dimension, and ``state`` is the final RNN state
        :raises AssertionError: on a wrong argument type or keep_prob out of range
        """
        assert isinstance(state_size, int)
        assert isinstance(keep_prob, float)
        if keep_prob < 0 or keep_prob > 1:
            raise AssertionError("needs to be a value between 0 and 1")
        assert isinstance(num_layers, int)
        assert isinstance(is_training, bool)

        layer_cells = [self.gru_cell(state_size, keep_prob, is_training, layer, num_layers)
                       for layer in range(num_layers)]
        stacked_gru = tf.contrib.rnn.MultiRNNCell(cells=layer_cells, state_is_tuple=True)
        self._cell = stacked_gru

        inputs = tf.unstack(X_inputs, num=self.num_steps, axis=1)

        self._initial_state = stacked_gru.zero_state(self.batch_size, data_type())

        outputs, state = tf.contrib.rnn.static_rnn(stacked_gru, inputs,
                                                   initial_state=self._initial_state)

        # static_rnn returns one tensor per time step; flatten them into the
        # 2-D matrix that xw_plus_b expects, as the PTB tutorial does.
        output = tf.reshape(tf.concat(outputs, 1), [-1, stacked_gru.output_size])

        return output, state

    def assign_lr(self, session, lr_value):
        """Set the learning-rate variable.

        :param session: session used to run the assignment op
        :param lr_value: float, new learning rate value multiplied by decay rate
        :raises RuntimeError: if the model was built with is_training=False
        """
        assert isinstance(lr_value, float)
        if self._learn_update is None:
            raise RuntimeError("learning rate can only be assigned on a training model")
        session.run(self._learn_update, feed_dict={self._new_learn_rate: lr_value})

    def export_ops(self, name):
        """Export ops to graph collections under the prefix ``name``."""
        self._name = name
        ops = {util.with_prefix(self._name, "cost"): self._cost}
        if self._is_training:
            ops.update(lr=self._learn_rate,
                       new_lr=self._new_learn_rate,
                       lr_update=self._learn_update)
        for collection_name, op in ops.items():
            tf.add_to_collection(collection_name, op)
        self._initial_state_name = util.with_prefix(self._name, "initial")
        self._final_state_name = util.with_prefix(self._name, "final")
        # NOTE(review): if `util` is the PTB tutorial helper, these functions
        # expect LSTM state tuples with .c/.h; a GRU state is a plain tensor
        # per layer -- confirm util handles that.
        util.export_state_tuples(self._initial_state, self._initial_state_name)
        util.export_state_tuples(self._final_state, self._final_state_name)

    def import_ops(self):
        """Import ops from graph collections; ``export_ops`` must run first."""
        if self._is_training:
            self._train_op = tf.get_collection_ref("train_op")[0]
            self._learn_rate = tf.get_collection_ref("lr")[0]
            self._new_learn_rate = tf.get_collection_ref("new_lr")[0]
            self._learn_update = tf.get_collection_ref("lr_update")[0]

        self._cost = tf.get_collection_ref(util.with_prefix(self._name, "cost"))[0]
        # presumably util.import_state_tuples restores
        # len(state) * num_replicas entries, as in the PTB tutorial, so 0
        # would restore an empty state; a single replica is 1 -- TODO confirm.
        num_replicas = 1
        self._initial_state = util.import_state_tuples(self._initial_state,
                                                       self._initial_state_name,
                                                       num_replicas)
        self._final_state = util.import_state_tuples(self._final_state,
                                                     self._final_state_name,
                                                     num_replicas)

In [None]:
def run_epoch(session, model, eval_op=None, verbose=False):
    """Run the model for one epoch and return the perplexity.

    Each step feeds the previous step's final state back in as the initial
    state, runs ``model.cost`` / ``model.final_state`` (plus ``eval_op`` when
    given) and accumulates the cost.

    :param session: object with a ``run(fetches, feed_dict)`` method
    :param model: object exposing ``initial_state``, ``cost`` and
        ``final_state``; ``epoch_size``, ``num_steps`` and ``batch_size`` are
        read from ``model.input`` when that attribute exists, otherwise from
        the model itself
    :param eval_op: optional extra op to run each step (e.g. a train op)
    :param verbose: when true, print progress, perplexity and speed
    :return: ``np.exp(total_cost / total_time_steps)``
    :raises ValueError: if the epoch size is missing or not positive
    """
    start_time = time.time()
    costs = 0.0
    iters = 0
    state = session.run(model.initial_state)

    fetches = {
        "cost": model.cost,
        "final_state": model.final_state,
    }
    if eval_op is not None:
        fetches["eval_op"] = eval_op

    # PTB-style models keep the sizes on `model.input`; gru_lstm stores them
    # directly on the model.
    sizes = getattr(model, "input", model)
    epoch_size = sizes.epoch_size
    if not epoch_size or epoch_size < 1:
        raise ValueError("epoch_size must be a positive integer, got %r" % (epoch_size,))
    # Guard the modulo below against epochs shorter than 10 steps.
    log_every = max(1, epoch_size // 10)

    for step in range(epoch_size):
        feed_dict = {}
        for i, layer_state in enumerate(model.initial_state):
            if hasattr(layer_state, "c") and hasattr(layer_state, "h"):
                # LSTM-style state tuple: feed both parts.
                feed_dict[layer_state.c] = state[i].c
                feed_dict[layer_state.h] = state[i].h
            else:
                # Single-tensor state, e.g. a GRU layer.
                feed_dict[layer_state] = state[i]

        vals = session.run(fetches, feed_dict)
        cost = vals["cost"]
        state = vals["final_state"]

        costs += cost
        iters += sizes.num_steps

        if verbose and step % log_every == 10:
            # The flag may not be defined; fall back to a single device.
            num_gpus = max(1, getattr(FLAGS, "num_gpus", 1))
            print("%.3f perplexity: %.3f speed: %.0f wps" %
                  (step * 1.0 / epoch_size, np.exp(costs / iters),
                   iters * sizes.batch_size * num_gpus /
                   (time.time() - start_time)))

    return np.exp(costs / iters)

In [None]:
def get_train_data():
    """Stub for the training-data loader; not implemented yet, returns None."""
    return None

def get_test_data():
    """Stub for the test-data loader; not implemented yet, returns None."""
    return None

def main(_):
    """Build the train and test models, export them and re-import the metagraph.

    The first graph holds a training model and a test model that share
    variables through the "Model" variable scope; both export their ops and
    the graph is serialised with ``export_meta_graph``. The second graph
    imports that metagraph, lets each model re-bind its ops, and creates a
    ``Supervisor`` and a ``ConfigProto``.

    NOTE(review): no training loop is run yet -- ``sv`` and ``config_proto``
    are created but not used.

    :param _: unused argv list passed by ``tf.app.run``
    """
    # NOTE(review): `reader` is not imported in the visible file and
    # `raw_data` is not used below -- confirm where the data should come from.
    raw_data = reader.ptb_raw_data(FLAGS.data_path)

    with tf.Graph().as_default():
        initializer = tf.random_uniform_initializer(-0.1, 0.1)
        # NOTE(review): X_inputs, y_labels, num_steps, vocab_size and
        # embedding_size are not defined in the visible file; presumably they
        # are meant to be derived from get_train_data()/get_test_data() --
        # TODO wire them up once those loaders are implemented.
        with tf.name_scope("Train"):
            train_input = get_train_data()
            with tf.variable_scope("Model", reuse=None, initializer=initializer):
                train_model = gru_lstm(X_inputs, y_labels, num_steps, batch_size,
                                       True, vocab_size, embedding_size,
                                       state_size, num_layers)
            tf.summary.scalar("Training Loss", train_model.cost)
            tf.summary.scalar("Learning Rate", train_model.lr)

        with tf.name_scope("Test"):
            test_input = get_test_data()
            # reuse=True so the test model shares the trained variables.
            # TODO: feed tensors derived from test_input instead of the
            # training tensors once get_test_data() returns data.
            with tf.variable_scope("Model", reuse=True, initializer=initializer):
                model_test = gru_lstm(X_inputs, y_labels, num_steps, batch_size,
                                      False, vocab_size, embedding_size,
                                      state_size, num_layers)

        models = {"Train": train_model, "Test": model_test}
        for name, model in models.items():
            model.export_ops(name)

        metagraph = tf.train.export_meta_graph()

    with tf.Graph().as_default():
        tf.train.import_meta_graph(metagraph)
        for model in models.values():
            model.import_ops()
        sv = tf.train.Supervisor(logdir=FLAGS.save_path)
        # Honour the --allow_soft_placement flag instead of hard-coding False.
        config_proto = tf.ConfigProto(allow_soft_placement=FLAGS.allow_soft_placement)
    

# Script entry point: let tf.app.run parse the flags and then call main.
if __name__ == "__main__":
    tf.app.run(main=main)

In [None]:
# Hyperparameters for the scratch experiment in this cell.
num_layers = 2
state_size = 1024
batch_size = 128
time_steps = 2
num_features = 100
keep_prob = 0.8  # keep 80% of the outputs when dropout is applied

# Based on the TensorFlow PTB example found at
# https://github.com/tensorflow/models/blob/master/tutorials/rnn/ptb/ptb_word_lm.py

# Float placeholder with dimensions [time_steps, batch_size, num_features].
words_in_dataset = tf.placeholder(
    dtype=tf.float32,
    shape=[time_steps, batch_size, num_features],
)




# Initial state of LSTM
# initial_state = state = stacked_gru.zero_state(batch_size, tf.float32)
