# GRU LSTM
Attempts to create a GRU LSTM model to use for Yelp Dataset Challenge
Based off of TensorFlow Tutorial: https://github.com/tensorflow/models/blob/master/tutorials/rnn/ptb/ptb_word_lm.py


In [1]:
%matplotlib inline

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import time

import pandas as pd
import sklearn as sk
from sklearn.model_selection import train_test_split
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import keras

import itertools

import utils
import util

import sys
import argparse
import random

Using TensorFlow backend.


## gru_lstm class
...

In [2]:
EMBEDDING_SIZE = 300
DROPOUT_KEEP_PROB = 0.8
HIDDEN_UNITS = 100
NUM_LAYERS = 2
NUM_EPOCHS = 2
NUM_STEPS = 20
BATCH_SIZE = 128
MAX_EPOCHS = 30
EVALUATE_EVERY = 1000
CHECKPOINT_EVERY = 1000

VOCAB_SIZE = 10000

LEARNING_RATE = 1
LEARN_RATE_DECAY = 0.5
DECAY_EVERY = 3

DATA_PATH = "./data/"
SAVE_PATH = "./saves/"


In [19]:
# Uses tf.contrib to generate GRU cell
# Saves implementing own GRUCell from scratch
# Based off of PTB example: # https://github.com/tensorflow/models/blob/master/tutorials/rnn/ptb/ptb_word_lm.py

def data_type():
    return tf.float32

class gru_lstm(object):
    def assign_lr(self, session, lr_value):
        """
        :param: lr_value- new learning rate value multiplied by decay rate
        """
        assert isinstance(lr_value, float)
        session.run(self._learn_update, feed_dict={self._new_learn_rate: lr_value})
        
    def gru_cell(self, hidden_state_size, keep_prob, is_training, layer, num_layers):
    
        assert isinstance(keep_prob, float)
        assert isinstance(hidden_state_size, int)
        assert isinstance(is_training, bool)
        assert isinstance(layer, int)
        assert isinstance(num_layers, int)      
        if num_layers <= 0:
            raise AssertionError("Need positive number of layers")
    
        cell = tf.contrib.rnn.GRUCell(hidden_state_size)
    
        if layer == num_layers - 1:
            return tf.contrib.rnn.OutputProjectionWrapper(cell,
                                                          output_size=6)
        elif is_training and keep_prob < 1:
            return tf.contrib.rnn.DropoutWrapper(cell,
                                                 output_keep_prob=keep_prob)
        else:
            return cell
            
    def stacked_gru_graph(self, X_inputs, state_size, keep_prob, num_layers, is_training):
        assert isinstance(state_size, int)
        assert isinstance(keep_prob, float)
        if keep_prob < 0 or keep_prob > 1:
            raise AssertionError("needs to be a value between 0 and 1")
        assert isinstance(num_layers, int)
        assert isinstance(is_training, bool)

        stacked_gru = tf.contrib.rnn.MultiRNNCell(
            cells=[gru_cell(state_size, keep_prob, _, num_layers, is_training) for _ in range(num_layers)], 
            state_is_tupl=True)

        inputs = tf.unstack(X_inputs, num=num_steps, axis=1)

        self._initial_state = cell.zero_state(config.batch_size, data_type())

        outputs, state = tf.contrib.rnn.static_rnn(cell, inputs,
                                                   initial_state=self._initial_state)

        return output, state
    
    def __init__(self, X_inputs, y_labels, num_steps, batch_size, is_training, 
                 vocab_size, embedding_size, state_size, num_layers, dropout_keep_prob):
        
        self.epoch_size = ((len(X_inputs) // batch_size) - 1) // num_steps
        self.vocab_size = vocab_size # Dont know if need
        self.embedding_size = embedding_size
        self.state_size = state_size
        self.batch_size = batch_size
        self.num_layers = num_layers
        self._is_training = is_training
        
        self._cell = None        
        embedding = tf.get_variable("embedding", [vocab_size, self.state_size], dtype=data_type())
        
        inputs = tf.nn.embedding_lookup(embedding, X_inputs)
        if is_training and dropout_keep_prob < 1:
            inputs = tf.nn.dropout(inputs, dropout_keep_prob)
        
        # Output is Neural Network
        output, state = stacked_gru_graph(X_inputs,
                                          state_size,
                                          dropout_keep_prob,
                                          num_layers,
                                          is_training)
        
        W = tf.get_variable("W", [size, vocab_size], dtype=data_type())
        b = tf.get_variable("b", [vocab_size], dtype=data_type())
        
        
        logits = tf.nn.xw_plus_b(output, W, b)
        logits = tf.reshape(logits, [self.batch_size, self.num_steps, vocab_size])
        
        loss = tf.contrib.seq2seq.sequence_loss(logits,
                                                y_labels,
                                                tf.ones([self.batch_size, self.num_steps], dtype=data_type()),
                                                average_across_timesteps=False,
                                                average_across_batch=True)
                                               
        
        self._cost = tf.reduce_sum(loss)
        self._final_state = state
        
        self._learn_rate = tf.Variable(0.0, trainable=False) # Learning Rate
        gradients, _ = tf.clip_by_global_norm(tf.gradients(self._cost, tvars),
                                          config.max_grad_norm)
        train_vars = tf.trainable_variables() # Returns all variables specified trainable=True
        
        optimizer = tf.train.RMSPropOptimizer(self._learn_rate) # Using RMSProp for RNN's rather than SGD as usual
        self._train_op = optimizer.apply_gradients(zip(gradients, train_vars),
                                                   global_step=tf.train.get_or_create_global_step())
        
        self._new_learn_rate = tf.placeholder(tf.float32, shape=[], name="new_learning_rate")
        self._learn_update = tf.assign(self._learn_rate, self._new_learn_rate)
    
   
        
    def export_ops(self, name):
        """Exports ops to collections."""
        self._name = name
        ops = {util.with_prefix(self._name, "cost"): self._cost}
        if self._is_training:
            ops.update(lr=self._learn_rate, new_learn_rate=self._new_learn_rate, lr_update=self._learn_update)
        for name, op in ops.items():
            tf.add_to_collection(name, op)
        self._initial_state_name = util.with_prefix(self._name, "initial")
        self._final_state_name = util.with_prefix(self._name, "final")
        util.export_state_tuples(self._initial_state, self._initial_state_name)
        util.export_state_tuples(self._final_state, self._final_state_name)
        
    def import_ops(self):
        if self._is_training:
            self._train_op = tf.get_collection_ref("train_op")[0]
            self._learn_rate = tf.get_collection_ref("lr")[0]
            self._new_learn_rate = tf.get_collection_ref("new_lr")[0]
            self._learn_update = tf.get_collection_ref("lr_update")[0]
            
        self._cost = tf.get_collection_ref(util.with_prefix(self._name, "cost"))[0]
        num_replicas = 0
        self._initial_state = util.import_state_tuples(self._initial_state,
                                                       self._initial_state_name,
                                                       num_replicas)
        self._final_state = util.import_state_tuples(self._final_state,
                                                     self._final_state_name,
                                                     num_replicas)

In [4]:
def run_epoch(session, model, eval_op=None, verbose=False):
    """Runs the model on the given data."""
    start_time = time.time()
    costs = 0.0
    iters = 0
    state = session.run(model.initial_state)

    fetches = {
        "cost": model.cost,
        "final_state": model.final_state,
    }
    if eval_op is not None:
        fetches["eval_op"] = eval_op

    for step in range(model.input.epoch_size):
        feed_dict = {}
        for i, (c, h) in enumerate(model.initial_state):
            feed_dict[c] = state[i].c
            feed_dict[h] = state[i].h

    vals = session.run(fetches, feed_dict)
    cost = vals["cost"]
    state = vals["final_state"]

    costs += cost
    iters += model.input.num_steps

    if verbose and step % (model.input.epoch_size // 10) == 10:
        print("%.3f perplexity: %.3f speed: %.0f wps" %
            (step * 1.0 / model.input.epoch_size, np.exp(costs / iters), 
             iters * model.input.batch_size * max(1, FLAGS.num_gpus) /
             (time.time() - start_time)))

    return np.exp(costs / iters)

In [20]:
def main(_):
    
    global EMBEDDING_SIZE
    global DROPOUT_KEEP_PROB
    global HIDDEN_UNITS
    global NUM_LAYERS
    global NUM_EPOCHS
    global BATCH_SIZE
    global MAX_EPOCHS
    global EVALUATE_EVERY
    global CHECKPOINT_EVERY
    global NUM_STEPS
    
    global VOCAB_SIZE
    
    global LEARN_RATE_DECAY
    global DECAY_EVERY
    global LEARNING_RATE
    
    global DATA_PATH
    global SAVE_PATH
        
    DF_reviews = pd.read_csv(DATA_PATH + "features.csv")
    DF_ratings = pd.read_csv(DATA_PATH + "stars.csv")
    
    with tf.Graph().as_default():
        initializer = tf.random_uniform_initializer(-0.1,0.1)
        
        xTrain, xTest, yTrain, yTest = train_test_split(DF_reviews,
                                                        DF_ratings,
                                                        test_size=0.2,
                                                        random_state=9)
        # Defining Training model
        with tf.name_scope("Train"):
            IS_TRAINING = True
            with tf.variable_scope("Model", reuse=None, initializer=initializer):
                model_train = gru_lstm(xTrain, yTrain, NUM_STEPS, BATCH_SIZE, 
                                 IS_TRAINING, VOCAB_SIZE, EMBEDDING_SIZE, 
                                 HIDDEN_UNITS, NUM_LAYERS, DROPOUT_KEEP_PROB)
                
            tf.summary.scalar("Training Loss", model_train.cost)
            tf.summary.scalar("Learning Rate", model_train.learn_rate)
            
        with tf.name_scope("Test"):            
            with tf.variable_scope("Model", reuse=True, initializer=initializer):
                
                IS_TRAINING = False
                BATCH_SIZE = 1
                NUM_STEPS = 1
                
                model_test = gru_lstm(xTest, yTest, NUM_STEPS, BATCH_SIZE, 
                                 IS_TRAINING, VOCAB_SIZE, EMBEDDING_SIZE, 
                                 HIDDEN_UNITS, NUM_LAYERS, DROPOUT_KEEP_PROB)
        
        
        models = {"Train": model_train, "Test": model_test}
        
        for name, model in models.items():
            model.export_ops(name)
        
        metagraph = tf.train.export_meta_graph()

    with tf.Graph().as_default():
        tf.train.import_meta_graph(metagraph)
        for model in models.values():
            model.import_ops()
        sv = tf.train.Supervisor(logdir=FLAGS.save_path)
        config_proto = tf.ConfigProto(allow_soft_placement=False)
        
        with sv.managed_session(config=config_proto) as session:
            for i in range(NUM_EPOCHS):
                lr_decay = LEARN_RATE_DECAY ** max(i + 1 - DECAY_EVERY, 0.0)
                model_train.assign_lr(session, LEARNING_RATE * lr_decay)
                
                print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
                train_perplexity = run_epoch(session, m, eval_op=m.train_op, verbose=True)
                print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
                valid_perplexity = run_epoch(session, mvalid)
                print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))
                
            test_perplexity = run_epoch(session, mtest)
            print("Test Perplexity: %.3f" % test_perplexity)
            
            if SAVE_PATH:
                print("Saving model to %s." % SAVE_PATH)
                sv.saver.save(session, SAVE_PATH, global_step=sv.global_step)
                

if __name__=="__main__":
    tf.app.run()

NameError: global name 'stacked_gru_graph' is not defined

# Text Processing
Using tf.contrib.keras to preprocess the text data