In [1]:
import pickle

import numpy as np
import tensorflow as tf
from tensorflow.models.rnn import rnn
from tensorflow.models.rnn import rnn_cell

from reader import TokReader
# Load the pickled token map from the working directory. Its length is used
# as the vocabulary size and it is handed to every TokReader below.
# (presumably a token -> id mapping; confirm against the reader module)
# NOTE(review): pickle.load can execute arbitrary code while deserialising,
# so only open a tok_map.pkl that comes from a trusted source.
with open('tok_map.pkl', 'rb') as f:
    tok_map = pickle.load(f)

In [2]:
class RNNModel():
    """Bidirectional LSTM binary classifier over fixed-length id sequences.

    The graph embeds the integer ids in ``input_data``, runs them through a
    forward and a backward LSTM stack, concatenates the two final states and
    feeds the result to a one-unit linear layer trained with sigmoid
    cross-entropy and plain gradient descent with global-norm clipping.

    Args:
        config: object exposing ``sent_len``, ``batch_size``, ``vocab_size``,
            ``embed_size``, ``num_layers``, ``state_size``, ``keep_prob`` and
            ``max_grad_norm``.

    Attributes:
        input_data: int32 placeholder of shape ``[batch_size, sent_len]``.
        lengths: int64 placeholder of shape ``[batch_size]``, forwarded to the
            RNN as ``sequence_length``.
        targets: float32 placeholder of shape ``[batch_size, 1]``.
        probabilities: sigmoid of the linear layer's logits.
        cost: mean sigmoid cross-entropy between logits and ``targets``.
        lr: non-trainable learning-rate variable. It is created as 0.0, so the
            caller has to assign a real value before ``train_op`` can change
            any weight.
        grad_norm: global gradient norm reported by ``clip_by_global_norm``.
        train_op: op applying the clipped gradients.
        check1, shape1, shape2, shape3: debugging handles (first RNN output
            and the dynamic shapes of the first RNN input, the first RNN
            output and the concatenated final states).

    NOTE(review): dropout is wired in with the fixed ``config.keep_prob``, so
    running ``probabilities`` on this same instance also applies dropout.
    """

    def __init__(self, config):
        sent_len = config.sent_len
        batch_size = config.batch_size
        vocab_size = config.vocab_size
        embed_size = config.embed_size
        num_layers = config.num_layers
        state_size = config.state_size
        keep_prob = config.keep_prob

        self.input_data = tf.placeholder(tf.int32, [batch_size, sent_len])
        self.lengths = tf.placeholder(tf.int64, [batch_size])
        self.targets = tf.placeholder(tf.float32, [batch_size, 1])

        # Get embedding layer which requires CPU
        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [vocab_size, embed_size])
            inputs = tf.nn.embedding_lookup(embedding, self.input_data)

        # LSTM 1 -> encode every input sequence into a fixed dense representation
        with tf.variable_scope("rnn1", reuse=None):
            cell = rnn_cell.LSTMCell(
                state_size, input_size=embed_size,
                initializer=tf.contrib.layers.xavier_initializer())
            back_cell = rnn_cell.LSTMCell(
                state_size, input_size=embed_size,
                initializer=tf.contrib.layers.xavier_initializer())
            cell = rnn_cell.DropoutWrapper(
                cell, input_keep_prob=keep_prob, output_keep_prob=keep_prob)
            back_cell = rnn_cell.DropoutWrapper(
                back_cell, input_keep_prob=keep_prob,
                output_keep_prob=keep_prob)
            cell = rnn_cell.MultiRNNCell([cell] * num_layers)
            backcell = rnn_cell.MultiRNNCell([back_cell] * num_layers)

            # One [batch_size, embed_size] tensor per time step.
            rnn_splits = [tf.squeeze(input_, [1])
                          for input_ in tf.split(1, sent_len, inputs)]

            self.shape1 = tf.shape(rnn_splits[0])

            # Run the bidirectional rnn
            outputs, last_fw_state, last_bw_state = rnn.bidirectional_rnn(
                cell, backcell, rnn_splits,
                sequence_length=self.lengths,
                dtype=tf.float32)
        self.check1 = outputs[0]
        self.shape2 = tf.shape(outputs[0])
        sent_out = tf.concat(1, [last_fw_state, last_bw_state])
        self.shape3 = tf.shape(sent_out)

        # Width of the concatenated final states. The recorded run shows
        # 4 * state_size columns for one layer (two directions, each twice
        # state_size wide). A stacked cell keeps one such state per layer, so
        # the width has to scale with num_layers; the previous hard-coded
        # state_size*4 only matched num_layers == 1.
        # (assumes the non-tuple, concatenated state layout -- TODO confirm
        # when moving to another TensorFlow version)
        output_size = 4 * state_size * num_layers

        with tf.variable_scope("linear", reuse=None):
            w = tf.get_variable("w", [output_size, 1])
            b = tf.get_variable("b", [1], initializer=tf.constant_initializer(0.0))
            raw_logits = tf.matmul(sent_out, w) + b
        self.probabilities = tf.sigmoid(raw_logits)
        self.cost = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(raw_logits, self.targets))

        # Calculate gradients and propagate
        # Aggregation method 2 is really important for rnn per the tensorflow issues list
        tvars = tf.trainable_variables()
        self.lr = tf.Variable(0.0, trainable=False)  # Assign to overwrite
        optimizer = tf.train.GradientDescentOptimizer(self.lr)
        grads, _vars = zip(*optimizer.compute_gradients(
            self.cost, tvars, aggregation_method=2))
        grads, self.grad_norm = tf.clip_by_global_norm(grads,
                                                       config.max_grad_norm)
        self.train_op = optimizer.apply_gradients(zip(grads, _vars))

In [3]:
class Config:
    """Hyper-parameters shared by the readers and by RNNModel."""

    # --- batching / input geometry ---
    sent_len = 10                # time steps per sequence (placeholder width)
    batch_size = 32              # sequences per batch
    vocab_size = len(tok_map)    # rows of the embedding matrix

    # --- network ---
    embed_size = 256             # embedding dimension, also the LSTM input size
    state_size = 256             # units per LSTM cell
    num_layers = 1               # stacked cells per direction
    keep_prob = 0.5              # dropout keep probability on cell inputs and outputs

    # --- optimisation ---
    init_scale = 0.05            # half-width of the uniform weight initializer
    learning_rate = 0.1          # not read by any visible cell
    max_grad_norm = 5            # global-norm clipping threshold

    # --- run bookkeeping (not referenced in the visible cells) ---
    num_models = 100
    num_epochs = 100
    save_every = 10

In [4]:
# Batch source for the training split (the reader logs it as "training").
stream = TokReader(
    Config.sent_len,
    Config.batch_size,
    tok_map,
    random=True,
    rounded=True,
    training=True,
    limit=1000
)

# Same settings with training=False (the reader logs it as "valid").
validstream = TokReader(
    Config.sent_len,
    Config.batch_size,
    tok_map,
    random=True,
    rounded=True,
    training=False,
    limit=1000
)

INFO:USF.reader:Instantiating TokReader object: training
INFO:USF.reader:Loading reviews
INFO:USF.reader:Instantiating TokReader object: valid
INFO:USF.reader:Loading reviews


In [5]:
def run_one(model, session, reader, training, step=None):
    """Pull one batch from ``reader`` and run it as a training or evaluation step.

    Args:
        model: object exposing the placeholders ``input_data``, ``targets``
            and ``lengths``, plus ``probabilities`` (evaluation) or ``cost``,
            ``grad_norm``, ``shape1``, ``shape2``, ``shape3``, ``check1`` and
            ``train_op`` (training).
        session: object with a ``run(fetches, feed_dict)`` method.
        reader: iterator yielding ``(x, y, lengths)`` batches.
        training: truthy to run a training step and print its diagnostics,
            falsy to print the batch accuracy at a 0.5 threshold.
        step: optional step counter, printed in evaluation mode when given.

    Returns:
        None. All results are printed.

    Raises:
        StopIteration: if ``reader`` is exhausted.
    """
    x, y, lengths = next(reader)
    feed_dict = {model.input_data: x, model.targets: y,
                 model.lengths: lengths}
    if training:
        fetches = [model.cost, model.grad_norm, model.shape1,
                   model.shape2, model.shape3, model.check1,
                   model.train_op]
        cost, grad_norm, s1, s2, s3, c1, _ = session.run(fetches, feed_dict)
        # Only the first row / first 50 columns of the debug tensor are shown.
        print("Check1 ", c1[0, :50])
        print("Shape1 ", s1)
        print("Shape2 ", s2)
        print("Shape3 ", s3)
        print("Cost: ", cost)
        print("Grad norm: ", grad_norm)
    else:
        # `step` used to be read from an undefined name; it is now an
        # explicit, optional argument.
        if step is not None:
            print("Test step: ", step)
        # The probabilities live on the model (there is no `self` here).
        proba = session.run(model.probabilities, feed_dict)
        choice = np.where(proba > 0.5, 1, 0)
        accuracy = np.mean(choice == np.asarray(y))
        print(accuracy)

In [6]:
# Session used by every later cell.
sess = tf.InteractiveSession()

# Default initializer for variables created inside the "model" scope:
# uniform in [-init_scale, init_scale].
initializer = tf.random_uniform_initializer(
    -Config.init_scale,
    Config.init_scale
)

# Build the graph under the "model" scope, then initialise all variables.
with tf.variable_scope("model", reuse=False, initializer=initializer):
    m = RNNModel(Config)
    tf.initialize_all_variables().run()

In [8]:
# One training step on a batch from the training stream.
run_one(
    model=m,
    session=sess,
    reader=stream.get_sents(),
    training=True
)

INFO:USF.reader:Shuffling input data


Check1  [ 0.00631031  0.         -0.         -0.         -0.00852726  0.00900993
  0.00618966 -0.01362468  0.0075122  -0.01496265 -0.0045309   0.00410457
  0.00936645  0.          0.          0.00855165 -0.         -0.00106325
 -0.00031307 -0.01047822  0.00430402 -0.         -0.01205309 -0.
  0.00036303 -0.00471735 -0.         -0.01158424 -0.          0.
  0.00927435  0.          0.00185948  0.01082929  0.         -0.          0.
 -0.         -0.          0.         -0.01465058  0.          0.00034434
 -0.01125397 -0.00193667 -0.01474594 -0.         -0.         -0.01042028
 -0.        ]
Shape1  [ 32 256]
Shape2  [ 32 512]
Shape3  [  32 1024]
Cost:  0.694444
Grad norm:  0.863772
