#### RNN forward networks

In [1]:
import tensorflow as tf
def lstm_update(X, state, input_gate, forget_gate, output_gate, cell_gate):
    """Apply one LSTM cell update and return ``(next_state, next_output)``.

    The new cell state keeps ``forget_gate * state`` and adds
    ``input_gate * cell_gate``; the new output is the tanh of that state
    scaled by ``output_gate``.

    ``X`` is accepted for signature compatibility but is not read here: the
    gate activations passed in are expected to be computed by the caller.
    """
    retained = forget_gate * state
    written = input_gate * cell_gate
    new_state = retained + written
    new_output = tf.tanh(new_state) * output_gate
    return new_state, new_output

def lstm_forward(X, ini_state, input_dim, num_nodes, initializer, forget_bias, steps, direction, reuse=None):
    """Unroll a hand-written LSTM for ``steps`` time steps and return its final state.

    Args:
        X: input tensor, sliced per step as ``X[:, t, :]``; each slice is
            multiplied by ``[input_dim, num_nodes]`` weight matrices.
        ini_state: initial cell state. It is also used as the "previous
            output" when the step-0 output gate is computed, so it is
            multiplied by a ``[num_nodes, num_nodes]`` matrix.
        input_dim: size of the last axis of ``X``.
        num_nodes: number of units in each gate.
        initializer: initializer for every ``U_*`` and ``W_*`` matrix.
        forget_bias: initializer (not a scalar) for the forget-gate bias ``b_f``.
        steps: Python int, number of time steps to unroll.
        direction: suffix of the variable scope name, keeps the parameters of
            different passes apart.
        reuse: forwarded to ``tf.variable_scope``.

    Returns:
        ``(state, output)`` after the last unrolled step.
    """

    def _gate_variables(u_name, w_name, b_name, bias_initializer):
        # One gate = input weights, recurrent weights and a bias vector.
        u = tf.get_variable(u_name, shape=[input_dim, num_nodes], initializer=initializer)
        w = tf.get_variable(w_name, shape=[num_nodes, num_nodes], initializer=initializer)
        bias = tf.get_variable(b_name, shape=[num_nodes], initializer=bias_initializer)
        return u, w, bias

    with tf.variable_scope('LSTM_gate_parameters_%s' % direction, reuse=reuse):
        in_u, in_w, in_b = _gate_variables('U_g', 'W_g', 'b_g', tf.zeros_initializer())
        forget_u, forget_w, forget_b = _gate_variables('U_f', 'W_f', 'b_f', forget_bias)
        out_u, out_w, out_b = _gate_variables('U_o', 'W_o', 'b_o', tf.zeros_initializer())
        cell_u, cell_w, cell_b = _gate_variables('U_c', 'W_c', 'b_c', tf.zeros_initializer())

        # Concatenate the per-gate parameters (order: input, forget, output,
        # cell) so every step needs only two matmuls instead of eight.
        U = tf.concat([in_u, forget_u, out_u, cell_u], axis=1)
        W = tf.concat([in_w, forget_w, out_w, cell_w], axis=1)
        b = tf.concat([in_b, forget_b, out_b, cell_b], axis=0)

    # Step 0: only the output gate is evaluated; the cell state stays at
    # ini_state.
    # NOTE(review): X[:, 0, :] therefore never reaches the cell state, and with
    # an all-zero ini_state the step-0 output is zero as well, so the first
    # time step has no effect on the result. Confirm this is intended.
    first_input = X[:, 0, :]
    q = tf.sigmoid(tf.matmul(first_input, out_u) + tf.matmul(ini_state, out_w) + out_b)
    output = tf.tanh(ini_state) * q
    state = ini_state

    # Remaining steps: all four blocks (including the cell candidate) go
    # through a sigmoid, then the slab is split back in concatenation order.
    for t in range(1, steps):
        x_t = X[:, t, :]
        activations = tf.sigmoid(tf.matmul(x_t, U) + tf.matmul(output, W) + b)
        g, f, o, c = tf.split(activations, 4, axis=1)
        state, output = lstm_update(x_t, state, g, f, o, c)
    return state, output

In [2]:
import tensorflow as tf

# Number of units in each LSTM direction (also the width of its state).
num_cell = 128
# Width of the ReLU layer between the BiLSTM features and the final logit.
hidden_units = 30

def bi_rnn_forward(inputs, steps=100):
    """Run a bidirectional LSTM over ``inputs`` and return one logit per example.

    A forward pass reads ``inputs`` as given and a backward pass reads it
    reversed along axis 1. The final outputs of both passes are concatenated
    and fed through a ReLU layer of ``hidden_units`` units and a linear layer
    of width 1.

    Args:
        inputs: float tensor sliced per step as ``inputs[:, t, :]``; it must
            hold at least ``steps`` entries along axis 1 at run time.
        steps: number of time steps to unroll (Python int). Defaults to 100,
            the value that used to be hard-coded.

    Returns:
        Tensor of unnormalised logits with a last dimension of 1.
    """
    # Take the feature width from the static shape instead of hard-coding it.
    input_dim = inputs.get_shape().as_list()[-1]
    if input_dim is None:
        # Static width unknown: fall back to the previously hard-coded value.
        input_dim = 128

    # Size the initial state from the run-time batch size so the graph is not
    # tied to batches of exactly 100 examples.
    ini = tf.zeros(shape=tf.stack([tf.shape(inputs)[0], num_cell]))
    ini.set_shape([None, num_cell])

    fw_state, fw_output = lstm_forward(inputs, ini, input_dim,
                                       num_cell,
                                       tf.contrib.layers.xavier_initializer(),
                                       tf.zeros_initializer(),
                                       steps,
                                       'fw')

    # The backward pass is the same cell applied to the time-reversed input,
    # with its own parameters (separate variable scope suffix).
    reverse_inputs = tf.reverse(inputs, axis=[1])

    bw_state, bw_output = lstm_forward(reverse_inputs, ini, input_dim,
                                       num_cell,
                                       tf.contrib.layers.xavier_initializer(),
                                       tf.zeros_initializer(),
                                       steps,
                                       'bw')

    # Logistic layers: use the output of the last unrolled step of each pass.
    h_inputs = tf.concat([fw_output, bw_output], axis=1)

    before_outputs = tf.contrib.layers.fully_connected(h_inputs, hidden_units, activation_fn=tf.nn.relu)
    outputs = tf.contrib.layers.fully_connected(before_outputs, 1, activation_fn=None)
    return outputs

#### Read IMDB dataset using Keras API

In [3]:
from keras.datasets import imdb
from keras.preprocessing import sequence
import numpy as np

# Load the IMDB train/test splits through the Keras helper. Every keyword is
# passed explicitly so the preprocessing does not depend on library defaults.
# num_words=10000 has to match the number of rows of the embedding matrix
# built for the model (presumably it caps token ids below 10000 -- confirm
# against the Keras documentation).
(x_train, y_train), (x_test, y_test) = imdb.load_data(path="imdb.npz",
                                                      num_words=10000,
                                                      skip_top=0,
                                                      maxlen=None,
                                                      seed=113,
                                                      start_char=1,
                                                      oov_char=2,
                                                      index_from=3)

def generate_dataset(x_train, y_train, batch_num):
    """Shuffle examples and labels together and cut them into ``batch_num`` batches.

    Both arrays are reordered with the same random permutation (drawn from
    NumPy's global RNG), so example/label pairs stay aligned. ``np.split``
    raises if the number of examples is not evenly divisible by ``batch_num``.

    Returns:
        ``(xs, ys)``: two lists of ``batch_num`` equally sized arrays.
    """
    order = np.arange(y_train.shape[0])
    np.random.shuffle(order)
    shuffled_x = x_train[order]
    shuffled_y = y_train[order]
    return np.split(shuffled_x, batch_num), np.split(shuffled_y, batch_num)

  
def max_len(sentences):
    """Return the length of the longest sequence in ``sentences``.

    Raises ``ValueError`` (from ``max``) when ``sentences`` is empty.
    """
    return max(map(len, sentences))

Using TensorFlow backend.


#### Build graph

In [4]:
# Size of the embedding table: one row per token id, `embedding_size` columns.
num_words = 10000
embedding_size = 128

# Run-time inputs: integer token ids and one float label per example.
X = tf.placeholder(tf.int32, shape=[None, None], name='inputs')
y = tf.placeholder(tf.float32, shape=[None, 1], name='labels')

# Trainable word vectors, initialised uniformly in [-1, 1].
embedding = tf.get_variable(
    'Embedding',
    shape=[num_words, embedding_size],
    initializer=tf.random_uniform_initializer(-1., 1.),
)

# Replace every token id by its word vector, then run the BiLSTM classifier.
embedded = tf.nn.embedding_lookup(embedding, X)
logits = bi_rnn_forward(embedded)

# Mean sigmoid cross-entropy over the batch; `probs` is only used for
# reporting accuracy.
loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=logits)
)
probs = tf.sigmoid(logits)

train_op = tf.train.AdamOptimizer(learning_rate=0.01).minimize(loss)

#### Training

In [5]:
num_epochs = 3
batch_size = 100
# bi_rnn_forward unrolls the LSTM for exactly 100 time steps, so every batch
# fed to the graph must be padded/truncated to this length.
num_steps = 100
# Floor division keeps the batch counts integral on Python 2 and Python 3
# (range() and np.split() both need an int).
batch_num = y_train.shape[0] // batch_size
test_batch_num = y_test.shape[0] // batch_size

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    for k in range(num_epochs):
        # Reshuffle and re-batch the training set at the start of every epoch.
        xs, ys = generate_dataset(x_train, y_train, batch_num)
        for j in range(batch_num):
            # Always pad to num_steps: the unrolled graph indexes steps
            # 0..num_steps-1, so a shorter batch would fail at run time.
            X_feed = sequence.pad_sequences(xs[j], maxlen=num_steps)
            y_feed = ys[j].reshape(batch_size, 1)
            loss_val, _, probs_val = sess.run([loss, train_op, probs], feed_dict={X: X_feed, y: y_feed})

            if j % 50 == 0:
                # Threshold the probabilities at 0.5 and compare with labels.
                predictions = (probs_val.reshape(-1) > 0.5).astype(int)
                correct = int(np.sum(predictions == ys[j]))
                print("Epoch %d, step %d " % (k, j))
                print("Train loss and accuracy  %s %s \n" % (loss_val, float(correct) / batch_size))

    # Test performance (the batch count is derived from the test set itself).
    test_xs, test_ys = generate_dataset(x_test, y_test, test_batch_num)
    acc_count = []
    for j in range(test_batch_num):
        X_feed = sequence.pad_sequences(test_xs[j], maxlen=num_steps)
        y_feed = test_ys[j].reshape(batch_size, 1)
        probs_val = sess.run(probs, feed_dict={X: X_feed, y: y_feed})
        predictions = (probs_val.reshape(-1) > 0.5).astype(int)
        acc_count.append(int(np.sum(predictions == test_ys[j])))

    print("Test accuracy  %s \n" % (float(sum(acc_count)) / y_test.shape[0],))

Epoch 0, step 0 
Train loss and accuracy  0.690977 0.52 

Epoch 0, step 50 
Train loss and accuracy  0.69111 0.53 

Epoch 0, step 100 
Train loss and accuracy  0.644542 0.69 

Epoch 0, step 150 
Train loss and accuracy  0.527349 0.74 

Epoch 0, step 200 
Train loss and accuracy  0.445612 0.8 

Epoch 1, step 0 
Train loss and accuracy  0.315693 0.91 

Epoch 1, step 50 
Train loss and accuracy  0.314145 0.88 

Epoch 1, step 100 
Train loss and accuracy  0.39216 0.85 

Epoch 1, step 150 
Train loss and accuracy  0.370596 0.84 

Epoch 1, step 200 
Train loss and accuracy  0.340789 0.85 

Epoch 2, step 0 
Train loss and accuracy  0.261349 0.91 

Epoch 2, step 50 
Train loss and accuracy  0.271609 0.91 

Epoch 2, step 100 
Train loss and accuracy  0.267397 0.91 

Epoch 2, step 150 
Train loss and accuracy  0.228646 0.94 

Epoch 2, step 200 
Train loss and accuracy  0.245157 0.91 

Test accuracy  0.84276 

