In [1]:
import tensorflow as tf
import numpy as np
import os

print("Tensorflow version: {}".format(tf.__version__))

  from ._conv import register_converters as _register_converters


Tensorflow version: 1.9.0


# RNN in Tensorflow

## RNN Cell

```python
basicRNNCell = tf.nn.rnn_cell.BasicRNNCell(num_units=num_units, input_size=None, activation=tanh)

basicLSTMCell = tf.nn.rnn_cell.BasicLSTMCell(num_units=num_units, input_size=None, activation=tanh, \
                                             forget_bias=1.0,  state_is_tuple=True)

LSTMCell = tf.nn.rnn_cell.LSTMCell(num_units=num_units, input_size=None, activation=tanh, \
                                  forget_bias=1.0, state_is_tuple=True, \
                                  use_peepholes=False, cell_clip=None, initializer=None, num_proj=None, \
                                  num_unit_shards=1, num_proj_shards=1)

GRUCell = tf.nn.rnn_cell.GRUCell(num_units=num_units, input_size=None, activation=tanh)
```

## RNN Flow

```python
# create LSTM cell
cell_layer_1 = tf.nn.rnn_cell.BasicLSTMCell(10)

# create another LSTM Cell
cell_layer_2 = tf.nn.rnn_cell.BasicLSTMCell(20)
tf.nn.rnn_cell.DropoutWrapper(cell_layer_2, input_keep_prob=1.0, output_keep_prob=1.0, seed=None)   # dropout

# assemble the rnn layer
full_cell = tf.nn.rnn_cell.MultiRNNCell([cell_layer_1, cell_layer_2])

# assemble the rnn network
# input@time_major = F, inputs = [batch_size, max_time, ...], outputs = [batch_size, max_time, cell.output_size]
#                    T, inputs = [max_time, batch_size, ...], outputs = [max_time, batch_size, cell.output_size]
# state = [batch_size, cell.state_size]
outputs, state = tf.nn.dynamic_rnn(full_cell, inputs=inputs, sequence_length=None, dtype=None, \
                                   parallel_iterations=None, swap_memory=False, time_major=False, scope=None)
```

# Emotion Analyzing

## Data preparation

In [2]:
!pip install tflearn

[33mYou are using pip version 18.0, however version 18.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [3]:
import tflearn

!pip list | grep 'tflearn'

tflearn                            0.3.2    
[33mYou are using pip version 18.0, however version 18.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [4]:
from tflearn.data_utils import to_categorical, pad_sequences
from tflearn.datasets import imdb

In [5]:
# n_word=30000, most common used 300000 word
if os.path.exists('/notebooks/data'):
    train, test, _ = imdb.load_data('/notebooks/data/imdb/imdb.pkl', n_words=30000, valid_portion=0.1)
elif os.path.exists('/Volumes/Data'):
    train, test, _ = imdb.load_data('/Volumes/Data/imdb/imdb.pkl', n_words=30000, valid_portion=0.1)
else:
    raise IOError("no such path")

# trainX, testX: the sequence
# trainY, testY: the emotion label (positive/negative, 1/0)
trainX, trainY = train
testX, testY = test

## Data Preprocessing

In [6]:
# expand sequence to length of 500 with value = 0 if the sequence length <= 500
# truncated sequence to length of 500 if the sequence length > 500
trainX = pad_sequences(trainX, maxlen=500, value=0.)
testX = pad_sequences(testX, maxlen=500, value=0.)

In [7]:
# change label to one-hot encoding
trainY = to_categorical(trainY, nb_classes=2)
testY = to_categorical(testY, nb_classes=2)

In [8]:
# a simple example
trainX[1]

array([  16,  586,   32,  885,   17,   39,   68,   31, 2994, 2389,  328,
          4,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,   

In [9]:
# data loading method
class IMDBDataset():
    def __init__(self, X, Y):
        self.data_count = len(X)
        self.input = X
        self.label = Y
        self.ptr = 0
        
    def minibatch(self, size):
        ret = None
        
        if self.ptr + size < self.data_count:
            ret = self.input[self.ptr:(self.ptr + size)], self.label[self.ptr:(self.ptr + size)]
        else:
            ret = np.concatenate((self.input[self.ptr:], self.input[:size-len(self.input[self.ptr:])])), \
                  np.concatenate((self.label[self.ptr:], self.label[:size-len(self.label[self.ptr:])]))
        
        self.ptr = (self.ptr + size) % (self.data_count)
        return ret

In [10]:
train = IMDBDataset(trainX, trainY)
test = IMDBDataset(testX, testY)

# Model

## Hyperparameters

In [11]:
full_training = False
learning_rate = 1e-2
training_epochs = 100 if full_training else 1
batch_size = 2000 if full_training else 1000
display_step = 1

if os.path.exists(os.path.join("/","notebooks","devops")):
    log_path = os.path.join("/","notebooks","devops","tmp","emotion_log")
elif os.path.exists(os.path.join("/","Users","jiankaiwang","devops")):
    log_path = os.path.join("/","Users","jiankaiwang","devops","tmp","emotion_log")
else:
    raise IOError("no such log path")
print("log path: {}".format(log_path))

log path: /Users/jiankaiwang/devops/tmp/emotion_log


## network

In [12]:
def embedding_layer(input, weight_shape):
    weight_init = tf.random_normal_initializer(stddev=(1.0/weight_shape[0])**0.5)
    E = tf.get_variable("E", weight_shape, initializer=weight_init)
    incoming = tf.cast(input, tf.int32)
    embeddings = tf.nn.embedding_lookup(E, incoming)
    return embeddings

In [13]:
def lstm(input, hidden_dim, keep_prob, phase_train):
    lstm = tf.nn.rnn_cell.BasicLSTMCell(hidden_dim)
    dropout_lstm = tf.nn.rnn_cell.DropoutWrapper(lstm, input_keep_prob=keep_prob, output_keep_prob=keep_prob)
    lstm_outputs, state = tf.nn.dynamic_rnn(dropout_lstm, inputs=input, dtype=tf.float32)
    
    # (batch_size, max_time, cell.output_size) -> (batch_size, 1, cell.output_size)
    slice_output = tf.slice(lstm_outputs, \
            [0, tf.shape(lstm_outputs)[1]-1, 0], [tf.shape(lstm_outputs)[0], 1, tf.shape(lstm_outputs)[1]])
    
    # (batch_size, 1, cell.output_size) -> (batch_size, cell.output_size)
    squeeze_output = tf.squeeze(slice_output)
    
    return squeeze_output

In [14]:
def dense(input, weight_shape, bias_shape, phase_train):
    in_count = weight_shape[0] * weight_shape[1]
    weight_init = tf.random_normal_initializer(stddev=(2/in_count)**0.5)
    bias_init = tf.constant_initializer(value=0)
    weight = tf.get_variable("W", weight_shape, initializer=weight_init)
    bias = tf.get_variable("b", bias_shape, initializer=bias_init)
    return tf.nn.relu(tf.nn.bias_add(tf.matmul(input, weight), bias=bias))

In [15]:
def inference(input, keep_prob, phase_train):
    embedding = embedding_layer(input, [30000, 500])
    lstm_output = lstm(embedding, 500, keep_prob, phase_train)
    output = dense(lstm_output, [500, 2], [2], phase_train)
    return output

## target

In [16]:
def loss(output, y):
    """
    output: the logits value from inference
    y: the labeling data
    """
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=output, labels=y)
    loss = tf.reduce_mean(cross_entropy)
    return loss

In [17]:
def training(loss, global_step):
    tf.summary.scalar("loss", loss)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    grads = tf.gradients(loss, tf.trainable_variables())
    grads = list(zip(grads, tf.trainable_variables()))
    apply_grads = optimizer.apply_gradients(grads_and_vars=grads, global_step=global_step)
    return grads, apply_grads

In [18]:
def evaluate(output, y):
    compare = tf.equal(tf.argmax(output, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(compare, tf.float32))
    tf.summary.scalar("eval", accuracy)
    return accuracy

# Learning

In [19]:
print("Run 'tensorboard --logdir={}' to monitor the training process.".format(log_path))

Run 'tensorboard --logdir=/Users/jiankaiwang/devops/tmp/emotion_log' to monitor the training process.


In [20]:
with tf.Graph().as_default():
    with tf.variable_scope("LSTM"):
        # input, label
        x = tf.placeholder(tf.int32, [None, 500])
        y = tf.placeholder(tf.int32, [None, 2])
        keep_prob = tf.placeholder(tf.float32)
        phase_train = tf.placeholder(tf.bool)
        
        # inference, training, evaluation
        output = inference(x, keep_prob, phase_train)
        loss_val = loss(output, y)
        global_step = tf.Variable(0, name="global_step", trainable=False)
        train_grads, train_opt = training(loss_val, global_step)
        eval_opt = evaluate(output, y)
        
        # summary
        for var in tf.trainable_variables():
            tf.summary.histogram(var.name, var)
            
        for grad, var in train_grads:
            tf.summary.histogram(var.name + "/gradient", grad)
        
        summary_opt = tf.summary.merge_all()
            
        # initialization
        init_var = tf.global_variables_initializer()
        saver = tf.train.Saver()
        
        # start leanring
        with tf.Session() as sess:
            summary_writer = tf.summary.FileWriter(log_path, graph=sess.graph)
            sess.run(init_var)
            
            batch_count = int(len(trainX) / batch_size)
            for epoch in range(training_epochs):
                avg_cost = 0.
                
                for batch in range(batch_count):
                    b_x, b_y = train.minibatch(batch_size)
                    feed_data = {x: b_x, y: b_y, keep_prob: 0.5, phase_train: True}
                    grads, _ = sess.run([train_grads, train_opt], feed_dict=feed_data)
                    
                    batch_loss = sess.run(loss_val, feed_dict=feed_data)
                    avg_cost += batch_loss / batch_size
                    
                if epoch % display_step == 0:
                    # validation 
                    b_v_x, b_v_y = test.minibatch(batch_size)
                    feed_val_data = {x: b_v_x, y: b_v_y, keep_prob: 1.0, phase_train: False}
                    acc = sess.run(eval_opt, feed_dict=feed_val_data)
                    tf.summary.scalar("val_acc", acc)
                    print("epoch: {}, accuracy: {}".format(epoch, acc))
                    
                    # summary
                    summary_str = sess.run(summary_opt, feed_dict=feed_val_data)
                    summary_writer.add_summary(summary_str, sess.run(global_step))
                    saver.save(sess, os.path.join(log_path, "model-checkpoint"), global_step=global_step)
                    
            print("Training finished.")

INFO:tensorflow:Summary name LSTM/E:0 is illegal; using LSTM/E_0 instead.
INFO:tensorflow:Summary name LSTM/rnn/basic_lstm_cell/kernel:0 is illegal; using LSTM/rnn/basic_lstm_cell/kernel_0 instead.
INFO:tensorflow:Summary name LSTM/rnn/basic_lstm_cell/bias:0 is illegal; using LSTM/rnn/basic_lstm_cell/bias_0 instead.
INFO:tensorflow:Summary name LSTM/W:0 is illegal; using LSTM/W_0 instead.
INFO:tensorflow:Summary name LSTM/b:0 is illegal; using LSTM/b_0 instead.
INFO:tensorflow:Summary name LSTM/E:0/gradient is illegal; using LSTM/E_0/gradient instead.
INFO:tensorflow:Summary name LSTM/rnn/basic_lstm_cell/kernel:0/gradient is illegal; using LSTM/rnn/basic_lstm_cell/kernel_0/gradient instead.
INFO:tensorflow:Summary name LSTM/rnn/basic_lstm_cell/bias:0/gradient is illegal; using LSTM/rnn/basic_lstm_cell/bias_0/gradient instead.
INFO:tensorflow:Summary name LSTM/W:0/gradient is illegal; using LSTM/W_0/gradient instead.
INFO:tensorflow:Summary name LSTM/b:0/gradient is illegal; using LSTM/