In [1]:
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import tensorflow as tf
from keras.datasets import imdb

Using TensorFlow backend.


In [2]:
# Hyperparameters
NUM_WORDS = 10000         # passed to imdb.load_data(num_words = ...)
SEQUENCE_LENGTH = 250     # every review is padded / truncated to this many tokens
EMBEDDING_DIM = 100       # width of each row of the embedding matrix
HIDDEN_SIZE = 150         # units per LSTM cell (one forward cell, one backward cell)
ATTENTION_SIZE = 50       # attention_size argument of attention_layer
KEEP_PROB = 0.8           # dropout keep probability fed while training
BATCH_SIZE = 256          # examples per batch for both generators
NUM_EPOCHS = 5            # number of iterations of the outer training loop

### 讀取數據

In [3]:
# Load the IMDB train / test splits with num_words limited to NUM_WORDS.
(X_train , y_train) , (X_test , y_test) =\
imdb.load_data(num_words = NUM_WORDS)

# If the installed numpy version is newer than 1.16.2, the data can be loaded
# with the commands below instead: they temporarily wrap np.load so that it is
# always called with allow_pickle=True, then restore the original np.load.
# np_load_old = np.load
# np.load = lambda *a,**k: np_load_old(*a, allow_pickle=True, **k)
# (X_train , y_train) , (X_test , y_test) =\
# imdb.load_data(num_words = NUM_WORDS)
# np.load = np_load_old 

### 數據預處理

In [4]:
def pad_or_truncate(sequences , length , pad_value = 0):
    """Return new lists that are each exactly `length` items long.

    Sequences longer than `length` are cut after `length` items; shorter ones
    are right-padded with `pad_value`. The input sequences are never modified
    (the previous in-place `+=` silently extended the caller's lists).

    Args:
        sequences: iterable of sliceable sequences (lists or 1-D arrays).
        length: target number of items per sequence.
        pad_value: filler appended to short sequences (0 is the 'pad' index).

    Returns:
        A list of lists, one per input sequence, all of size `length`.
    """
    fixed = []
    for sequence in sequences:
        # Slicing + list() copies, so the caller's sequence stays untouched.
        sequence = list(sequence[:length])
        sequence.extend([pad_value] * (length - len(sequence)))
        fixed.append(sequence)
    return fixed

# The "+ 1" accounts for index 0, which is used as the 'pad' token.
vocabulary_size = max([max(x) for x in X_train]) + 1
# Drop test-set word indices that fall outside the training vocabulary.
X_test = [[w for w in x if w < vocabulary_size] for x in X_test]

X_train_ = pad_or_truncate(X_train , SEQUENCE_LENGTH)
X_test_ = pad_or_truncate(X_test , SEQUENCE_LENGTH)

# All rows now share one length, so these become 2-D integer arrays.
X_train = np.array(X_train_)
X_test = np.array(X_test_)

In [5]:
# Input layer: placeholders supplied through feed_dict on every sess.run call
input_data = tf.placeholder(dtype = tf.int32 , shape = [None , SEQUENCE_LENGTH])  # padded word indices
target = tf.placeholder(dtype = tf.float32 , shape = [None])                      # one label per example
seq_len = tf.placeholder(dtype = tf.int32 , shape = [None])                       # one length per example
keep_prob = tf.placeholder(dtype = tf.float32)                                    # dropout keep probability

In [6]:
def lstm_cell(rnn_size):
    """Build one `tf.contrib.rnn.LSTMCell`.

    Args:
        rnn_size: forwarded unchanged as the only argument of the cell
            constructor.

    Returns:
        The newly constructed LSTM cell.
    """
    return tf.contrib.rnn.LSTMCell(rnn_size)

def attention_layer(inputs , attention_size , return_score = False):
    """Collapse a sequence of vectors into one vector using learned weights.

    Shape legend for the comments below: B = batch, T = time steps,
    D = size of axis 2 of `inputs`, A = `attention_size`.

    Args:
        inputs: tensor [B , T , D]; the static size of axis 2 is read at
            graph-construction time.
        attention_size: A, the width of the intermediate projection.
        return_score: when true, also return the per-step weights.

    Returns:
        `output` [B , D], or the pair (`output` , `score`) with `score`
        [B , T] when `return_score` is true.
    """
    feature_dim = inputs.shape[2].value
    normal_init = tf.random_normal_initializer(stddev = 0.1)

    def attention_variable(var_name , var_shape):
        # Every trainable parameter of this layer shares the same initializer.
        return tf.get_variable(name = var_name ,
                               shape = var_shape ,
                               initializer = normal_init)

    # Trainable parameters (created in the order w , b , u)
    w_omega = attention_variable('w_omega' , [feature_dim , attention_size])
    b_omega = attention_variable('b_omega' , [attention_size])
    u_omega = attention_variable('u_omega' , [attention_size])

    with tf.name_scope('v'):
        # [B , T , D] * [D , A] ➞ [B , T , A]
        projected = tf.tensordot(inputs , w_omega , axes = 1) + b_omega
        v = tf.tanh(projected)

    # [B , T , A] * [A] ➞ [B , T]
    vu = tf.tensordot(v , u_omega , axes = 1 , name = 'vu')

    # Softmax of vu gives the weights, still [B , T]
    score = tf.nn.softmax(vu , name = 'score')

    # Weight every time step, then sum over the time axis ➞ [B , D]
    weighted_inputs = inputs * tf.expand_dims(score , -1)
    output = tf.reduce_sum(weighted_inputs , 1)

    return (output , score) if return_score else output
    
def batch_generator(X , y , batch_size):
    """Yield shuffled (X_batch , y_batch) pairs forever.

    The rows are shuffled once up front and reshuffled whenever fewer than
    `batch_size` unseen rows remain; those leftover rows are skipped for that
    pass. `X` and `y` themselves are never modified, because fancy indexing
    always produces fresh arrays.

    Args:
        X: numpy array whose first axis indexes examples.
        y: numpy array aligned with `X` along the first axis.
        batch_size: rows per batch; must satisfy 1 <= batch_size <= X.shape[0].

    Yields:
        Tuples (X_batch , y_batch), each holding exactly `batch_size` rows
        taken with the same row order from `X` and `y`.

    Raises:
        ValueError: on the first `next()` call when `batch_size` is out of
            range. Previously an oversized `batch_size` made the generator
            spin forever without yielding anything.
    """
    size = X.shape[0]
    if batch_size <= 0 or batch_size > size:
        raise ValueError(
            'batch_size must be between 1 and {} , got {}'.format(size , batch_size))

    def reshuffled(X_current , y_current):
        # One shared permutation keeps every row of X paired with its label.
        indices = np.arange(size)
        np.random.shuffle(indices)
        return X_current[indices] , y_current[indices]

    X_shuffled , y_shuffled = reshuffled(X , y)
    i = 0
    while True:
        if i + batch_size > size:
            # Not enough rows left for a full batch: start a new pass.
            i = 0
            X_shuffled , y_shuffled = reshuffled(X_shuffled , y_shuffled)
        yield X_shuffled[i : i + batch_size] , y_shuffled[i : i + batch_size]
        i += batch_size

def compute_sequence_length(x_batch):
    """Count the tokens that precede the first 0 in every row of a batch.

    Args:
        x_batch: iterable of token-index rows (lists or 1-D arrays) in which
            0 marks padding.

    Returns:
        numpy array with one entry per row: the position of the first 0 in
        that row, or the full row length when the row contains no 0.
    """
    lengths = []
    for row in x_batch:
        # The index of the first pad token equals the number of tokens before it.
        first_pad = next((pos for pos , token in enumerate(row) if token == 0) ,
                         len(row))
        lengths.append(first_pad)
    return np.array(lengths)

In [7]:
# Embedding layer
with tf.variable_scope('Embedding_layer'):
    # Trainable lookup table: one EMBEDDING_DIM-wide row per vocabulary index,
    # initialised with uniform random values between -1.0 and 1.0.
    embeddings_var = tf.Variable(tf.random_uniform([vocabulary_size , EMBEDDING_DIM] , -1.0 , 1.0))
    # Replace every word index in input_data by its row of the table.
    batch_embedded = tf.nn.embedding_lookup(embeddings_var , input_data)

# LSTM layer
with tf.variable_scope('LSTM'):
    # range(1): each direction is a stack containing a single LSTM cell.
    cell_fw = tf.contrib.rnn.MultiRNNCell([lstm_cell(HIDDEN_SIZE) for _ in range(1)])
            
    cell_bw = tf.contrib.rnn.MultiRNNCell([lstm_cell(HIDDEN_SIZE) for _ in range(1)])
    
    # seq_len carries the per-example lengths fed at run time.
    lstm_outputs , states = tf.nn.bidirectional_dynamic_rnn(cell_fw ,
                                                            cell_bw ,
                                                            batch_embedded ,
                                                            sequence_length = seq_len ,
                                                            dtype = tf.float32)
    # Join the forward and backward outputs along the last axis; the fully
    # connected layer defined later expects HIDDEN_SIZE * 2 features.
    lstm_outputs = tf.concat(lstm_outputs , axis = -1)

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
This class is equivalent as tf.keras.layers.StackedRNNCells, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.Bidirectional(keras.layers.RNN(cell))`, which is equivalent to this API
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Please use `layer.add_weight` method instead.
Instructions for updating:
Call initializer instance with the dtype argument i

In [8]:
# Attention layer
with tf.variable_scope('Attention_layer'):
    # Collapse the LSTM outputs over time; `score` holds the attention weights.
    attention_output , score = attention_layer(lstm_outputs , ATTENTION_SIZE , return_score = True)
    # Dropout controlled by the keep_prob placeholder (KEEP_PROB while
    # training, 1.0 while evaluating).
    attention_output = tf.nn.dropout(attention_output , keep_prob)    

# Fully connected layer
with tf.variable_scope('Fully_connected_layer'):
    # Single output unit: weights [HIDDEN_SIZE * 2 , 1], bias initialised to 0.
    W = tf.Variable(tf.truncated_normal([HIDDEN_SIZE * 2 , 1] , stddev = 0.1))
    b = tf.Variable(tf.constant(0., shape = [1]))
    # y_pred holds raw logits; the sigmoid is applied inside the loss and
    # explicitly for the accuracy below.
    y_pred = tf.nn.xw_plus_b(attention_output , W , b)
    # Flatten to one logit per example so it lines up with `target`.
    y_pred = tf.reshape(y_pred , [-1 , ])

with tf.variable_scope('optimizer'):
    # Mean sigmoid cross-entropy between the logits and the labels.
    loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits = y_pred , labels = target))
    # Running this op performs one Adam update with learning rate 1e-3.
    optimizer = tf.train.AdamOptimizer(1e-3).minimize(loss)
    # A prediction is correct when the rounded sigmoid output equals the label.
    correct = tf.cast(tf.equal(tf.round(tf.sigmoid(y_pred)) , target)  , tf.float32)
    accuracy = tf.reduce_mean(correct)

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [9]:
# Batch generators: endless, shuffled streams of BATCH_SIZE-row batches.
# NOTE(review): the test stream is shuffled and wraps around as well, so an
# evaluation pass of X_test.shape[0] // BATCH_SIZE batches is not guaranteed
# to visit every test example exactly once — confirm this is intended.
train_batch_generator = batch_generator(X_train , y_train , BATCH_SIZE)
test_batch_generator = batch_generator(X_test , y_test , BATCH_SIZE)

In [10]:
# Train for NUM_EPOCHS epochs; after each epoch evaluate on the test set and
# print the mean loss / accuracy of both phases.
# NOTE(review): the session is never closed in this cell — call sess.close()
# once it is no longer needed.
sess = tf.Session()
sess.run(tf.global_variables_initializer())
for epoch in range(0 , NUM_EPOCHS):
    # Running sums over the batches of this epoch, turned into means below.
    loss_train = 0
    loss_test = 0
    accuracy_train = 0
    accuracy_test = 0

    # Training
    # Floor division: only full batches are counted for an epoch.
    num_batches = X_train.shape[0] // BATCH_SIZE
    for batch_i in range(0 , num_batches):
        x_batch , y_batch = next(train_batch_generator)
        # Number of tokens before the first pad (0) in every row.
        seq_len_ = compute_sequence_length(x_batch)

        # Fetching `optimizer` applies one weight update; dropout is active
        # because keep_prob is fed with KEEP_PROB.
        loss_train_batch , acc_train_batch , _ =\
        sess.run([loss , accuracy , optimizer],
                 feed_dict = {input_data : x_batch ,
                              target : y_batch ,
                              seq_len : seq_len_ ,
                              keep_prob : KEEP_PROB})

        accuracy_train += acc_train_batch
        loss_train += loss_train_batch

        # Progress report every 45 batches.
        # NOTE(review): 45 is a magic number; consider a named constant.
        if batch_i % 45 == 0:
            print('=' * 30)
            print('epoch: {}'.format(epoch))
            print('batch_i : {}'.format(batch_i))
            print('train_loss : {:.2f}'.format(loss_train_batch))
            print('train_accuracy : {:.2%}'.format(acc_train_batch))

    accuracy_train /= num_batches
    loss_train /= num_batches

    # Testing
    # num_batches is reused here, now counting full test batches.
    num_batches = X_test.shape[0] // BATCH_SIZE
    for batch_i in range(0 , num_batches):
        x_batch , y_batch = next(test_batch_generator)
        seq_len_ = compute_sequence_length(x_batch)

        # `optimizer` is not fetched, so no weights change; keep_prob = 1.0
        # keeps every unit (no dropout).
        loss_test_batch , acc_test_batch =\
        sess.run([loss , accuracy],
                 feed_dict ={input_data : x_batch ,
                             target : y_batch ,
                             seq_len : seq_len_ ,
                             keep_prob : 1.0})
        if batch_i % 45 == 0:
            print('=' * 30)
            print('epoch : {}'.format(epoch))
            print('batch_i : {}'.format(batch_i))
            print('test_loss : {:.2f}'.format(loss_test_batch))
            print('test_accuracy : {:.2%}'.format(acc_test_batch))
        accuracy_test += acc_test_batch
        loss_test += loss_test_batch

    accuracy_test /= num_batches
    loss_test /= num_batches
    # Epoch summary: means over the batches of each phase.
    print('*' * 30)
    print('epoch: {}'.format(epoch))
    print('train_loss_mean : {:.2f} , test_loss_mean : {:.2f}'.format(loss_train , loss_test))
    print('train_accuracy_mean : {:.2%} , test_accuracy_mean : {:.2%}'.format(accuracy_train , accuracy_test))
    print('*' * 30)

epoch: 0
batch_i : 0
train_loss : 0.69
train_accuracy : 48.05%
epoch: 0
batch_i : 45
train_loss : 0.56
train_accuracy : 71.88%
epoch: 0
batch_i : 90
train_loss : 0.53
train_accuracy : 74.22%
epoch : 0
batch_i : 0
test_loss : 0.46
test_accuracy : 79.30%
epoch : 0
batch_i : 45
test_loss : 0.47
test_accuracy : 75.78%
epoch : 0
batch_i : 90
test_loss : 0.45
test_accuracy : 78.52%
******************************
epoch: 0
train_loss_mean : 0.56 , test_loss_mean : 0.46
train_accuracy_mean : 70.51% , test_accuracy_mean : 78.36%
******************************
epoch: 1
batch_i : 0
train_loss : 0.49
train_accuracy : 75.78%
epoch: 1
batch_i : 45
train_loss : 0.34
train_accuracy : 84.77%
epoch: 1
batch_i : 90
train_loss : 0.34
train_accuracy : 83.98%
epoch : 1
batch_i : 0
test_loss : 0.36
test_accuracy : 82.03%
epoch : 1
batch_i : 45
test_loss : 0.34
test_accuracy : 82.81%
epoch : 1
batch_i : 90
test_loss : 0.33
test_accuracy : 85.16%
******************************
epoch: 1
train_loss_mean : 0.38 , 