<u>**2015 Relation Classification via Recurrent Neural Network**</u><br/>
Dongxu Zhang, Dong Wang

1 layer of BiRNN <br/>
for i in length do h = max(ht_fw + ht_bw)

In [1]:
import tensorflow as tf
from tensorflow.contrib import rnn
import numpy as np
import time
print(tf.__version__)

1.10.0


In [2]:
import mnistdata
mnist = mnistdata.read_data_sets("data/mnist", one_hot=True, reshape=False)

Successfully unzipped train-images-idx3-ubyte.gz
Successfully unzipped train-labels-idx1-ubyte.gz
Successfully unzipped t10k-images-idx3-ubyte.gz
Successfully unzipped t10k-labels-idx1-ubyte.gz


In [3]:
print(mnist.train.images.shape)
print(mnist.test.labels.shape)

(60000, 28, 28, 1)
(10000, 10)


In [4]:
learning_rate = 1e-4
batch_size = 100
display_step = 1000
test_size = mnist.test.labels.shape[0]

n_input = 28
n_steps=  28
n_hidden = 128
n_classes = 10

In [5]:
X = tf.placeholder(tf.float32, [None, n_steps, n_input])
Y_ = tf.placeholder(tf.float32, [None, n_classes])

In [6]:
# BiRNN version 1
def BiRNN(x):
    x = tf.transpose(x, [1, 0, 2])
    x = tf.reshape(x, [-1, n_input])
    x = tf.split(x, n_steps)

    W = tf.Variable(tf.truncated_normal([n_hidden, n_classes], stddev=0.1))
    b = tf.Variable(tf.truncated_normal([n_classes], stddev=0.1))
    
    fw_cell = tf.nn.rnn_cell.LSTMCell(n_hidden, forget_bias=1.0)
    bw_cell = tf.nn.rnn_cell.LSTMCell(n_hidden, forget_bias=1.0)
    out, _, _ = tf.contrib.rnn.static_bidirectional_rnn(fw_cell, bw_cell, x, dtype=tf.float32)
    
    out = tf.stack(out)
    h = out[:, :, 0:n_hidden] + out[:, :, n_hidden:2*n_hidden]
    h_max = tf.reduce_max(h, 0)
    
    pred = tf.nn.softmax(tf.matmul(h_max, W) + b)

    return pred

In [6]:
# BiRNN version 2
def BiRNN(x):
    x = tf.transpose(x, [1, 0, 2])
    x = tf.unstack(x, n_input)
    
    W_fw = tf.Variable(tf.truncated_normal([n_input, n_hidden], stddev=0.1))
    U_fw = tf.Variable(tf.truncated_normal([n_hidden, n_hidden], stddev=0.1))
    b_fw = tf.Variable(tf.truncated_normal([n_hidden], stddev=0.1))

    W_bw = tf.Variable(tf.truncated_normal([n_input, n_hidden], stddev=0.1))
    U_bw = tf.Variable(tf.truncated_normal([n_hidden, n_hidden], stddev=0.1))
    b_bw = tf.Variable(tf.truncated_normal([n_hidden], stddev=0.1))

    W_out = tf.Variable(tf.truncated_normal([n_hidden, n_classes], stddev=0.1))
    b_out = tf.Variable(tf.truncated_normal([n_classes], stddev=0.1))

    h0_fw = tf.Variable(tf.truncated_normal([1, n_hidden], stddev=0.1))
    h0_bw = tf.Variable(tf.truncated_normal([1, n_hidden], stddev=0.1))

    h_fw = []
    h_bw = []
    h = []
    
    for i in range(n_steps):
        if i == 0:
            h_fw.append(tf.tanh(tf.matmul(x[0], W_fw) + tf.matmul(h0_fw, U_fw) + b_fw))
            h_bw.append(tf.tanh(tf.matmul(x[n_steps-1], W_bw) + tf.matmul(h0_bw, U_bw) + b_bw))
            
        else:
            h_fw.append(tf.tanh(tf.matmul(x[i], W_fw) + tf.matmul(h_fw[i-1], U_fw) + b_fw))
            h_bw.append(tf.tanh(tf.matmul(x[n_steps-1-i], W_bw) + tf.matmul(h_fw[i-1], U_bw) + b_bw))
    
    for i in range(n_steps):
        h.append(h_fw[i] + h_bw[n_steps-1-i])
    
    h = tf.stack(h)
    h_max = tf.reduce_max(h, 0)
    
    pred = tf.nn.softmax(tf.matmul(h_max, W_out) + b_out)
    
    return pred

In [7]:
Y = BiRNN(X)

In [8]:
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=Y, labels=Y_))

correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

train_step = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy)

sess = tf.Session()
sess.run(tf.global_variables_initializer())

In [9]:
# data/mnist && BiRNN version 1
start = time.clock()
test_X = mnist.test.images
test_X = test_X.reshape([test_size, n_steps, n_input])
test_Y = mnist.test.labels
for i in range(10000):
    batch_X, batch_Y = mnist.train.next_batch(batch_size)
    batch_X = batch_X.reshape([batch_size, n_steps, n_input])
    if i % display_step == 0:
        train_accuracy = accuracy.eval(session=sess, feed_dict={X:batch_X, Y_:batch_Y})
        test_accuracy = accuracy.eval(session=sess, feed_dict={X:test_X, Y_:test_Y})
        print("step %5d, train_accuracy = %.4g %% test_accuracy = %.4g %%" % (i, train_accuracy*100, test_accuracy*100))
    train_step.run(session=sess, feed_dict={X:batch_X, Y_:batch_Y})
print("------------------------------------")
print("training time: ", time.clock()-start, " s")
test_accuracy = accuracy.eval(session=sess, feed_dict={X:test_X, Y_:test_Y})
print("final accuracy = %.4g %%" % (test_accuracy*100))

step     0, train_accuracy = 5 % test_accuracy = 8.14 %
step  1000, train_accuracy = 91 % test_accuracy = 87.16 %
step  2000, train_accuracy = 89 % test_accuracy = 93.12 %
step  3000, train_accuracy = 95 % test_accuracy = 95.06 %
step  4000, train_accuracy = 98 % test_accuracy = 95.74 %
step  5000, train_accuracy = 91 % test_accuracy = 96.03 %
step  6000, train_accuracy = 95 % test_accuracy = 96.61 %
step  7000, train_accuracy = 99 % test_accuracy = 96.5 %
step  8000, train_accuracy = 91 % test_accuracy = 96.86 %
step  9000, train_accuracy = 97 % test_accuracy = 97.11 %
------------------------------------
training time:  289.10449459999995  s
final accuracy = 97.41 %


In [9]:
# data/mnist && BiRNN version 2
start = time.clock()
test_X = mnist.test.images
test_X = test_X.reshape([test_size, n_steps, n_input])
test_Y = mnist.test.labels
for i in range(10000):
    batch_X, batch_Y = mnist.train.next_batch(batch_size)
    batch_X = batch_X.reshape([batch_size, n_steps, n_input])
    if i % display_step == 0:
        train_accuracy = accuracy.eval(session=sess, feed_dict={X:batch_X, Y_:batch_Y})
        test_accuracy = accuracy.eval(session=sess, feed_dict={X:test_X, Y_:test_Y})
        print("step %5d, train_accuracy = %.4g %% test_accuracy = %.4g %%" % (i, train_accuracy*100, test_accuracy*100))
    train_step.run(session=sess, feed_dict={X:batch_X, Y_:batch_Y})
print("------------------------------------")
print("training time: ", time.clock()-start, " s")
test_accuracy = accuracy.eval(session=sess, feed_dict={X:test_X, Y_:test_Y})
print("final accuracy = %.4g %%" % (test_accuracy*100))

step     0, train_accuracy = 3 % test_accuracy = 8.06 %
step  1000, train_accuracy = 79 % test_accuracy = 79.68 %
step  2000, train_accuracy = 78 % test_accuracy = 83.16 %
step  3000, train_accuracy = 95 % test_accuracy = 91.42 %
step  4000, train_accuracy = 94 % test_accuracy = 93.43 %
step  5000, train_accuracy = 92 % test_accuracy = 94.02 %
step  6000, train_accuracy = 97 % test_accuracy = 94.67 %
step  7000, train_accuracy = 98 % test_accuracy = 95.39 %
step  8000, train_accuracy = 94 % test_accuracy = 95.68 %
step  9000, train_accuracy = 98 % test_accuracy = 95.97 %
------------------------------------
training time:  111.9186839  s
final accuracy = 96.3 %


In [9]:
# data/fashion && BiRNN version 2
start = time.clock()
test_X = mnist.test.images
test_X = test_X.reshape([test_size, n_steps, n_input])
test_Y = mnist.test.labels
for i in range(10000):
    batch_X, batch_Y = mnist.train.next_batch(batch_size)
    batch_X = batch_X.reshape([batch_size, n_steps, n_input])
    if i % display_step == 0:
        train_accuracy = accuracy.eval(session=sess, feed_dict={X:batch_X, Y_:batch_Y})
        test_accuracy = accuracy.eval(session=sess, feed_dict={X:test_X, Y_:test_Y})
        print("step %5d, train_accuracy = %.4g %% test_accuracy = %.4g %%" % (i, train_accuracy*100, test_accuracy*100))
    train_step.run(session=sess, feed_dict={X:batch_X, Y_:batch_Y})
print("training time: ", time.clock()-start, " s")

step     0, train_accuracy = 8 % test_accuracy = 10 %
step  1000, train_accuracy = 76 % test_accuracy = 71.97 %
step  2000, train_accuracy = 77 % test_accuracy = 73.93 %
step  3000, train_accuracy = 85 % test_accuracy = 80.02 %
step  4000, train_accuracy = 80 % test_accuracy = 81.22 %
step  5000, train_accuracy = 82 % test_accuracy = 81.64 %
step  6000, train_accuracy = 90 % test_accuracy = 82.28 %
step  7000, train_accuracy = 82 % test_accuracy = 82.92 %
step  8000, train_accuracy = 85 % test_accuracy = 83.3 %
step  9000, train_accuracy = 88 % test_accuracy = 83.61 %
training time:  114.0431472  s
