In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import utils
import random

  from ._conv import register_converters as _register_converters


In [2]:
# hyperparameters
iterations = 500
batch_size = 32
learning_rate = 0.001
reg_eta = 0.001

# dimensionalities
dim_lstm = 300
dim_word = 300
dim_aspect = 5
dim_aspect_embedding = 300
dim_sentence = 80
dim_polarity = 3

# setup utils object
u = utils.UTILS(batch_size, dim_sentence, dim_polarity)

In [3]:
# define tf placeholders
X = tf.placeholder(tf.int32, [None, dim_sentence])
y = tf.placeholder(tf.float32, [None, dim_polarity])
seqlen = tf.placeholder(tf.int32, [None])
aspects = tf.placeholder(tf.int32, [None])

In [4]:
# define tf variables
with tf.variable_scope('aspect_embedding_vars', reuse = tf.AUTO_REUSE):
    va = tf.get_variable(
        name = 'aspect_matrix_Va',
        shape = [dim_aspect, dim_aspect_embedding],
        initializer = tf.random_normal_initializer(0, 0.003),
        regularizer = tf.contrib.layers.l2_regularizer(reg_eta)
    )
    wv = tf.get_variable(
        name = 'aspect_Wv',
        shape = [dim_aspect_embedding, dim_aspect_embedding],
        initializer = tf.random_normal_initializer(0, 0.003),
        regularizer = tf.contrib.layers.l2_regularizer(reg_eta)
    )
with tf.variable_scope('attention_vars', reuse = tf.AUTO_REUSE):
    wh = tf.get_variable(
        name = 'M_tanh_Wh',
        shape = [dim_lstm, dim_lstm],
        initializer = tf.random_normal_initializer(0, 0.003),
        regularizer = tf.contrib.layers.l2_regularizer(reg_eta)
    )
    w = tf.get_variable(
        name = 'alpha_softmax_W',
        shape = [dim_lstm + dim_aspect_embedding, 1],
        initializer = tf.random_normal_initializer(0, 0.003),
        regularizer = tf.contrib.layers.l2_regularizer(reg_eta)
    )
    wp = tf.get_variable(
        name = 'hstar_tanh_Wp',
        shape = [dim_lstm, dim_lstm],
        initializer = tf.random_normal_initializer(0, 0.003),
        regularizer = tf.contrib.layers.l2_regularizer(reg_eta)
    )
    wx = tf.get_variable(
        name = 'hstar_tanh_Wx',
        shape = [dim_lstm, dim_lstm],
        initializer = tf.random_normal_initializer(0, 0.003),
        regularizer = tf.contrib.layers.l2_regularizer(reg_eta)
    )
with tf.variable_scope('output_softmax_vars', reuse = tf.AUTO_REUSE):
    ws = tf.get_variable(
        name = 'y_softmax_Ws',
        shape = [dim_lstm, dim_polarity],
        initializer = tf.random_normal_initializer(0, 0.003),
        regularizer = tf.contrib.layers.l2_regularizer(reg_eta)
    )
    bs = tf.get_variable(
        name = 'y_softmax_Bs',
        shape = [dim_polarity],
        initializer = tf.random_normal_initializer(0, 0.003),
        regularizer = tf.contrib.layers.l2_regularizer(reg_eta)
    )    

Instructions for updating:
Use the retry module or similar alternatives.


In [5]:
# define lstm model
def dynamic_lstm(inputs, seqlen, aspects):
    inputs = tf.nn.dropout(inputs, keep_prob=1.0)
    with tf.name_scope('lstm_model'):
        # slice the corresponding vai from va
        vai = tf.gather(va, aspects) # batch_size x dim_aspect_embedding
        lstm_cell = tf.contrib.rnn.LSTMCell(dim_lstm)
        H, state = tf.nn.dynamic_rnn(
            lstm_cell,
            inputs = inputs,
            sequence_length = seqlen,
            dtype = tf.float32,
            scope = 'lstm'
        )
        size = tf.shape(H)[0]
        wv_vai = tf.matmul(vai, wv) # batch_size x dim_aspect_embedding
        # stacking Wv x Va along sentence length
        wv_vai = [wv_vai for i in range(dim_sentence)]
        wv_vai_en = tf.stack(wv_vai, axis = 1) # batch_size x dim_sentence x dim_aspect_embedding
        wv_vai_en = tf.reshape(wv_vai_en, [-1, dim_aspect_embedding]) # (batch_size * dim_sentence) x dim_aspect_embedding
        H_1 = tf.reshape(H, [-1, dim_lstm]) # (batch_size * dim_sentence) x dim_lstm
        wh_H = tf.matmul(H_1, wh) # (batch_size * dim_sentence) x dim_lstm
        # concatenate wh_H and wv_va_En for inputting to tanh
        wh_H_wv_vai_en = tf.concat([wh_H, wv_vai_en], 1) # (batch_size * dim_sentence) x (dim_lstm + dim_aspect_embedding)
        M = tf.tanh(wh_H_wv_vai_en) # (batch_size * dim_sentence) x (dim_lstm + dim_aspect_embedding)
        alpha = tf.nn.softmax(tf.matmul(M, w)) # (batch_size * dim_sentence)
        alpha = tf.reshape(alpha, [-1, 1, dim_sentence]) # batch_size x 1 x dim_sentence
        index = tf.range(0, size) * dim_sentence + seqlen - 1 # batch_size
        hn = tf.gather(tf.reshape(H, [-1, dim_lstm]), index)  # batch_size x dim_lstm
        r = tf.reshape(tf.matmul(alpha, H), [-1, dim_lstm]) # batch_size x dim_lstm
        h_star = tf.tanh(tf.matmul(r, wp) + tf.matmul(hn, wx)) # batch_size x dim_lstm
        predict = tf.matmul(h_star, ws) + bs # batch x dim_polarity
    return predict


In [6]:
# define operations
# tf.reset_default_graph()
pred = dynamic_lstm(tf.nn.embedding_lookup(u.gloveDict, X), seqlen, aspects)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits = pred, labels = y))
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)
correct = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
init = tf.global_variables_initializer()

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


In [None]:
# full dataset training
test_X, test_y, test_seqlen, test_aspects = u.getData('test')
train_X, train_y, train_seqlen, train_aspects = u.getData('train')
with tf.Session() as sess:
    sess.run(init)
    for i in range(iterations):
        sess.run(optimizer, feed_dict = {X: train_X, y: train_y, seqlen: train_seqlen, aspects: train_aspects})
#         if i > 0 and i % 4 == 0:
        loss_train, accuracy_train = sess.run([loss, accuracy], feed_dict = {X: train_X, y: train_y, seqlen: train_seqlen, aspects: train_aspects})
        print('step: %s, train loss: %s, train accuracy: %s' % (i, loss_train, accuracy_train))
        loss_test, accuracy_test = sess.run([loss, accuracy], feed_dict = {X: test_X, y: test_y, seqlen: test_seqlen, aspects: test_aspects})
        print('step: %s, test loss: %s, test accuracy: %s' % (i, loss_test, accuracy_test))

In [7]:
# batch training
test_X, test_y, test_seqlen, test_aspects = u.getData('test')
with tf.Session() as sess:
    sess.run(init)
    for i in range(iterations):
        batch_X, batch_y, batch_seqlen, batch_aspects = u.nextBatch(batch_size)
        sess.run(optimizer, feed_dict = {X: batch_X, y: batch_y, seqlen: batch_seqlen, aspects: batch_aspects})
        if i > 0 and i % 4 == 0:
            loss_train, accuracy_train = sess.run([loss, accuracy], feed_dict = {X: batch_X, y: batch_y, seqlen: batch_seqlen, aspects: batch_aspects})
            print('step: %s, train loss: %s, train accuracy: %s' % (i, loss_train, accuracy_train))
            loss_test, accuracy_test = sess.run([loss, accuracy], feed_dict = {X: test_X, y: test_y, seqlen: test_seqlen, aspects: test_aspects})
            print('step: %s, test loss: %s, test accuracy: %s' % (i, loss_test, accuracy_test))

step: 4, train loss: 1.0564857, train accuracy: 0.4375
step: 4, test loss: 1.0706103, test accuracy: 0.3134635
step: 8, train loss: 0.94235384, train accuracy: 0.5
step: 8, test loss: 1.0103799, test accuracy: 0.5529291
step: 12, train loss: 0.79138494, train accuracy: 0.625
step: 12, test loss: 0.9852434, test accuracy: 0.52312434
step: 16, train loss: 0.8214115, train accuracy: 0.59375
step: 16, test loss: 0.7984635, test accuracy: 0.672148
step: 20, train loss: 0.9011877, train accuracy: 0.59375
step: 20, test loss: 0.8904527, test accuracy: 0.6053443
step: 24, train loss: 0.8405943, train accuracy: 0.59375
step: 24, test loss: 0.83706146, test accuracy: 0.64028776
step: 28, train loss: 0.83595157, train accuracy: 0.59375
step: 28, test loss: 1.071208, test accuracy: 0.5508736
step: 32, train loss: 0.70952654, train accuracy: 0.71875
step: 32, test loss: 1.038372, test accuracy: 0.5601233
step: 36, train loss: 0.8234942, train accuracy: 0.625
step: 36, test loss: 0.77566665, test ac

step: 288, test loss: 0.5767456, test accuracy: 0.7677287
step: 292, train loss: 0.39509395, train accuracy: 0.875
step: 292, test loss: 0.6514637, test accuracy: 0.7379239
step: 296, train loss: 0.65705633, train accuracy: 0.8125
step: 296, test loss: 0.58328205, test accuracy: 0.7780062
step: 300, train loss: 0.32199064, train accuracy: 0.90625
step: 300, test loss: 0.60867393, test accuracy: 0.7677287
step: 304, train loss: 0.5829632, train accuracy: 0.75
step: 304, test loss: 0.8525316, test accuracy: 0.62898254
step: 308, train loss: 0.39177847, train accuracy: 0.90625
step: 308, test loss: 0.61435246, test accuracy: 0.7749229
step: 312, train loss: 0.6058456, train accuracy: 0.625
step: 312, test loss: 0.7019048, test accuracy: 0.7235355
step: 316, train loss: 0.61384773, train accuracy: 0.625
step: 316, test loss: 0.5856152, test accuracy: 0.76156217
step: 320, train loss: 0.42325252, train accuracy: 0.875
step: 320, test loss: 0.68316525, test accuracy: 0.7204522
step: 324, tra