In [1]:
import numpy as np
import tensorflow as tf

In [2]:
data_path = "data/sentiment_analysis/"
BATCH_SIZE = 48
SEQ_LENGTH = 100
EMB_DIM = 32 # embedding dimension
HIDDEN_DIM = 64 # hidden state dimension of lstm cell
emb_dict_file = data_path + "sst_vocab.txt"
positive_file = data_path + "sst_pos_sentences_id.txt"
negative_file = data_path + "sst_neg_sentences_id.txt"
EPOCH_NUM = 1000
graph_path = "./graphs/implementation_2_graph"

In [3]:
class Dataloader():
    def __init__(self, batch_size, max_length = 100):
        self.batch_size = batch_size
        self.sentences = np.array([])
        self.labels = np.array([])
        self.max_length = max_length

    def load_train_data(self, positive_file, negative_file):
        # Load data
        positive_examples = []
        negative_examples = []
        with open(positive_file)as fin:
            for line in fin:
                line = line.strip()
                line = line.split()
                parse_line = [int(x) for x in line]
                parse_line.extend([0] * (self.max_length - len(parse_line)))
                if len(parse_line) == self.max_length:
                    positive_examples.append(parse_line)
        with open(negative_file)as fin:
            for line in fin:
                line = line.strip()
                line = line.split()
                parse_line = [int(x) for x in line]
                parse_line.extend([0] * (self.max_length - len(parse_line)))
                if len(parse_line) == self.max_length:
                    negative_examples.append(parse_line)
        self.sentences = np.array(positive_examples + negative_examples)

        # Generate labels
        # positive_labels = [1 for _ in positive_examples]
        # negative_labels = [0 for _ in negative_examples]
        positive_labels = [[0, 1] for _ in positive_examples]
        negative_labels = [[1, 0] for _ in negative_examples]
        self.labels = np.concatenate([positive_labels, negative_labels], 0)
        # self.labels = positive_labels + negative_labels

        # Shuffle the data
        shuffle_indices = np.random.permutation(np.arange(len(self.labels)))
        self.sentences = self.sentences[shuffle_indices]
        self.labels = self.labels[shuffle_indices]

        # Split batches
        self.num_batch = int(len(self.labels) / self.batch_size)
        self.sentences = self.sentences[:self.num_batch * self.batch_size]
        self.labels = self.labels[:self.num_batch * self.batch_size]
        self.sentences_batches = np.split(self.sentences, self.num_batch, 0)
        self.labels_batches = np.split(self.labels, self.num_batch, 0)

        self.pointer = 0


    def next_batch(self):
        ret = self.sentences_batches[self.pointer], self.labels_batches[self.pointer]
        self.pointer = (self.pointer + 1) % (self.num_batch - 5)
        return ret

    def test_batch(self):#Preserve part of dataset for testing
        ret = self.sentences_batches[self.num_batch - 1], self.labels_batches[self.num_batch - 1]
        return ret

    def reset_pointer(self):
        self.pointer = 0

In [4]:
class Detection:
    def __init__(self, sequence_length, batch_size, vocab_size, emb_dim, hidden_dim = 128, output_keep_prob=0.7):
        self.num_emb = vocab_size  # vocab size
        self.batch_size = batch_size  # batch size
        self.emb_dim = emb_dim  # dimision of embedding
        self.hidden_dim = hidden_dim  # hidden size
        self.sequence_length = sequence_length  # sequence length
        self.output_dim = 2
        self.output_keep_prob = output_keep_prob #to prevent overfit
        with tf.variable_scope("placeholder"):
            self.x = tf.placeholder(shape=[self.batch_size, self.sequence_length], dtype=tf.int32)
            self.targets = tf.placeholder(shape=[self.batch_size, self.output_dim], dtype=tf.int64)
        with tf.variable_scope("embedding"):
            self.g_embeddings = tf.Variable(tf.random_uniform([self.num_emb, self.emb_dim], -1.0, 1.0), name="W_text")
            self.inputs= tf.nn.embedding_lookup(self.g_embeddings, self.x)  # seq_length x batch_size x emb_dim
        with tf.variable_scope("rnn"):
            cell_bw = tf.contrib.rnn.BasicLSTMCell(self.hidden_dim, state_is_tuple=False)  # single lstm unit
            cell_bw = tf.contrib.rnn.DropoutWrapper(cell_bw, output_keep_prob=self.output_keep_prob)
            cell_fw = tf.contrib.rnn.BasicLSTMCell(self.hidden_dim, state_is_tuple=False)  # single lstm unit
            cell_fw = tf.contrib.rnn.DropoutWrapper(cell_fw, output_keep_prob=self.output_keep_prob)
        with tf.variable_scope("output"):
            self.outputs, self.states = tf.nn.bidirectional_dynamic_rnn(cell_bw, cell_fw, self.inputs, dtype=tf.float32)
            self.outputs = tf.reshape(self.outputs, shape=[-1, self.sequence_length, self.hidden_dim])
            self.outputs = tf.transpose(self.outputs, perm=[1, 0, 2])  # [batch_size,seq_length]
            self.outputs = tf.reduce_mean(self.outputs, 0)
            self.outputs = self.outputs[:self.batch_size] + self.outputs[self.batch_size:]
            self.logits = tf.layers.dense(self.outputs, self.output_dim, name="logits")
            self.prob = tf.nn.softmax(self.logits, name="softmax_output")
        with tf.variable_scope("train"):
            self.loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=self.targets, logits=self.logits))
            tvars = tf.trainable_variables()
            max_grad_norm = 5
            # We clip the gradients to prevent explosion
            grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars), max_grad_norm)
            gradients = list(zip(grads, tvars))
            self.train_op = tf.train.AdamOptimizer(0.005).apply_gradients(gradients)
        with tf.variable_scope("accuracy"):
            self.accuracy = tf.equal(tf.argmax(self.targets, axis=1), tf.argmax(self.prob, axis=1))
            
    def train(self, sess, x_batch, y_batch):
        _, loss = sess.run([self.train_op, self.loss], feed_dict={self.x:x_batch, self.targets:y_batch})
        return loss

    def predict(self, sess, x_batch):
        prob = sess.run([self.prob], feed_dict={self.x:x_batch})
        return prob

    def get_accuracy(self, sess, x_batch, y_batch):
        accuracy = sess.run([self.accuracy], feed_dict={self.x: x_batch, self.targets: y_batch})
        return (accuracy[0].tolist().count(True) / len(x_batch))

In [5]:
def load_emb_data(emb_dict_file):
    word_dict = {}
    word_list = []
    item = 0
    with open(emb_dict_file, 'r', encoding="utf-8") as f:
        lines = f.readlines()
        for line in lines:
            word = line.strip()
            word_dict[word] = item
            item += 1
            word_list.append(word)
    length = len(word_dict)
    print("Load embedding success! Num: %d" % length)
    return word_dict, length, word_list

In [6]:
if __name__ == "__main__":
    vocab_dict, vocab_size, vocab_list = load_emb_data(emb_dict_file)
    dis_data_loader = Dataloader(BATCH_SIZE, SEQ_LENGTH)
    dis_data_loader.load_train_data(positive_file, negative_file)
    detection = Detection(SEQ_LENGTH, BATCH_SIZE, vocab_size, EMB_DIM, HIDDEN_DIM)
    test_x_batch, test_y_batch = dis_data_loader.test_batch()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        writer = tf.summary.FileWriter(graph_path, sess.graph)
        writer.close()
        for i in range(EPOCH_NUM):
            x_batch, y_batch = dis_data_loader.next_batch()
            loss = detection.train(sess, x_batch, y_batch)
            if (i % 20 == 0):
                accuracy = detection.get_accuracy(sess, test_x_batch, test_y_batch)
                print("%d, loss:%f, accuracy:%f" % (i, loss, accuracy))

Load embedding success! Num: 4734
Instructions for updating:
This class is deprecated, please use tf.nn.rnn_cell.LSTMCell, which supports all the feature this cell currently has. Please replace the existing code with tf.nn.rnn_cell.LSTMCell(name='basic_lstm_cell').
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.



InternalError: Blas GEMM launch failed : a.shape=(48, 96), b.shape=(96, 256), m=48, n=256, k=96
	 [[node output/bidirectional_rnn/fw/fw/while/basic_lstm_cell/MatMul (defined at <ipython-input-4-c47a1050fae4>:22)  = MatMul[T=DT_FLOAT, transpose_a=false, transpose_b=false, _device="/job:localhost/replica:0/task:0/device:GPU:0"](output/bidirectional_rnn/fw/fw/while/basic_lstm_cell/concat, output/bidirectional_rnn/fw/fw/while/basic_lstm_cell/MatMul/Enter)]]
	 [[{{node train/Mean/_23}} = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_1108_train/Mean", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]

Caused by op 'output/bidirectional_rnn/fw/fw/while/basic_lstm_cell/MatMul', defined at:
  File "C:\ProgramData\Anaconda3\lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "C:\ProgramData\Anaconda3\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "C:\ProgramData\Anaconda3\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "C:\ProgramData\Anaconda3\lib\site-packages\traitlets\config\application.py", line 658, in launch_instance
    app.start()
  File "C:\ProgramData\Anaconda3\lib\site-packages\ipykernel\kernelapp.py", line 486, in start
    self.io_loop.start()
  File "C:\ProgramData\Anaconda3\lib\site-packages\tornado\platform\asyncio.py", line 127, in start
    self.asyncio_loop.run_forever()
  File "C:\ProgramData\Anaconda3\lib\asyncio\base_events.py", line 422, in run_forever
    self._run_once()
  File "C:\ProgramData\Anaconda3\lib\asyncio\base_events.py", line 1432, in _run_once
    handle._run()
  File "C:\ProgramData\Anaconda3\lib\asyncio\events.py", line 145, in _run
    self._callback(*self._args)
  File "C:\ProgramData\Anaconda3\lib\site-packages\tornado\ioloop.py", line 759, in _run_callback
    ret = callback()
  File "C:\ProgramData\Anaconda3\lib\site-packages\tornado\stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\ProgramData\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 536, in <lambda>
    self.io_loop.add_callback(lambda : self._handle_events(self.socket, 0))
  File "C:\ProgramData\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 450, in _handle_events
    self._handle_recv()
  File "C:\ProgramData\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 480, in _handle_recv
    self._run_callback(callback, msg)
  File "C:\ProgramData\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 432, in _run_callback
    callback(*args, **kwargs)
  File "C:\ProgramData\Anaconda3\lib\site-packages\tornado\stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\ProgramData\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "C:\ProgramData\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "C:\ProgramData\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "C:\ProgramData\Anaconda3\lib\site-packages\ipykernel\ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "C:\ProgramData\Anaconda3\lib\site-packages\ipykernel\zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2662, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2785, in _run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2903, in run_ast_nodes
    if self.run_code(code, result):
  File "C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2963, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-6-ae1613e45622>", line 5, in <module>
    detection = Detection(SEQ_LENGTH, BATCH_SIZE, vocab_size, EMB_DIM, HIDDEN_DIM)
  File "<ipython-input-4-c47a1050fae4>", line 22, in __init__
    self.outputs, self.states = tf.nn.bidirectional_dynamic_rnn(cell_bw, cell_fw, self.inputs, dtype=tf.float32)
  File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\ops\rnn.py", line 439, in bidirectional_dynamic_rnn
    time_major=time_major, scope=fw_scope)
  File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\ops\rnn.py", line 664, in dynamic_rnn
    dtype=dtype)
  File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\ops\rnn.py", line 872, in _dynamic_rnn_loop
    swap_memory=swap_memory)
  File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 3291, in while_loop
    return_same_structure)
  File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 3004, in BuildLoop
    pred, body, original_loop_vars, loop_vars, shape_invariants)
  File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 2939, in _BuildLoop
    body_result = body(*packed_vars_for_body)
  File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 3260, in <lambda>
    body = lambda i, lv: (i + 1, orig_body(*lv))
  File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\ops\rnn.py", line 840, in _time_step
    (output, new_state) = call_cell()
  File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\ops\rnn.py", line 826, in <lambda>
    call_cell = lambda: cell(input_t, state)
  File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 1282, in __call__
    output, new_state = self._cell(inputs, state, scope=scope)
  File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 370, in __call__
    *args, **kwargs)
  File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\layers\base.py", line 374, in __call__
    outputs = super(Layer, self).__call__(inputs, *args, **kwargs)
  File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 757, in __call__
    outputs = self.call(inputs, *args, **kwargs)
  File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 748, in call
    array_ops.concat([inputs, h], 1), self._kernel)
  File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\ops\math_ops.py", line 2057, in matmul
    a, b, transpose_a=transpose_a, transpose_b=transpose_b, name=name)
  File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\ops\gen_math_ops.py", line 4857, in mat_mul
    name=name)
  File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\util\deprecation.py", line 488, in new_func
    return func(*args, **kwargs)
  File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 3274, in create_op
    op_def=op_def)
  File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 1770, in __init__
    self._traceback = tf_stack.extract_stack()

InternalError (see above for traceback): Blas GEMM launch failed : a.shape=(48, 96), b.shape=(96, 256), m=48, n=256, k=96
	 [[node output/bidirectional_rnn/fw/fw/while/basic_lstm_cell/MatMul (defined at <ipython-input-4-c47a1050fae4>:22)  = MatMul[T=DT_FLOAT, transpose_a=false, transpose_b=false, _device="/job:localhost/replica:0/task:0/device:GPU:0"](output/bidirectional_rnn/fw/fw/while/basic_lstm_cell/concat, output/bidirectional_rnn/fw/fw/while/basic_lstm_cell/MatMul/Enter)]]
	 [[{{node train/Mean/_23}} = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_1108_train/Mean", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
