In [1]:
import tensorflow as tf
import math
import _pickle as cPickle
import gzip
import numpy as np
import random
import logging

# Root-logger setup: append DEBUG-and-above records to 'mylog.log'.
#
# The handler is tagged with a name and looked up before being created, so
# re-running this cell in a live kernel does not attach a second FileHandler
# to the root logger (which would write every message twice).
LOG_HANDLER_NAME = 'notebook-file-log'

logger = logging.getLogger()
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# Reuse the handler installed by a previous run of this cell, if any.
fhandler = next(
    (handler for handler in logger.handlers if handler.get_name() == LOG_HANDLER_NAME),
    None,
)
if fhandler is None:
    fhandler = logging.FileHandler(filename='mylog.log', mode='a')
    fhandler.set_name(LOG_HANDLER_NAME)
    logger.addHandler(fhandler)

# Always (re)apply the formatter so edits to the format string take effect on re-run.
fhandler.setFormatter(formatter)
logger.setLevel(logging.DEBUG)

In [2]:
def _load_gzipped_pickle(path):
    """Unpickle and return the single object stored in the gzip file at `path`.

    The file is opened in a `with` block so the handle is closed as soon as
    the object has been read, instead of being left open until garbage
    collection.
    """
    # SECURITY: unpickling executes arbitrary code embedded in the file.
    # Only load pickles that come from a trusted source.
    with gzip.open(path, "rb") as handle:
        return cPickle.load(handle)


# Labelled data: two sequence inputs per example plus the labels.
sequences_1 = _load_gzipped_pickle(r"data_1.pickle.gz")
sequences_2 = _load_gzipped_pickle(r"data_2.pickle.gz")
target = _load_gzipped_pickle(r"one_hot_labels.pickle.gz")
# Test data: two sequence inputs per example plus their ids.
test_sequences_1 = _load_gzipped_pickle(r"test_data_1.pickle.gz")
test_sequences_2 = _load_gzipped_pickle(r"test_data_2.pickle.gz")
test_ids = _load_gzipped_pickle(r"test_ids.pickle.gz")

In [3]:
def sequence_lengths(sequences):
    """Return, for each row of `sequences`, the sum of the signs of its entries.

    `tf.sign` maps positive entries to 1 and zeros to 0, so for rows holding
    only non-negative ids this is the number of non-zero entries per row.
    Presumably id 0 is the padding token, which would make the result the
    true (unpadded) length of each sequence -- TODO confirm against the
    preprocessing code.
    """
    nonzero_mask = tf.sign(sequences)
    return tf.reduce_sum(nonzero_mask, axis=1)

def build_model(seq_1_length, seq_2_length, embedding_size = 100, lstm_size = 300, vocabulary_size = 100000):
    """Build a two-input LSTM classifier graph and return its inputs and logits.

    Each input is an int32 batch of token ids. Both inputs are embedded with
    one shared embedding matrix, each is run through its own LSTM, and the
    second element of each LSTM's final state tuple is concatenated and
    passed through a linear layer with two outputs.

    Args:
        seq_1_length: number of columns (time steps) of the first input.
        seq_2_length: number of columns (time steps) of the second input.
        embedding_size: width of each embedding vector.
        lstm_size: number of units in each LSTM cell.
        vocabulary_size: number of rows in the embedding matrix.

    Returns:
        (x1, x2, softmax_inputs): the two input placeholders and the
        two-column pre-softmax output of the linear layer.
    """

    def final_state(scope_name, cell, token_ids, embedded):
        # Run `cell` over `embedded` inside its own variable scope and keep
        # only the second element of the final state tuple. The per-row
        # lengths are computed inside the scope, as in the unrolled version.
        with tf.variable_scope(scope_name):
            _, (_, last_state) = tf.nn.dynamic_rnn(
                cell=cell,
                inputs=embedded,
                sequence_length=sequence_lengths(token_ids),
                dtype=tf.float32,
                time_major=False,
            )
        return last_state

    # Inputs: one row per example, one column per time step.
    x1 = tf.placeholder(tf.int32, shape=[None, seq_1_length])
    x2 = tf.placeholder(tf.int32, shape=[None, seq_2_length])

    # A single embedding table is looked up by both inputs.
    embedding_stddev = 1.0 / math.sqrt(embedding_size)
    embeddings = tf.Variable(
        tf.truncated_normal([vocabulary_size, embedding_size], stddev=embedding_stddev)
    )
    embedded_1 = tf.nn.embedding_lookup(embeddings, x1)
    embedded_2 = tf.nn.embedding_lookup(embeddings, x2)

    # One LSTM cell per input; the cells do not share weights.
    cell_1 = tf.contrib.rnn.LSTMCell(lstm_size)
    cell_2 = tf.contrib.rnn.LSTMCell(lstm_size)

    state_1 = final_state("lstm_1", cell_1, x1, embedded_1)
    state_2 = final_state("lstm_2", cell_2, x2, embedded_2)

    # Linear layer over the concatenated states: [batch, 2 * lstm_size] -> [batch, 2].
    lstm_outputs = tf.concat([state_1, state_2], axis=1)
    W = tf.Variable(tf.truncated_normal([2 * lstm_size, 2], stddev=1.0 / math.sqrt(lstm_size)))
    b = tf.Variable(tf.truncated_normal([2], stddev=1.0))
    softmax_inputs = tf.matmul(lstm_outputs, W) + b
    return x1, x2, softmax_inputs

In [4]:
# Build the graph; the placeholder widths are taken from the second
# dimension of the loaded sequence arrays.
x1, x2, softmax_inputs = build_model(
    seq_1_length=sequences_1.shape[1],
    seq_2_length=sequences_2.shape[1],
)
# Label placeholder: one row per example, two columns (one per output of the model).
y_ = tf.placeholder(tf.float32, shape=[None, 2])

In [5]:
def create_loss(outputs, labels):
    """Return the mean softmax cross-entropy of `outputs` against `labels`.

    `outputs` are passed as logits (softmax is applied inside the TensorFlow
    op); the per-example losses are averaged into a single scalar tensor.
    """
    per_example_xent = tf.nn.softmax_cross_entropy_with_logits(logits=outputs, labels=labels)
    return tf.reduce_mean(per_example_xent)


log_loss = create_loss(outputs=softmax_inputs, labels=y_)

In [6]:
def create_optimizer(loss):
    """Return the training op that minimizes `loss` with a default-configured Adam optimizer."""
    return tf.train.AdamOptimizer().minimize(loss)


minimize = create_optimizer(loss=log_loss)

In [7]:
# Open a session and initialize every variable defined in the graph so far.
# This cell has to run after the optimizer cell: the initializer only covers
# variables that already exist when it is created.
sess = tf.Session()
init_op = tf.global_variables_initializer()
sess.run(init_op)

In [8]:
# Split the labelled data into a training part and a held-out part.
#
# The shuffle uses its own seeded generator so that re-running the notebook
# yields the same split (and therefore comparable losses) without touching
# the global `random` state.
SPLIT_SEED = 0
TRAIN_FRACTION = 0.7


def _take_rows(rows, row_indexes, dtype=None):
    """Gather `rows[i]` for every i in `row_indexes` into a new numpy array."""
    return np.asarray([rows[i] for i in row_indexes], dtype=dtype)


sequences = (sequences_1, sequences_2)
indexes = list(range(len(sequences_1)))
random.Random(SPLIT_SEED).shuffle(indexes)
train_part = int(TRAIN_FRACTION * len(indexes))
train_indexes, test_indexes = indexes[:train_part], indexes[train_part:]

# NOTE: test_1 / test_2 / test_target are the held-out slice of the labelled
# data, not the separately loaded test_sequences_* arrays.
train_1 = _take_rows(sequences_1, train_indexes)
train_2 = _take_rows(sequences_2, train_indexes)
test_1 = _take_rows(sequences_1, test_indexes)
test_2 = _take_rows(sequences_2, test_indexes)
train_target = _take_rows(target, train_indexes, dtype=np.float32)
test_target = _take_rows(target, test_indexes, dtype=np.float32)

In [10]:
# Mini-batch training; after every epoch the loss on the full training split
# and on the held-out split is written to the log.
batch_size = 1000
epochs = 15
# Count batches over the TRAINING split (ceiling division keeps the final,
# possibly shorter, batch). Counting over the full dataset would run past the
# end of train_1 and feed empty batches for the remaining steps.
num_batches = (len(train_1) + batch_size - 1) // batch_size
for e in range(epochs):
    for i in range(num_batches):
        batch = slice(i * batch_size, (i + 1) * batch_size)
        train_x1 = train_1[batch]
        train_x2 = train_2[batch]
        batch_target = train_target[batch]
        sess.run(minimize, {x1: train_x1, x2: train_x2, y_: batch_target})
    # Each evaluation feeds the whole split in a single run.
    train_loss = sess.run(log_loss, {x1: train_1, x2: train_2, y_: train_target})
    test_loss = sess.run(log_loss, {x1 : test_1, x2 : test_2, y_ : test_target})
    logger.info('Epoch {:2d}, train error {:3.5f}, test error {:3.5f}'.format(e, train_loss, test_loss))

# The session is deliberately left open here: a later cell still evaluates
# `log_loss` through `sess`. Call sess.close() after the last cell that uses it.

KeyboardInterrupt: 

In [14]:
# Sanity check: add up the entries of `target` with the built-in sum.
# NOTE(review): the displayed result is a single number, which suggests
# `target` is 1-D rather than two-column -- confirm it matches the
# (None, 2) label placeholder `y_`.
sum(target)

149263

In [11]:
# Re-evaluate the loss on the whole training split in a single run.
# Requires `sess` to still be open; this fails if the session was closed earlier.
# NOTE(review): the execution counts around this cell are out of order, so the
# stored output may not correspond to a top-to-bottom run.
train_loss = sess.run(log_loss, {x1: train_1, x2: train_2, y_: train_target})

In [12]:
# Display the most recently computed training loss.
train_loss

1885804.0

In [12]:
# Scratch check of the '{:3.5f}' format spec that the training log message uses.
print('{:3.5f}'.format(12.12345))

12.12345
