In [27]:
import sys
import os
import time
import tensorflow as tf
from tensorflow.models.rnn import rnn_cell

In [9]:
sys.path.append(os.pardir)
from utils.mixins import NNMixin, TrainMixin
from utils import ymr_data

In [119]:
# Parameters
# ==================================================

# Model Hyperparameters
SENTENCE_LENGTH_PADDED = 64
HIDDEN_DIM = 200
EMBEDDING_SIZE = 128

# Training parameters
NUM_EPOCHS = 50
BATCH_SIZE = 64
EVALUATE_EVERY = 16
NUM_CLASSES = 2

In [113]:
train_x, train_y, dev_x, dev_y, test_x, test_y = ymr_data.generate_dataset(fixed_length=SENTENCE_LENGTH_PADDED)
VOCABULARY_SIZE = max(train_x.max(), dev_x.max(), test_x.max()) + 1
print("\ntrain/dev/test size: {:d}/{:d}/{:d}\n".format(len(train_y), len(dev_y), len(test_y)))


train/dev/test size: 29017/1528/7637



In [124]:
class CharRNN(object, NNMixin, TrainMixin):
    
    def __init__(self, vocabulary_size, sequence_length, batch_size, num_classes,
                 embedding_size=128, hidden_dim=256,num_gpus=1, cell=None, loss="linear_gain"):
        
        self.input_x = tf.placeholder(tf.int32, [batch_size, sequence_length])
        self.input_y = tf.placeholder(tf.float32, [batch_size, num_classes])
        
        if not cell:
            self.cell = rnn_cell.LSTMCell(hidden_dim, embedding_size, use_peepholes=True)
        
        with tf.variable_scope("embedding"):
            self.embedded_chars = self._build_embedding([vocabulary_size, embedding_size], self.input_x)
        
        with tf.variable_scope("rnn") as scope:
            self.state = tf.Variable(tf.zeros([batch_size, self.cell.state_size]))
            self.outputs = []
            self.states = [self.state]
            for i in range(sequence_length):
                if i > 0:
                    scope.reuse_variables()
                new_output, new_state = self.cell(self.embedded_chars[:, i, :], self.states[-1])
                self.outputs.append(new_output)
                self.states.append(new_state)
                
            self.final_state = self.states[-1]
            self.final_output = self.outputs[-1]
        
        with tf.variable_scope("softmax"):
            self.ys = [self._build_softmax([hidden_dim, num_classes], o) for o in self.outputs]
            self.y = self.ys[-1]
        
        if loss == "linear_gain":
             # Loss with linear gain. We output at each time step and multiply losses with a linspace
            packed_ys = tf.pack(self.ys)
            tiled_labels = tf.pack([self.input_y for i in range(sequence_length)])
            accumulated_losses = -tf.reduce_sum(tiled_labels * tf.log(packed_ys), [1,2])
            loss_gains = tf.linspace(0.0, 1.0, sequence_length)
            annealed_losses = tf.mul(loss_gains, tf.concat(0, accumulated_losses))
            accumulated_loss = tf.reduce_sum(annealed_losses)
            self.loss = accumulated_loss
            self.mean_loss = tf.reduce_mean(annealed_losses)
        elif loss == "last":        
            # Standard loss, only last output is considered
            self.loss = self._build_total_ce_loss(self.ys[-1], self.input_y)
            self._build_mean_ce_loss(self.ys[-1], self.input_y)

        # Summaries
        total_loss_summary = tf.scalar_summary("total loss", self.loss)
        mean_loss_summary = tf.scalar_summary("mean loss", self.mean_loss)
        accuracy_summmary = tf.scalar_summary("accuracy", self._build_accuracy(self.y, self.input_y))
        self.summaries = tf.merge_all_summaries()


In [125]:
with tf.Graph().as_default():
    session_conf = tf.ConfigProto(allow_soft_placement=True)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        # Instantiate our model
        rnn = CharRNN(VOCABULARY_SIZE, SENTENCE_LENGTH_PADDED, BATCH_SIZE, 2)

        # Generate input batches (using tensorflow)
        with tf.variable_scope("input"):
            placeholder_x = tf.placeholder(tf.int32, train_x.shape)
            placeholder_y = tf.placeholder(tf.float32, train_y.shape)
            train_x_var = tf.Variable(placeholder_x, trainable=False, collections=[])
            train_y_var = tf.Variable(placeholder_y, trainable=False, collections=[])
            x_slice, y_slice = tf.train.slice_input_producer([train_x_var, train_y_var], num_epochs=NUM_EPOCHS)
            x_batch, y_batch = tf.train.batch([x_slice, y_slice], batch_size=BATCH_SIZE)

        # Define Training procedure
        out_dir = os.path.join(os.path.curdir, "runs", str(int(time.time())))
        global_step = tf.Variable(0, name="global_step")
        optimizer = tf.train.AdamOptimizer(1e-4)
        # Clip the gradients
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(rnn.loss, tvars), 5)
        train_op = optimizer.apply_gradients(zip(grads, tvars), global_step=global_step)
        
        # Generate train and eval seps
        train_step = rnn.build_train_step(out_dir, train_op, global_step, rnn.summaries, save_every=8, sess=sess)
        eval_step = rnn.build_eval_step(out_dir, global_step, rnn.summaries, sess=sess)

        # Initialize variables and input data
        sess.run(tf.initialize_all_variables())
        sess.run([train_x_var.initializer, train_y_var.initializer], {placeholder_x: train_x, placeholder_y: train_y})

        # Initialize queues
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        # Print model parameters
        # rnn.print_parameters()

        # Repeat until we're done (the input queue throws an error)...
        try:
            while not coord.should_stop():
                train_step({rnn.input_x: x_batch.eval(), rnn.input_y: y_batch.eval()})
                if global_step.eval() % EVALUATE_EVERY == 0:
                    eval_step({rnn.input_x: dev_x[:BATCH_SIZE], rnn.input_y: dev_y[:BATCH_SIZE]})
        except tf.errors.OutOfRangeError:
            print("Yay, training done!")
            eval_step({rnn.input_x: dev_x, rnn.input_y: dev_y})
        finally:
            coord.request_stop()
        coord.join(threads)

ValueError: Cannot feed value of shape (64, 128) for Tensor u'Placeholder:0', which has shape (Dimension(64), Dimension(64))