In [45]:
import sys
import os
import tensorflow as tf
from tensorflow.models.rnn import rnn_cell
from sklearn.cross_validation import train_test_split
sys.path.append(os.pardir)
from models.base import BaseNN
from utils import ymr_data
from models.trainer import Trainer

In [None]:
# Parameters
# ==================================================

# Model Hyperparameters
SENTENCE_LENGTH_PADDED = int(os.getenv("SENTENCE_LENGTH_PADDED", "128"))
HIDDEN_DIM = int(os.getenv("HIDDEN_DIM", "256"))
AFFINE_DIM = int(os.getenv("HIDDEN_DIM", "256"))
EMBEDDING_SIZE = int(os.getenv("EMBEDDING_SIZE", "128"))

# Training parameters
LEARNING_RATE = float(os.getenv("LEARNING_RATE", "1e-4"))
NUM_EPOCHS = int(os.getenv("NUM_EPOCHS", "100"))
BATCH_SIZE = int(os.getenv("BATCH_SIZE", "32"))
EVALUATE_EVERY = int(os.getenv("EVALUATE_EVERY", "16"))
NUM_CLASSES = 2

In [52]:
train_x, train_y, test_x, test_y = ymr_data.generate_dataset(fixed_length=SENTENCE_LENGTH_PADDED)
train_x, dev_x, train_y, dev_y = train_test_split(train_x, train_y, test_size=0.05)
VOCABULARY_SIZE = max(train_x.max(), dev_x.max(), test_x.max())
print("\ntrain/dev/test size: {:d}/{:d}/{:d}\n".format(len(train_y), len(dev_y), len(test_y)))


train/dev/test size: 29017/1528/7637



In [70]:
class CharRNN(BaseNN):
    
    def __init__(self, vocabulary_size, embedding_size=128, hidden_dim=256, batch_size=32, num_gpus=1,
                 cell=None, optimizer=None):
        self.vocabulary_size = vocabulary_size
        self.embedding_size = embedding_size
        self.hidden_dim = hidden_dim
        self.batch_size = batch_size
        self.num_gpus = num_gpus
        self.cell = cell if cell else rnn_cell.LSTMCell(hidden_dim, embedding_size)
        self.optimizer = optimizer if optimizer else tf.train.AdamOptimizer(1e-4)
    
    def inference(self, x, labels):
        sequence_length = x.get_shape().as_list()[1]
        num_classes = labels.get_shape().as_list()[1]
        
        with tf.variable_scope("embedding"):
            embedded_chars = nn.build_embedding_layer([self.vocabulary_size, self.embedding_size], x)
        
        with tf.variable_scope("rnn") as scope:
            state_init = tf.Variable(tf.zeros([1, self.cell.state_size]))
            state = tf.tile(state_init, [self.batch_size, 1])
            for i in range(sequence_length):
                if i > 0:
                    scope.reuse_variables()
                output, state = cell(embedded_chars[:, i, :], state)
            final_state = state
            final_output = output
        
        with tf.variable_scope("affine"):
            h_affine = self.build_affine([self.hidden_dim, self.hidden_dim], final_output)
        
        predictions = self.build_softmax([self.hidden_dim, num_classes], h_affine)
        return predictions


In [71]:
with tf.Graph().as_default():

    with tf.variable_scope("input"):
        placeholder_x = tf.placeholder(tf.int32, train_x.shape)
        placeholder_y = tf.placeholder(tf.float32, train_y.shape)
        train_x_var = tf.Variable(placeholder_x, trainable=False, collections=[])
        train_y_var = tf.Variable(placeholder_y, trainable=False, collections=[])
        x, labels = nn.build_input_batches(train_x_var, train_y_var, NUM_EPOCHS, BATCH_SIZE)
    
    rnn = CharRNN(
        VOCABULARY_SIZE,
        embedding_size=EMBEDDING_SIZE,
        hidden_dim=HIDDEN_DIM,
        batch_size=BATCH_SIZE,
    )
    
    predictions = rnn.inference(x, labels)
    
    with tf.variable_scope("loss"):
        loss = rnn.loss(predictions, labels)
    
    # Train
    global_step = tf.Variable(0, name="global_step")
    train_op = rnn.train(loss, global_step)
    
    with tf.variable_scope("metrics"):
        tf.scalar_summary("loss", loss)
        tf.scalar_summary("accuracy", rnn.accuracy(predictions, labels))
        summary_op = tf.merge_all_summaries()    
    
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    with sess.as_default():
        # Create a train helper
        eval_feed_dict = {x: dev_x[:BATCH_SIZE], labels: dev_y[:BATCH_SIZE]}
        trainer = Trainer(
            train_op, global_step, summary_op, eval_feed_dict, evaluate_every=EVALUATE_EVERY,
            save_every=EVALUATE_EVERY)
        # Initialize Variables and input data
        sess.run(
            [tf.initialize_all_variables(), train_x_var.initializer, train_y_var.initializer],
            {placeholder_x: train_x, placeholder_y: train_y})
        # Initialize queues
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        # Print model parameters
        rnn.print_parameters()
        try:
            while not coord.should_stop():
                trainer.step()
        except tf.errors.OutOfRangeError:
            print("Done!")
        finally:
            coord.request_stop()
        coord.join(threads)


Parameters:
----------
input/input_producer/input_producer/limit_epochs/epochs:0: 1
embedding/W:0: 479,360
rnn/Variable:0: 512
rnn/BasicLSTMCell/Linear/Matrix:0: 393,216
rnn/BasicLSTMCell/Linear/Bias:0: 1,024
affine/W:0: 65,536
affine/b:0: 256
W:0: 512
b:0: 2
global_step:0: 1
beta1_power:0: 1
beta2_power:0: 1
Total Parameters: 940,422

2015-11-21T13:37:43.650882: Step 1
2015-11-21T13:37:47.197433: Step 2
2015-11-21T13:37:50.739243: Step 3
2015-11-21T13:37:55.078635: Step 4
2015-11-21T13:37:58.674011: Step 5
2015-11-21T13:38:02.517531: Step 6
2015-11-21T13:38:06.754900: Step 7
2015-11-21T13:38:10.852787: Step 8
2015-11-21T13:38:14.467210: Step 9
2015-11-21T13:38:17.709150: Step 10
2015-11-21T13:38:21.173051: Step 11
2015-11-21T13:38:24.433918: Step 12
2015-11-21T13:38:27.650688: Step 13
2015-11-21T13:38:30.838809: Step 14
2015-11-21T13:38:34.810621: Step 15
2015-11-21T13:38:38.382810: Step 16

Saved model parameters to ./runs/1448109434/checkpoints/model-16

loss: 0.350186
accuracy: 0.

KeyboardInterrupt: 