# Imports

In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import time

import numpy as np
import tensorflow as tf

from tensorflow.models.rnn import rnn_cell
from tensorflow.models.rnn.seq2seq import sequence_loss_by_example

# parses the dataset
import ptb_reader

# Dataset

In [2]:
# Load the three dataset splits through the project-local `ptb_reader` module.
# "ptb" is handed to the reader as the data location (presumably a directory
# relative to the notebook -- confirm against ptb_reader).
# The fourth return value is discarded; NOTE(review): it looks like the
# vocabulary size, which `Config.vocab_size` hard-codes instead -- confirm.
train_data, valid_data, test_data, _ = ptb_reader.ptb_raw_data("ptb")

# Model

In [3]:
class PTBModel(object):
    """Unrolled, multi-layer recurrent language model.

    Constructing an instance adds the whole model to the current default
    graph: input/target placeholders, a stack of recurrent cells, an embedding
    table, the unrolled recurrence, a softmax projection, the cost and (for
    training graphs only) a momentum update with gradient clipping.

    Attributes set by the constructor:
        batch_size, num_steps: copied from `config`.
        input_data, targets: int32 placeholders of shape [batch_size, num_steps].
        initial_state: zero state of the stacked cell for `batch_size` rows.
        final_state: state returned by the stacked cell at the last unrolled step.
        cost: summed per-example loss divided by `batch_size`.
        train_op: parameter update when `is_training` is true, otherwise a no-op.
    """

    def __init__(self, CellType, is_training, config):
        """Build the graph.

        Args:
            CellType: callable invoked as `CellType(hidden_size)` to create the
                recurrent cell used for every layer.
            is_training: when true, dropout (if `config.keep_prob < 1`) and the
                training operation are added to the graph.
            config: object exposing batch_size, num_steps, hidden_size,
                vocab_size, keep_prob, num_layers, init_scale, max_grad_norm,
                learning_rate and momentum.
        """
        batch_size = config.batch_size
        num_steps = config.num_steps
        hidden_units = config.hidden_size
        vocab_size = config.vocab_size
        self.batch_size = batch_size
        self.num_steps = num_steps

        # dropout only exists in a training graph whose keep probability is below one
        use_dropout = is_training and config.keep_prob < 1

        batch_shape = [batch_size, num_steps]
        self.input_data = tf.placeholder(tf.int32, batch_shape)
        self.targets = tf.placeholder(tf.int32, batch_shape)

        # one cell object, optionally wrapped with output dropout, repeated for each layer
        layer_cell = CellType(hidden_units)
        if use_dropout:
            layer_cell = rnn_cell.DropoutWrapper(layer_cell, output_keep_prob=config.keep_prob)
        stacked_cell = rnn_cell.MultiRNNCell([layer_cell for _ in range(config.num_layers)])
        self.initial_state = stacked_cell.zero_state(batch_size, tf.float32)

        # uniform initializer shared by every variable obtained through `get_variable`
        uniform_init = tf.random_uniform_initializer(-config.init_scale, config.init_scale)

        # the embedding table and its lookup are pinned to the CPU
        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [vocab_size, hidden_units], initializer=uniform_init)
            embedded = tf.nn.embedding_lookup(embedding, self.input_data)

        if use_dropout:
            embedded = tf.nn.dropout(embedded, config.keep_prob)

        step_outputs = []
        step_states = []
        current_state = self.initial_state

        # unroll the recurrence; every step after the first reuses the cell variables
        with tf.variable_scope("RNN", initializer=uniform_init):
            for step_index in range(num_steps):
                if step_index >= 1:
                    tf.get_variable_scope().reuse_variables()

                step_input = embedded[:, step_index, :]
                step_output, current_state = stacked_cell(step_input, current_state)

                step_outputs.append(step_output)
                step_states.append(current_state)

        self.final_state = step_states[-1]

        # join the per-step outputs and flatten them into rows of `hidden_units` values
        flat_output = tf.reshape(tf.concat(1, step_outputs), [-1, hidden_units])
        softmax_w = tf.get_variable("softmax_w", [hidden_units, vocab_size], initializer=uniform_init)
        softmax_b = tf.get_variable("softmax_b", [vocab_size], initializer=uniform_init)

        logits = tf.nn.xw_plus_b(flat_output, softmax_w, softmax_b)
        flat_targets = tf.reshape(self.targets, [-1])
        # every position carries the same weight of one
        uniform_weights = tf.ones([batch_size * num_steps])

        # per-example loss over the flattened logits and targets
        per_example_loss = sequence_loss_by_example([logits], [flat_targets], [uniform_weights], vocab_size)
        self.cost = tf.reduce_sum(per_example_loss) / batch_size

        if not is_training:
            # validation/test graphs never update the parameters
            self.train_op = tf.no_op()
            return

        # training graph: clip the gradients by global norm, then apply a momentum update
        trainable = tf.trainable_variables()
        raw_gradients = tf.gradients(self.cost, trainable)
        clipped_gradients, _global_norm = tf.clip_by_global_norm(raw_gradients, config.max_grad_norm)
        momentum_optimizer = tf.train.MomentumOptimizer(config.learning_rate, config.momentum)
        self.train_op = momentum_optimizer.apply_gradients(zip(clipped_gradients, trainable))

# Epoch

In [4]:
def run_epoch(sess, model, data, is_training=False, verbose=False):
    """Run `model` once over `data` and return the resulting perplexity.

    Each batch produced by `ptb_reader.ptb_iterator` is fed through the model;
    the state fetched after one batch is fed back in as the initial state of
    the next. `model.train_op` is always fetched, so whether parameters are
    updated is decided by the model that is passed in.

    Args:
        sess: session used for every evaluation, including the initial state.
        model: object exposing batch_size, num_steps, input_data, targets,
            initial_state, final_state, cost and train_op.
        data: sequence handed to `ptb_reader.ptb_iterator`.
        is_training: accepted for interface compatibility; not used here.
        verbose: when true, print progress, running perplexity and speed
            every tenth step.

    Returns:
        exp(total cost / total number of time steps processed).

    Raises:
        ValueError: if no time steps were processed (for example the iterator
            yielded no batches), since the perplexity is then undefined.
    """
    epoch_size = ((len(data) // model.batch_size) - 1) // model.num_steps
    start_time = time.time()

    # accumulated counts
    costs = 0.0
    iters = 0

    # fetch the initial RNN state through the session we were given instead of
    # relying on an ambient default session
    state = sess.run(model.initial_state)

    for step, (x, y) in enumerate(ptb_reader.ptb_iterator(data, model.batch_size, model.num_steps)):
        cost, state, _ = sess.run([model.cost, model.final_state, model.train_op], {
            model.input_data: x,
            model.targets: y,
            model.initial_state: state
        })
        costs += cost
        iters += model.num_steps

        if verbose and step % 10 == 0:
            # guard the progress ratio so a degenerate epoch size cannot crash logging
            progress = (step / epoch_size) * 100 if epoch_size > 0 else 0.0
            wps = iters * model.batch_size / (time.time() - start_time)
            perplexity = np.exp(costs / iters)
            print("%.1f%% perplexity: %.3f speed: %.0f wps" % (progress, perplexity, wps))

    if iters == 0:
        raise ValueError("run_epoch processed no time steps; perplexity is undefined "
                         "(is `data` too short for the model's batch_size/num_steps?)")

    return np.exp(costs / iters)

# Config

In [5]:
class Config(object):
    """Hyper-parameters read by `PTBModel`; override attributes on an instance to vary a run."""

    # --- data layout ---
    vocab_size = 10000      # rows of the embedding table / width of the softmax output
    batch_size = 50         # first dimension of the input and target placeholders
    num_steps = 30          # number of time steps the recurrence is unrolled for

    # --- network ---
    num_layers = 2          # how many times the cell is repeated in the stacked cell
    hidden_size = 200       # size passed to the cell; also the embedding width
    init_scale = 0.1        # variables start uniformly in [-init_scale, init_scale]
    keep_prob = 0.5         # dropout keep probability (only applied when training and < 1)

    # --- optimisation ---
    learning_rate = 0.1     # learning rate of the momentum optimizer
    momentum = 0.9          # momentum of the momentum optimizer
    max_grad_norm = 5       # global-norm threshold for gradient clipping

In [6]:
# how many passes to make over the training data
num_epochs = 10

# training uses the class-level defaults unchanged
train_config = Config()

# single-sequence, single-step variant of the defaults; it is used for the
# test model (the validation model is built with `train_config`)
eval_config = Config()
eval_config.num_steps = 1
eval_config.batch_size = 1

# Cell Variant

In [None]:
# Choose the recurrent cell implementation for this run: import a variant and
# bind it to `CellType`, which `PTBModel` instantiates as `CellType(hidden_size)`.
# (all variants are subclasses of tensorflow.models.rnn.rnn_cell.RNNCell)
# To try another variant, change the import and the assignment below together.
from variants.vanilla import VanillaLSTMCell
CellType = VanillaLSTMCell

In [None]:
# Everything below lives in one fresh graph and one session.
with tf.Graph().as_default():
    with tf.Session() as sess:
        # The training model is built first so that it creates the variables
        # under the "model" scope.
        with tf.variable_scope("model", reuse=None):
            train_model = PTBModel(CellType, is_training=True, config=train_config)

        # The evaluation models share those variables (reuse=True). The
        # validation model keeps the training batch layout; the test model
        # uses `eval_config`.
        with tf.variable_scope("model", reuse=True):
            valid_model = PTBModel(CellType, is_training=False, config=train_config)
            test_model = PTBModel(CellType, is_training=False, config=eval_config)

        sess.run(tf.initialize_all_variables())

        for i in range(num_epochs):
            # one pass over the training data with progress output
            train_perplexity = run_epoch(sess, train_model, train_data, verbose=True)
            print("%i training complete, perplexity: %.3f" % (i, train_perplexity))

            # one pass over the validation data
            valid_perplexity = run_epoch(sess, valid_model, valid_data)
            print("%i validation complete, perplexity: %.3f" % (i, valid_perplexity))

        # a single pass over the test data once all epochs are done
        test_perplexity = run_epoch(sess, test_model, test_data)
        print("testing complete, perplexity: %.3f" % (test_perplexity,))

0.0% perplexity: 10029.717 speed: 615 wps
1.6% perplexity: 6048.330 speed: 839 wps
3.2% perplexity: 3008.970 speed: 852 wps
4.8% perplexity: 2130.620 speed: 855 wps
6.5% perplexity: 1702.811 speed: 857 wps
8.1% perplexity: 1489.620 speed: 861 wps
9.7% perplexity: 1351.585 speed: 862 wps
11.3% perplexity: 1248.888 speed: 865 wps
12.9% perplexity: 1175.546 speed: 866 wps