In [234]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import time

import numpy as np
import tensorflow as tf

from tensorflow.models.rnn import seq2seq

from lstm_search.vanilla import VanillaLSTMCell
from lstm_search.nog import NOGLSTMCell
from lstm_search.nfg import NFGLSTMCell
from lstm_search.nig import NIGLSTMCell
from lstm_search.niaf import NIAFLSTMCell
from lstm_search.noaf import NOAFLSTMCell
from lstm_search.cifg import CIFGLSTMCell
from lstm_search.np import NPLSTMCell
from lstm_search.fgr import FGRLSTMCell

import ptb_reader

In [235]:
# Read the PTB splits from the local "datasets/ptb" directory. The reader
# returns four items; only the first three (train / valid / test) are used
# here and the fourth is discarded.
raw_data = ptb_reader.ptb_raw_data("datasets/ptb")
(train_data, valid_data, test_data, _) = raw_data

In [236]:
# Mini-batch geometry: the input/target placeholders are shaped
# [batch_size, num_steps] and the RNN is unrolled for num_steps time-steps.
batch_size, num_steps = 30, 20

# Model dimensions: LSTM / embedding width and number of distinct tokens.
hidden_size, vocab_size = 200, 10000

# Optimiser settings (fed to the momentum optimiser and to global-norm
# gradient clipping).
learning_rate, momentum = 1e-1, 0.9
max_grad_norm = 5

# Number of full passes over the training data.
num_epochs = 10

In [237]:
def run_epoch(data_iterator, data_size, input_data, targets, init_state, ops,
              session=None):
    """Run one pass over ``data_iterator`` and return the epoch perplexity.

    The recurrent state produced by each batch is fed back in as the initial
    state of the next batch. Progress (percent done, running perplexity and
    words per second) is printed every 10th step.

    Reads the module-level ``batch_size`` and ``num_steps`` for progress and
    perplexity normalisation, so the batches yielded by ``data_iterator`` are
    expected to have been produced with those same values.

    Args:
        data_iterator: iterable yielding ``(x, y)`` pairs that are fed to
            ``input_data`` and ``targets`` respectively.
        data_size: total number of tokens in the data set; only used to
            estimate the number of steps for the progress display.
        input_data: feed key (placeholder) for the input batch.
        targets: feed key (placeholder) for the target batch.
        init_state: tensor-like object with an ``eval`` method; also used as
            the feed key for the state carried between batches.
        ops: fetches passed to ``session.run``; the result must unpack into
            three values ``(cost, state, _)``.
        session: optional session used to run ``ops``. When omitted the
            module-level ``sess`` is used, as before.

    Returns:
        ``exp(total_cost / total_steps)`` over the whole pass, or ``nan`` if
        ``data_iterator`` yielded no batches.
    """
    if session is None:
        # Backward-compatible path: rely on the notebook-level session and on
        # it being the default session for ``eval()``.
        active_session = sess
        next_state = init_state.eval()
    else:
        active_session = session
        next_state = init_state.eval(session=session)

    epoch_size = ((data_size // batch_size) - 1) // num_steps
    start_time = time.time()
    costs = 0.0
    iters = 0

    for step, (x, y) in enumerate(data_iterator):
        cost, next_state, _ = active_session.run(ops, {
            input_data: x,
            targets: y,
            init_state: next_state
        })

        costs += cost
        iters += num_steps

        if step % 10 == 0:
            # Only compute the display values when they are actually printed.
            perplexity = np.exp(costs / iters)
            elapsed = time.time() - start_time
            # Guard against a zero-length interval on coarse clocks.
            wps = (iters * batch_size) / elapsed if elapsed > 0 else float("inf")
            # epoch_size can be <= 0 for very small data sets; avoid dividing
            # by zero (or printing a negative percentage) in that case.
            progress = (step / epoch_size) * 100 if epoch_size > 0 else 0.0
            print("%.1f%% perplexity: %.3f %.0f" % (progress, perplexity, wps))

    if iters == 0:
        # No batches were processed: perplexity is undefined.
        return float("nan")
    return np.exp(costs / iters)

In [238]:
# Build the LSTM language-model graph, train it for `num_epochs` epochs with a
# validation pass after each one, then run a single test pass.
with tf.Graph().as_default(), tf.Session() as sess:
    # initializer shared by every `get_variable` call below
    initializer = tf.random_uniform_initializer(-0.1, 0.1)

    # setup our inputs and target outputs
    input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
    targets = tf.placeholder(tf.int32, [batch_size, num_steps])

    lstm_cell = VanillaLSTMCell(hidden_size)
    init_state = lstm_cell.zero_state(batch_size, tf.float32)

    # define embedding
    embedding = tf.get_variable("embedding",
                                [vocab_size, hidden_size],
                                initializer=initializer)
    inputs = tf.nn.embedding_lookup(embedding, input_data)

    # unroll our RNN by creating variables for each time-step:
    # by using `reuse_variables` we're sharing the appropriate
    # variables between each step
    outputs = []
    states = []
    state = init_state
    with tf.variable_scope("RNN"):
        for time_step in range(num_steps):
            if time_step > 0:
                tf.get_variable_scope().reuse_variables()

            cell_output, state = lstm_cell(inputs[:, time_step, :], state)

            outputs.append(cell_output)
            states.append(state)
    # state after the final unrolled step; fed back in by `run_epoch`
    last_state = states[-1]

    # define fully-connected layer
    output = tf.reshape(tf.concat(1, outputs), [-1, hidden_size])
    logits = tf.nn.xw_plus_b(output,
                             tf.get_variable("softmax_w",
                                             [hidden_size, vocab_size],
                                             initializer=initializer),
                             tf.get_variable("softmax_b",
                                             [vocab_size],
                                             initializer=initializer))

    # define loss
    loss = seq2seq.sequence_loss_by_example([logits],
                                            [tf.reshape(targets, [-1])],
                                            [tf.ones([batch_size * num_steps])],
                                            vocab_size)
    cost_op = tf.reduce_sum(loss) / batch_size

    # define optimizer and gradient clipping
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(cost_op, tvars), max_grad_norm)
    optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)
    train_op = optimizer.apply_gradients(zip(grads, tvars))

    # Placeholder fetch for evaluation passes. Created once here so the graph
    # does not gain a new no-op node on every epoch.
    eval_op = tf.no_op()

    sess.run(tf.initialize_all_variables())

    for i in range(num_epochs):
        # training pass
        train_iter = ptb_reader.ptb_iterator(train_data, batch_size, num_steps)
        perplexity = run_epoch(train_iter,
                               len(train_data),
                               input_data, targets,
                               init_state,
                               [cost_op, last_state, train_op])
        print("%i training complete, perplexity: %.3f" % (i, perplexity))

        # validation pass
        # The placeholders have a fixed [batch_size, num_steps] shape and
        # `run_epoch` normalises by the same globals, so evaluation batches
        # must be built with the training batch geometry.
        valid_iter = ptb_reader.ptb_iterator(valid_data, batch_size, num_steps)
        valid_perplexity = run_epoch(valid_iter,
                                     len(valid_data),
                                     input_data,
                                     targets,
                                     init_state,
                                     [cost_op, last_state, eval_op])
        # report the validation result, not the preceding training value
        print("%i validation complete, perplexity: %.3f" % (i, valid_perplexity))

    # test pass (same batch geometry as above, for the same reason)
    test_iter = ptb_reader.ptb_iterator(test_data, batch_size, num_steps)
    test_perplexity = run_epoch(test_iter,
                                len(test_data),
                                input_data,
                                targets,
                                init_state,
                                [cost_op, last_state, eval_op])
    print("testing complete, perplexity: %.3f" % (test_perplexity,))

NameError: global name 'tf' is not defined