In [1]:
#!wget http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz
#!tar xvf simple-examples.tgz
#!rm simple-examples.tgz

In [2]:
!ls simple-examples/

1-train		   5-one-iter		       9-char-based-lm	temp
2-nbest-rescore    6-recovery-during-training  data
3-combination	   7-dynamic-evaluation        models
4-data-generation  8-direct		       rnnlm-0.2b


In [1]:
class SmallConfig(object):
  """Small config."""
  init_scale = 0.1
  learning_rate = 1.0
  max_grad_norm = 5
  num_layers = 2
  num_steps = 25
  hidden_size = 200
  max_epoch = 4
  max_max_epoch = 13
  keep_prob = 1.0
  lr_decay = 0.5
  batch_size = 20
  vocab_size = 10000


class MediumConfig(object):
  """Medium config."""
  init_scale = 0.05
  learning_rate = 1.0
  max_grad_norm = 5
  num_layers = 2
  num_steps = 35
  hidden_size = 650
  max_epoch = 6
  max_max_epoch = 39
  keep_prob = 0.5
  lr_decay = 0.8
  batch_size = 20
  vocab_size = 10000


class LargeConfig(object):
  """Large config."""
  init_scale = 0.04
  learning_rate = 1.0
  max_grad_norm = 10
  num_layers = 2
  num_steps = 35
  hidden_size = 1500
  max_epoch = 14
  max_max_epoch = 55
  keep_prob = 0.35
  lr_decay = 1 / 1.15
  batch_size = 20
  vocab_size = 10000

In [2]:
import tensorflow as tf
import numpy as np
from tensorflow.models.rnn.ptb import reader
import time

sess = tf.InteractiveSession()

In [3]:
raw_data = reader.ptb_raw_data("./simple-examples/data/")
train_data, valid_data, test_data, _ = raw_data

# Display the data
#!head -n6 simple-examples/data/ptb.train.txt
#[(len(x),x[:10]) if type(x) == list else (x,) for x in raw_data[:-1]]

In [4]:
config = SmallConfig()

input_data_train, targets_train = reader.ptb_producer(train_data, config.batch_size, config.num_steps, name="train_input")
input_data_test, targets_test = reader.ptb_producer(test_data, config.batch_size, config.num_steps, name="test_input")
input_data_valid, targets_valid = reader.ptb_producer(valid_data, config.batch_size, config.num_steps, name="valid_input")

In [5]:
class NlpRnnModel:
    def __init__(self, input_data, targets, name=None):
        #input_data, targets = input_data_train, targets_train

        #Create the RNN cell
        lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(config.hidden_size, forget_bias=0.0, state_is_tuple=True)
        if config.keep_prob < 1.0:
            lstm_cell = tf.nn.rnn_cell.DropoutWrapper(lstm_cell, output_keep_prob=config.keep_prob)
        cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers, state_is_tuple=True)
        
        initial_state = cell.zero_state(config.batch_size, tf.float32)

        embedding = tf.get_variable("embedding", [config.vocab_size, config.hidden_size], dtype=tf.float32)
        inputs = tf.nn.embedding_lookup(embedding, input_data)

        # Run cell for each time step
        outputs = [] 
        state = initial_state
        with tf.variable_scope("RNN_loop"):
            for time_step in range(config.num_steps):
                if time_step > 0: tf.get_variable_scope().reuse_variables()
                cell_output, state = cell(inputs[:, time_step, :], state) # input:[20, 200], output:[20, 200]
                outputs.append(cell_output)

        # Convert RNN output to logits
        output = tf.reshape(tf.concat(1, outputs), [-1, config.hidden_size]) # [20*25, 200]
        softmax_w = tf.get_variable("softmax_w", [config.hidden_size, config.vocab_size], dtype=tf.float32) # [200, 10000]
        softmax_b = tf.get_variable("softmax_b", [config.vocab_size], dtype=tf.float32) # [10000]
        logits = tf.matmul(output, softmax_w) + softmax_b #[20*25, 10,000]

        # Calc loss function and optimize it
        loss = tf.nn.seq2seq.sequence_loss_by_example(
            [logits],
            [tf.reshape(targets, [-1])],
            [tf.ones([config.batch_size * config.num_steps], dtype=tf.float32)])
        cost = tf.reduce_sum(loss) / config.batch_size
        final_state = state

        learning_rate = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), config.max_grad_norm)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        train_op = optimizer.apply_gradients(zip(grads, tvars), global_step=tf.contrib.framework.get_or_create_global_step())

        new_learning_rate = tf.placeholder(tf.float32, shape=[], name="new_learning_rate")
        learning_rate_update = tf.assign(learning_rate, new_learning_rate)
        
        self.cell = cell
        self.initial_state = initial_state
        self.final_state = final_state
        self.loss = loss
        self.cost = cost
        self.optimizer = optimizer
        self.train_op = train_op
        self.learning_rate = learning_rate
        self.new_learning_rate = new_learning_rate
        self.tvars = tvars
        self.grads = grads
        self.outputs = outputs
        self.output = output
        
def run_epoch(sess, model, epoch_size, eval_op=None, verbose=False):
    start_time = time.time()
    costs = 0.0
    iters = 0
    state = sess.run(model.initial_state)

    for step in range(epoch_size):
        feed_dict = {}
        for i, (c, h) in enumerate(model.initial_state):
            feed_dict[c] = state[i].c
            feed_dict[h] = state[i].h

        fetches = {"cost": model.cost, "final_state": model.final_state}
        if eval_op is not None: fetches["eval_op"] = eval_op

        vals = sess.run(fetches, feed_dict)
        cost = vals["cost"]
        state = vals["final_state"]

        costs += cost
        iters += config.num_steps

        if verbose and step % (epoch_size / 10) == 10:
            print("%.3f perplexity: %.3f speed: %.0f wps" %
                (step * 1.0 / epoch_size, np.exp(costs / iters),
                 iters * config.batch_size / (time.time() - start_time)))

    avg_cost = np.exp(costs / iters)
    return avg_cost

In [11]:
input_data, inputs, (len(outputs), outputs[0]), output, (targets, tf.reshape(targets, [-1]))

(<tf.Tensor 'train_input/Slice:0' shape=(20, 25) dtype=int32>,
 <tf.Tensor 'embedding_lookup:0' shape=(20, 25, 200) dtype=float32>,
 (25,
  <tf.Tensor 'RNN_loop/MultiRNNCell/Cell1/BasicLSTMCell/mul_2:0' shape=(20, 200) dtype=float32>),
 <tf.Tensor 'Reshape:0' shape=(500, 200) dtype=float32>,
 (<tf.Tensor 'train_input/Slice_1:0' shape=(20, 25) dtype=int32>,
  <tf.Tensor 'Reshape_3:0' shape=(500,) dtype=int32>))

In [6]:
initializer = tf.random_uniform_initializer(-config.init_scale, config.init_scale)

with tf.variable_scope("Model", reuse=None, initializer=initializer):
    train_model = NlpRnnModel(input_data_train, targets_train, "train_model")
with tf.variable_scope("Model", reuse=True, initializer=initializer):
    test_model = NlpRnnModel(input_data_test, targets_test, "test_model")
with tf.variable_scope("Model", reuse=True, initializer=initializer):
    valid_model = NlpRnnModel(input_data_valid, targets_valid, "valid_model")

def calc_epoch_size(data, batch_size, num_steps):
    return ((len(train_data) // batch_size) - 1) // num_steps

train_epoch_size = calc_epoch_size(train_data, config.batch_size, config.num_steps)
test_epoch_size = calc_epoch_size(test_data, config.batch_size, config.num_steps)
valid_epoch_size = calc_epoch_size(valid_data, config.batch_size, config.num_steps)
    
sess.run(tf.initialize_all_variables())

In [None]:
for i in range(4):
    print("iter {}".format(i))
    
    learning_rate_decay = config.lr_decay ** max(i - config.max_epoch, 0.0)
    print("Epoch: %d Learning rate: %.3f" % (i + 1, sess.run(train_model.learning_rate)))

    train_perplexity = run_epoch(sess, train_model, train_epoch_size, eval_op=train_model.train_op, verbose=True)
    print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))

    valid_perplexity = run_epoch(sess, valid_model, valid_epoch_size)
    print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))

    test_perplexity = run_epoch(sess, test_model, test_epoch_size)
    print("Test Perplexity: %.3f" % test_perplexity)

iter 0
Epoch: 1 Learning rate: 0.000


<__main__.NlpRnnModel instance at 0x7f2105596cb0>