# Recurrent Neural Networks

https://www.tensorflow.org/versions/r0.11/tutorials/recurrent/index.html

https://github.com/tensorflow/tensorflow/tree/r0.11/tensorflow/models/rnn/ptb

https://github.com/tensorflow/tensorflow/blob/r0.11/tensorflow/models/rnn/ptb/ptb_word_lm.py

https://github.com/tensorflow/tensorflow/blob/r0.11/tensorflow/models/rnn/ptb/reader.py

http://colah.github.io/posts/2015-08-Understanding-LSTMs/

http://karpathy.github.io/2015/05/21/rnn-effectiveness/

https://arxiv.org/abs/1409.2329

http://www.cis.upenn.edu/~treebank/

In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

In [2]:
import os
import tarfile
import shutil

HOME_DIR = 'treebank'
DATA_DIR = os.path.join(HOME_DIR, 'data')

print('Unpacking treebank dataset...')

TAR_FILE = 'simple-examples.tgz'
TAR_PATH = os.path.join(DATA_DIR, TAR_FILE)

from tensorflow.contrib.learn.python.learn.datasets.base import maybe_download
maybe_download(TAR_FILE, DATA_DIR, 'http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz')

def extract(tar, filename, dst_path):
    print('Extracting', filename)
    dst_file = os.path.join(dst_path, os.path.basename(filename))
    with open(dst_file, 'wb') as fout:
        fin = tar.extractfile(filename)
        shutil.copyfileobj(fin, fout)

with tarfile.open(TAR_PATH, mode='r:gz') as t:
    extract(t, './simple-examples/data/ptb.test.txt', DATA_DIR)
    extract(t, './simple-examples/data/ptb.train.txt', DATA_DIR)
    extract(t, './simple-examples/data/ptb.valid.txt', DATA_DIR)

Unpacking treebank dataset...
Extracting ./simple-examples/data/ptb.test.txt
Extracting ./simple-examples/data/ptb.train.txt
Extracting ./simple-examples/data/ptb.valid.txt


In [3]:
from tensorflow.models.rnn.ptb import reader

raw_data = reader.ptb_raw_data(DATA_DIR)
train_data, valid_data, test_data, _ = raw_data

print("Train size:", len(train_data))
print("Validation size:", len(valid_data))
print("Test size:", len(test_data))

Train size: 929589
Validation size: 73760
Test size: 82430


In [4]:
# Small config
init_scale = 0.1
learning_rate = 1.0
max_grad_norm = 5
num_layers = 2
num_steps = 20
hidden_size = 200
max_epoch = 4
max_max_epoch = 13
keep_prob = 1.0
lr_decay = 0.5
batch_size = 20
vocab_size = 10000

In [5]:
graph = tf.Graph()

with graph.as_default():
    initializer = tf.random_uniform_initializer(-init_scale, init_scale)
    
    with tf.variable_scope("model", reuse=None, initializer=initializer):
        size = hidden_size
        
        input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
        targets = tf.placeholder(tf.int32, [batch_size, num_steps])

        lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(size, forget_bias=0.0, state_is_tuple=True)
        cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * num_layers, state_is_tuple=True)
        
        initial_state = cell.zero_state(batch_size, tf.float32)
        
        embedding = tf.get_variable("embedding", [vocab_size, size], dtype=tf.float32)
        inputs = tf.nn.embedding_lookup(embedding, input_data)
        
        outputs = []
        state = initial_state
        with tf.variable_scope("RNN"):
            for time_step in range(num_steps):
                if time_step > 0: tf.get_variable_scope().reuse_variables()
                (cell_output, state) = cell(inputs[:, time_step, :], state)
                outputs.append(cell_output)

        output = tf.reshape(tf.concat(1, outputs), [-1, size])
        softmax_w = tf.get_variable("softmax_w", [size, vocab_size], dtype=tf.float32)
        softmax_b = tf.get_variable("softmax_b", [vocab_size], dtype=tf.float32)
        logits = tf.matmul(output, softmax_w) + softmax_b
        
        loss = tf.nn.seq2seq.sequence_loss_by_example(
            [logits],
            [tf.reshape(targets, [-1])],
            [tf.ones([batch_size * num_steps], dtype=tf.float32)])
        
        cost = tf.reduce_sum(loss) / batch_size
        final_state = state

        lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), max_grad_norm)
        optimizer = tf.train.GradientDescentOptimizer(lr)
        train_op = optimizer.apply_gradients(zip(grads, tvars))

        new_lr = tf.placeholder(tf.float32, shape=[], name="new_learning_rate")
        lr_update = tf.assign(lr, new_lr)
        
    init = tf.initialize_all_variables()

In [6]:
import time

import numpy as np

def run_epoch(session, data, eval_op, verbose):
    epoch_size = ((len(data) // batch_size) - 1) // num_steps
    start_time = time.time()
    costs = 0.0
    iters = 0
    state = session.run(initial_state)

    for step, (x, y) in enumerate(reader.ptb_iterator(data, batch_size, num_steps)):
        fetches = [cost, final_state, eval_op]

        feed_dict = {}
        feed_dict[input_data] = x
        feed_dict[targets] = y
        for k, (c, h) in enumerate(initial_state):
            feed_dict[c] = state[k].c
            feed_dict[h] = state[k].h

        batch_cost, state, _ = session.run(fetches, feed_dict)
        costs += batch_cost
        iters += num_steps

        if verbose and step % (epoch_size // 10) == 10:
            print("%.3f perplexity: %.3f speed: %.0f wps" % (
                step * 1.0 / epoch_size,
                np.exp(costs / iters),
                iters * batch_size / (time.time() - start_time)))

    return np.exp(costs / iters)

with tf.Session(graph=graph) as session:
    init.run()
    print("Initialized")

    for i in range(max_max_epoch):
        decay = lr_decay ** max(i + 1 - max_epoch, 0.0)
        lr_value = learning_rate * decay
        session.run(lr_update, feed_dict={new_lr: lr_value})
        
        print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(lr)))
        
        train_perplexity = run_epoch(session, train_data, train_op, True)
        print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
        
        valid_perplexity = run_epoch(session, valid_data, tf.no_op(), False)
        print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))
        
    #batch_size = 1
    #num_steps = 1
    #test_perplexity = run_epoch(session, test_data, tf.no_op(), False)
    #print("Test Perplexity: %.3f" % test_perplexity)

Initialized
Epoch: 1 Learning rate: 1.000
0.004 perplexity: 5390.795 speed: 949 wps
0.104 perplexity: 829.494 speed: 994 wps
0.204 perplexity: 616.211 speed: 1004 wps
0.304 perplexity: 499.248 speed: 1008 wps
0.404 perplexity: 432.527 speed: 1011 wps
0.504 perplexity: 387.496 speed: 1010 wps
0.604 perplexity: 349.120 speed: 1010 wps
0.703 perplexity: 322.719 speed: 1011 wps
0.803 perplexity: 301.941 speed: 1011 wps
0.903 perplexity: 282.687 speed: 1011 wps
Epoch: 1 Train Perplexity: 268.426
Epoch: 1 Valid Perplexity: 178.786
Epoch: 2 Learning rate: 1.000
0.004 perplexity: 211.240 speed: 991 wps
0.104 perplexity: 150.767 speed: 1008 wps
0.204 perplexity: 157.818 speed: 1011 wps
0.304 perplexity: 152.909 speed: 1011 wps
0.404 perplexity: 149.949 speed: 1011 wps
0.504 perplexity: 147.430 speed: 1011 wps
0.604 perplexity: 142.870 speed: 1011 wps
0.703 perplexity: 140.730 speed: 1012 wps
0.803 perplexity: 138.650 speed: 1011 wps
0.903 perplexity: 134.994 speed: 1011 wps
Epoch: 2 Train Perpl

ValueError: Cannot feed value of shape (1, 1) for Tensor u'model/Placeholder_1:0', which has shape '(20, 20)'