In [1]:
# One-time dataset setup, kept commented out so it is not repeated on every run:
# download the simple-examples archive, unpack it, then delete the tarball.
#!wget http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz
#!tar xvf simple-examples.tgz
#!rm simple-examples.tgz

In [2]:
# List the top-level contents of the simple-examples/ directory.
!ls simple-examples/

1-train		   5-one-iter		       9-char-based-lm	temp
2-nbest-rescore    6-recovery-during-training  data
3-combination	   7-dynamic-evaluation        models
4-data-generation  8-direct		       rnnlm-0.2b


In [1]:
class SmallConfig(object):
  """Hyperparameter set for the small PTB language model.

  The notebook cells visible here read num_layers, num_steps, hidden_size,
  keep_prob, batch_size and vocab_size. The initialisation/optimisation
  values are presumably consumed by a training loop elsewhere (the names
  follow the TensorFlow PTB tutorial) -- TODO confirm.
  """
  # Data and model shape.
  vocab_size = 10000
  batch_size = 20
  num_steps = 25
  hidden_size = 200
  num_layers = 2

  # Dropout keep probability; at 1.0 the `keep_prob < 1.0` check in the cell
  # construction code is false, so no DropoutWrapper is applied.
  keep_prob = 1.0

  # Initialisation and optimisation schedule.
  init_scale = 0.1
  learning_rate = 1.0
  lr_decay = 0.5
  max_grad_norm = 5
  max_epoch = 4
  max_max_epoch = 13


class MediumConfig(object):
  """Hyperparameter set for the medium PTB language model.

  The notebook cells visible here read num_layers, num_steps, hidden_size,
  keep_prob, batch_size and vocab_size. The initialisation/optimisation
  values are presumably consumed by a training loop elsewhere (the names
  follow the TensorFlow PTB tutorial) -- TODO confirm.
  """
  # Data and model shape.
  vocab_size = 10000
  batch_size = 20
  num_steps = 35
  hidden_size = 650
  num_layers = 2

  # Dropout keep probability; being below 1.0 it makes the `keep_prob < 1.0`
  # check in the cell construction code wrap the LSTM in a DropoutWrapper.
  keep_prob = 0.5

  # Initialisation and optimisation schedule.
  init_scale = 0.05
  learning_rate = 1.0
  lr_decay = 0.8
  max_grad_norm = 5
  max_epoch = 6
  max_max_epoch = 39


class LargeConfig(object):
  """Hyperparameter set for the large PTB language model.

  The notebook cells visible here read num_layers, num_steps, hidden_size,
  keep_prob, batch_size and vocab_size. The initialisation/optimisation
  values are presumably consumed by a training loop elsewhere (the names
  follow the TensorFlow PTB tutorial) -- TODO confirm.
  """
  # Data and model shape.
  vocab_size = 10000
  batch_size = 20
  num_steps = 35
  hidden_size = 1500
  num_layers = 2

  # Dropout keep probability; being below 1.0 it makes the `keep_prob < 1.0`
  # check in the cell construction code wrap the LSTM in a DropoutWrapper.
  keep_prob = 0.35

  # Initialisation and optimisation schedule.
  init_scale = 0.04
  learning_rate = 1.0
  lr_decay = 1 / 1.15  # float division: 1.15 is a float literal
  max_grad_norm = 10
  max_epoch = 14
  max_max_epoch = 55

In [2]:
import tensorflow as tf
import numpy as np
from tensorflow.models.rnn.ptb import reader

# Interactive session for notebook use: per the TensorFlow docs it installs
# itself as the default session when constructed.
sess = tf.InteractiveSession()

In [3]:
raw_data = reader.ptb_raw_data("./simple-examples/data/")
train_data, valid_data, test_data, _ = raw_data

!head -n6 simple-examples/data/ptb.train.txt
[(len(x),x[:10]) if type(x) == list else (x,) for x in raw_data[:-1]]

 aer banknote berlitz calloway centrust cluett fromstein gitano guterman hydro-quebec ipo kia memotec mlx nahb punts rake regatta rubens sim snack-food ssangyong swapo wachter 
 pierre <unk> N years old will join the board as a nonexecutive director nov. N 
 mr. <unk> is chairman of <unk> n.v. the dutch publishing group 
 rudolph <unk> N years old and former chairman of consolidated gold fields plc was named a nonexecutive director of this british industrial conglomerate 
 a form of asbestos once used to make kent cigarette filters has caused a high percentage of cancer deaths among a group of workers exposed to it more than N years ago researchers reported 
 the asbestos fiber <unk> is unusually <unk> once it enters the <unk> with even brief exposures to it causing symptoms that show up decades later researchers said 


[(929589, [9970, 9971, 9972, 9974, 9975, 9976, 9980, 9981, 9982, 9983]),
 (73760, [1132, 93, 358, 5, 329, 51, 9836, 6, 326, 2476]),
 (82430, [102, 14, 24, 32, 752, 381, 2, 29, 120, 0])]

In [4]:
# Hyperparameter set read by the model-building cells.
config = SmallConfig()

# One (input, target) tensor pair per data split, all batched with the same
# batch_size / num_steps taken from the config.
input_data_train, targets_train = reader.ptb_producer(
    train_data, config.batch_size, config.num_steps)
input_data_test, targets_test = reader.ptb_producer(
    test_data, config.batch_size, config.num_steps)
input_data_valid, targets_valid = reader.ptb_producer(
    valid_data, config.batch_size, config.num_steps)

In [5]:
# Feed the training split into the generic names the model cells read.
input_data = input_data_train
targets = targets_train

In [None]:
!wget http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz
!tar xvf simple-examples.tgz

raw_data = reader.ptb_raw_data("./simple-examples/data/")
train_data, valid_data, test_data, _ = raw_data

input_data, targets = reader.ptb_producer(train_data, 20, 25)
cell = tf.nn.rnn_cell.BasicLSTMCell(200, forget_bias=1.0, state_is_tuple=True)
initial_state = cell.zero_state(20, tf.float32)
embedding = tf.get_variable("embedding", [10000, 200], dtype=tf.float32)
inputs = tf.nn.embedding_lookup(embedding, input_data)

input_data_train # <tf.Tensor 'PTBProducer/Slice:0' shape=(20, 25) dtype=int32>
inputs # <tf.Tensor 'embedding_lookup:0' shape=(20, 25, 200) dtype=float32>

outputs = []
state = initial_state
for time_step in range(25):
    if time_step > 0: 
        tf.get_variable_scope().reuse_variables()
        
    cell_output, state = cell(inputs[:, time_step, :], state)
    outputs.append(cell_output)
    
output = tf.reshape(tf.concat(1, outputs), [-1, 200])

outputs # list of 20: <tf.Tensor 'BasicLSTMCell/mul_2:0' shape=(20, 200) dtype=float32>
output # <tf.Tensor 'Reshape_2:0' shape=(500, 200) dtype=float32>

softmax_w = tf.get_variable("softmax_w", [config.hidden_size, config.vocab_size], dtype=tf.float32)
softmax_b = tf.get_variable("softmax_b", [config.hidden_size, config.vocab_size], dtype=tf.float32)
logits = tf.matmul(output, softmax_w) + softmax_b

loss = tf.nn.seq2seq.sequence_loss_by_example([logits], [tf.reshape(targets, [-1])],[tf.ones([20*25], dtype=tf.float32)])
cost = tf.reduce_sum(loss) / batch_size

In [6]:
# Single LSTM layer sized by the config.
lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(
    config.hidden_size,
    forget_bias=0.0,
    state_is_tuple=True,
)

# Apply output dropout only when the config asks for it (keep_prob below 1.0).
if config.keep_prob < 1.0:
    lstm_cell = tf.nn.rnn_cell.DropoutWrapper(
        lstm_cell,
        output_keep_prob=config.keep_prob,
    )

# Stack num_layers references to that layer into one multi-layer cell.
cell = tf.nn.rnn_cell.MultiRNNCell(
    [lstm_cell] * config.num_layers,
    state_is_tuple=True,
)

In [7]:
# Zero state for a batch of config.batch_size sequences.
initial_state = cell.zero_state(config.batch_size, tf.float32)

# Embedding table with one hidden_size-wide row per vocabulary id, then a
# lookup that maps the id tensor `input_data` to its embedding rows.
embedding = tf.get_variable(
    "embedding",
    [config.vocab_size, config.hidden_size],
    dtype=tf.float32,
)
inputs = tf.nn.embedding_lookup(embedding, input_data)

In [10]:
# Display the id tensor, its embedded form, and the two sizes that shape them.
(
    input_data_train,
    inputs,
    config.vocab_size,
    config.hidden_size,
)

(<tf.Tensor 'PTBProducer/Slice:0' shape=(20, 25) dtype=int32>,
 <tf.Tensor 'embedding_lookup:0' shape=(20, 25, 200) dtype=float32>,
 10000,
 200)

In [14]:
# Unroll the recurrent cell over the time axis, collecting one output tensor
# per step and threading the state from each step into the next.
#
# The loop runs inside its own variable scope so that reuse_variables() only
# turns on reuse within "RNN". Calling it on the root scope would leave reuse
# enabled for the rest of the graph, and any later tf.get_variable() call that
# needs to create a new variable would then raise.
# NOTE: building this twice in the same graph is expected to fail on the
# already-existing "RNN/..." variables -- rebuild in a fresh graph instead.
outputs = []
state = initial_state
with tf.variable_scope("RNN"):
    for time_step in range(config.num_steps):
        if time_step > 0:
            # Steps after the first share the weights created at step 0.
            tf.get_variable_scope().reuse_variables()

        cell_output, state = cell(inputs[:, time_step, :], state)
        outputs.append(cell_output)

In [25]:
# Display the first step's output, the step outputs concatenated along axis 1,
# that concatenation flattened to hidden_size-wide rows, and the two sizes.
(
    outputs[0],
    tf.concat(1, outputs),
    tf.reshape(tf.concat(1, outputs), [-1, config.hidden_size]),
    config.vocab_size,
    config.hidden_size,
)

(<tf.Tensor 'MultiRNNCell_3/Cell1/BasicLSTMCell/mul_2:0' shape=(20, 200) dtype=float32>,
 <tf.Tensor 'concat_7:0' shape=(20, 5000) dtype=float32>,
 <tf.Tensor 'Reshape_2:0' shape=(500, 200) dtype=float32>,
 10000,
 200)

In [18]:
# Concatenate the per-step outputs along axis 1, then flatten to one
# hidden_size-wide row per (batch item, time step) pair.
output = tf.reshape(tf.concat(1, outputs), [-1, config.hidden_size])

# Softmax projection from hidden state to vocabulary logits. The bias holds one
# value per vocabulary entry so it broadcasts over the rows of the logits.
softmax_w = tf.get_variable("softmax_w", [config.hidden_size, config.vocab_size], dtype=tf.float32)
softmax_b = tf.get_variable("softmax_b", [config.vocab_size], dtype=tf.float32)
logits = tf.matmul(output, softmax_w) + softmax_b

# Per-position loss against the flattened targets with uniform weights. All
# sizes are read from `config`; the notebook defines no bare `batch_size` or
# `num_steps` names.
loss = tf.nn.seq2seq.sequence_loss_by_example(
    [logits],
    [tf.reshape(targets, [-1])],
    [tf.ones([config.batch_size * config.num_steps], dtype=tf.float32)])

# Total loss averaged over the batch dimension.
cost = tf.reduce_sum(loss) / config.batch_size
# State after the last unrolled step.
final_state = state


[<tf.Tensor 'MultiRNNCell_3/Cell1/BasicLSTMCell/mul_2:0' shape=(20, 200) dtype=float32>,
 <tf.Tensor 'MultiRNNCell_4/Cell1/BasicLSTMCell/mul_2:0' shape=(20, 200) dtype=float32>,
 <tf.Tensor 'MultiRNNCell_5/Cell1/BasicLSTMCell/mul_2:0' shape=(20, 200) dtype=float32>,
 <tf.Tensor 'MultiRNNCell_6/Cell1/BasicLSTMCell/mul_2:0' shape=(20, 200) dtype=float32>,
 <tf.Tensor 'MultiRNNCell_7/Cell1/BasicLSTMCell/mul_2:0' shape=(20, 200) dtype=float32>,
 <tf.Tensor 'MultiRNNCell_8/Cell1/BasicLSTMCell/mul_2:0' shape=(20, 200) dtype=float32>,
 <tf.Tensor 'MultiRNNCell_9/Cell1/BasicLSTMCell/mul_2:0' shape=(20, 200) dtype=float32>,
 <tf.Tensor 'MultiRNNCell_10/Cell1/BasicLSTMCell/mul_2:0' shape=(20, 200) dtype=float32>,
 <tf.Tensor 'MultiRNNCell_11/Cell1/BasicLSTMCell/mul_2:0' shape=(20, 200) dtype=float32>,
 <tf.Tensor 'MultiRNNCell_12/Cell1/BasicLSTMCell/mul_2:0' shape=(20, 200) dtype=float32>,
 <tf.Tensor 'MultiRNNCell_13/Cell1/BasicLSTMCell/mul_2:0' shape=(20, 200) dtype=float32>,
 <tf.Tensor 'Mult