# Chapter 7. RNN

In [1]:
import time
import sys
import os
sys.path.append(os.getcwd()+'\simple-examples')
import reader
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

from tensorflow.contrib.rnn import BasicLSTMCell, MultiRNNCell, DropoutWrapper

In [2]:
class SmallConfig(object):
    init_scale=0
    learning_rate=1.0
    max_grad_norm=5
    num_layers=2
    num_steps=20
    hidden_size=200
    max_epoch=4
    max_max_epoch=13
    keep_prob=1.0
    lr_decay=0.5
    batch_size=20
    vocab_size=10000

config=SmallConfig()
eval_config=SmallConfig()
eval_config.batch_size=1
eval_config.num_steps=1

In [3]:
class PTBModel(object):
    def __init__(self,config,is_training=False):
        self.batch_size=config.batch_size
        self.num_steps=config.num_steps
        input_size=[config.batch_size,config.num_steps]
        self.input_data=tf.placeholder(tf.int32,input_size)
        self.targets=tf.placeholder(tf.int32,input_size)
        
        lstm_cell=BasicLSTMCell(config.hidden_size,forget_bias=0.0,state_is_tuple=True)
        
        if is_training and config.keep_prob<1:
            lstm_cell=DropoutWrapper(lstm_cell,config.keep_prob)
        
        cell = MultiRNNCell([lstm_cell]*config.num_layers,state_is_tuple=True)
        self.initial_state=cell.zero_state(config.batch_size,tf.float32)
        
        with tf.device("/cpu:0"):
            embedding_size=[config.vocab_size,config.hidden_size]
            embedding=tf.get_variable("embedding",embedding_size)
            inputs=tf.nn.embedding_lookup(embedding,self.input_data)
        
        if is_training and config.keep_prob<1:
            inputs=tf.nn.dropout(inputs,config.keep_prob)
        
        outputs=[]
        state=self.initial_state
        with tf.variable_scope("RNN"):
            for time_step in range(config.num_steps):
                if time_step>0:tf.get_variable_scope().reuse_variables()
                (cell_output,state)=cell(inputs[:,time_step,:],state)
                outputs.append(cell_output)
        
        output=tf.reshape(tf.concat(outputs,1),[-1,config.hidden_size])
        softmax_w_size=[config.hidden_size,config.vocab_size]
        softmax_w=tf.get_variable("softmax_w",softmax_w_size)
        softmax_b=tf.get_variable("sorfmax_b",[config.vocab_size])
        logits=tf.matmul(output,softmax_w)+softmax_b
        
        loss=tf.contrib.legacy_seq2seq.sequence_loss_by_example(
        [logits],[tf.reshape(self.targets,[-1])],[tf.ones([config.batch_size*config.num_steps])])
        
        self.cost=tf.reduce_sum(loss)/config.batch_size
        self.final_state=state
        
        if not is_training:return
        
        self.lr=tf.Variable(0.0,trainable=False)
        tvars=tf.trainable_variables()
        
        grads,_=tf.clip_by_global_norm(tf.gradients(self.cost,tvars),config.max_grad_norm)
        optimizer=tf.train.GradientDescentOptimizer(self.lr)
        self.train_op=optimizer.apply_gradients(zip(grads,tvars))
        
    def assign_lr(self,session,lr_value):
        session.run(tf.assign(self.lr,lr_value))
        

In [4]:
def run_epoch(session,m,data,is_training=False):
    epoch_size=((len(data)//m.batch_size)-1)
    start_time=time.time()
    costs=0.0
    iters=0
    
    eval_op=m.train_op if is_training else tf.no_op()
    
    state_list=[]
    for c,h in m.initial_state:
        state_list.extend([c.eval(),h.eval()])
    
    ptb_iter=reader.ptb_iterator(data,m.batch_size,m.num_steps)
#     x_,y_=tf.unstack(ptb_iter)
#         for step in range(m.epoch_size):
#     step=0
    for step,(x,y) in enumerate(ptb_iter):
        fetch_list=[m.cost]
        for c,h in m.final_state:
            fetch_list.extend([c,h])
        fetch_list.append(eval_op)

        feed_dict={m.input_data:x,m.targets:y}
        for i in range(len(m.initial_state)):
            c,h = m.initial_state[i]
            feed_dict[c],feed_dict[h]=state_list[i*2:(i+1)*2]

        cost,*state_list,_=session.run(fetch_list,feed_dict)

        cost+=cost
        iters+=m.num_steps

        if is_training and step%(epoch_size//10)==10:
            print("%.3f perplexity: %.3f spped: %.0f wps" % (
            step*1.0/epoch_size,np.exp(costs/iters),iters*m.batch_size/(time.time()-start_time)))
#         step+=1
    return np.exp(costs/iters)

In [5]:
raw_data=reader.ptb_raw_data('./simple-examples/data')
train_data,valid_data,test_data,_=raw_data

In [None]:
with tf.Graph().as_default(),tf.Session() as session:
#     tf.enable_eager_execution()
    
    initializer=tf.random_uniform_initializer(-config.init_scale,config.init_scale)
    
    with tf.variable_scope("model",reuse=None,initializer=initializer):
        m=PTBModel(config,is_training=True)
    with tf.variable_scope("model",reuse=True,initializer=initializer):
        mvalid=PTBModel(config)
        mtest=PTBModel(eval_config)
    
    tf.global_variables_initializer().run()
    
    for i in range(config.max_max_epoch):
        lr_decay=config.lr_decay ** max(i-config.max_epoch,0.0)
        m.assign_lr(session,config.learning_rate*lr_decay)
        print("Epoch: %d Learning rate: %.3f" % (i+1,session.run(m.lr)))
        
        perplexity=run_epoch(session,m,train_data,is_training=True)
        print("Epoch: %d Training Perplexity: %.3f" % (i+1,perplexity))
        
        perplexity=run_epoch(session,mvalid,valid_data)
        print("Epoch: %d Valid Perplexity: %.3f" % (i+1,perplexity))
        
    perplexity=run_epoch(session,mtest,test_data)
    print("Test Perplexity: %.3f" % perplexity)

Instructions for updating:
This class is deprecated, please use tf.nn.rnn_cell.LSTMCell, which supports all the feature this cell currently has. Please replace the existing code with tf.nn.rnn_cell.LSTMCell(name='basic_lstm_cell').
Epoch: 1 Learning rate: 1.000
0.000 perplexity: 1.000 spped: 2010 wps
