In [1]:
import collections
import numpy
import os
import sys
import numpy as np
import tensorflow as tf

In [2]:
# Location of the PTB text files: a 'data' folder under the working directory.
# dirname() is applied to the literal string '__file__' (not the variable),
# which yields '' and makes abspath() resolve to the current working directory.
dataDir = os.path.join(
    os.path.abspath(os.path.dirname('__file__')),
    'data',
)

In [3]:
def _read_words(filename):
    """Return the whitespace-separated tokens of `filename`.

    Each newline character is replaced by the text '<eos>' before the
    content is split on whitespace. On Python 2 the raw content is decoded
    as UTF-8 first.
    """
    with tf.gfile.GFile(filename, 'r') as handle:
        content = handle.read()
    if sys.version_info[0] < 3:
        content = content.decode('utf-8')
    return content.replace('\n', '<eos>').split()

In [4]:
def _build_vocab(filename):
    """Build a word -> integer id mapping from the tokens of `filename`.

    Ids are assigned in order of decreasing frequency; words with the same
    count are ordered alphabetically. An empty file yields an empty dict
    (unpacking ``zip(*pairs)`` would raise ValueError in that case).

    Args:
        filename: path of the text file to read through `_read_words`.

    Returns:
        dict mapping each distinct token to its rank (0 = most frequent).
    """
    counter = collections.Counter(_read_words(filename))
    # Sort by (-count, word) so the ordering is deterministic across runs.
    ranked = sorted(counter.items(), key=lambda pair: (-pair[1], pair[0]))
    return {word: idx for idx, (word, _) in enumerate(ranked)}

In [5]:
def _file_to_words_ids(filename, word_to_id):
    """Translate the tokens of `filename` into their ids from `word_to_id`.

    Tokens that are not keys of `word_to_id` are dropped; the order of the
    remaining tokens is preserved.
    """
    ids = []
    for token in _read_words(filename):
        if token in word_to_id:
            ids.append(word_to_id[token])
    return ids

In [6]:
def ptb_raw_data(data_path=dataDir):
    """Load the PTB train/valid/test files and convert them to word ids.

    The vocabulary is built from the training file only; validation and
    test tokens that are missing from it are dropped by
    `_file_to_words_ids`.

    Args:
        data_path: directory containing 'ptb.train.txt', 'ptb.valid.txt'
            and 'ptb.test.txt'. Defaults to the notebook-level `dataDir`.

    Returns:
        Tuple (train_data, valid_data, test_data, vocab_len) where the first
        three items are lists of integer word ids and vocab_len is the
        number of distinct training tokens.
    """
    # Honour the caller-supplied directory instead of the global dataDir.
    train_path = os.path.join(data_path, 'ptb.train.txt')
    valid_path = os.path.join(data_path, 'ptb.valid.txt')
    test_path = os.path.join(data_path, 'ptb.test.txt')

    word_to_id = _build_vocab(train_path)
    train_data = _file_to_words_ids(train_path, word_to_id)
    valid_data = _file_to_words_ids(valid_path, word_to_id)
    test_data = _file_to_words_ids(test_path, word_to_id)
    vocab_len = len(word_to_id)
    return train_data, valid_data, test_data, vocab_len

In [7]:
# Load the three PTB splits as lists of word ids, plus the vocabulary size.
train_data,valid_data,test_data,vocab_len=ptb_raw_data(dataDir)

In [58]:
def ptb_producer(raw_data,batch_size,num_steps,name=None):
    """Create graph ops that iterate over `raw_data` in fixed-size windows.

    `raw_data` is converted to an int32 tensor, truncated to a multiple of
    `batch_size` and reshaped to [batch_size, batch_len]. A queue-backed
    range producer (shuffle disabled) supplies the window index; each
    dequeue selects the next `num_steps` columns.

    Args:
        raw_data: sequence of word ids.
        batch_size: number of rows in every batch.
        num_steps: number of columns (time steps) in every batch.
        name: optional name scope; 'PTBProducer' is used when None.

    Returns:
        Pair of tensors (x, y), both with static shape
        [batch_size, num_steps]; y is the same window as x shifted one
        column to the right.
    """
    scope_values = [raw_data, batch_size, num_steps]
    with tf.name_scope(name, 'PTBProducer', scope_values):
        raw_data = tf.convert_to_tensor(raw_data, name='raw_data', dtype=tf.int32)
        total_len = tf.size(raw_data)
        row_len = total_len // batch_size
        rows = tf.reshape(raw_data[0:row_len * batch_size], [batch_size, row_len])

        # One column is reserved so the shifted target window always fits.
        epoch_size = (row_len - 1) // num_steps
        positive_check = tf.assert_positive(
            epoch_size,
            message='epoch_size==0,decrease batch_size or num_steps')

        # Tie the assertion to epoch_size so it runs whenever epoch_size is used.
        with tf.control_dependencies([positive_check]):
            epoch_size = tf.identity(epoch_size, name='epoch_size')

        window = tf.train.range_input_producer(epoch_size, shuffle=False).dequeue()

        inputs = tf.strided_slice(rows,
                                  [0, window * num_steps],
                                  [batch_size, (window + 1) * num_steps])
        inputs.set_shape([batch_size, num_steps])

        targets = tf.strided_slice(rows,
                                   [0, window * num_steps + 1],
                                   [batch_size, (window + 1) * num_steps + 1])
        targets.set_shape([batch_size, num_steps])
        return inputs, targets
        

In [59]:
# Clear the default graph so the model cells below start from an empty graph.
tf.reset_default_graph()

In [60]:
### Build the RNN model

In [61]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

In [62]:
import inspect
import time
import numpy as np

In [63]:
# Short aliases for the tf.flags and tf.logging modules.
flags=tf.flags
logging=tf.logging

In [64]:
# flags.DEFINE_string('save_path',None,'Model output directory')
# FLAGS=flags.FLAGS

In [65]:
class PIBInput(object):
    """Batching settings plus the input/target tensors for one data split.

    Attributes:
        batch_size: copied from config.batch_size.
        num_steps: copied from config.num_steps.
        epoch_size: number of batches per pass over `data`, computed in
            Python as ((len(data) // batch_size) - 1) // num_steps.
        input_data, targets: tensors returned by ptb_producer.
    """

    def __init__(self, config, data, name=None):
        batch_size = config.batch_size
        num_steps = config.num_steps
        self.batch_size = batch_size
        self.num_steps = num_steps

        row_len = len(data) // batch_size
        self.epoch_size = (row_len - 1) // num_steps

        inputs, targets = ptb_producer(data, batch_size, num_steps, name=name)
        self.input_data = inputs
        self.targets = targets

In [106]:
class PTBModel(object):
    """Multi-layer LSTM language model unrolled over a fixed number of steps.

    Attributes set by the constructor:
        input: the input object passed in (provides batch_size, num_steps,
            input_data and targets).
        initial_state: zero state of the stacked cell.
        cost: scalar loss (per-timestep losses averaged over the batch,
            then summed over timesteps).
        final_state: cell state after the last unrolled step.
    Only when is_training is true:
        lr: non-trainable learning-rate variable, initialised to 0.0.
        train_op: gradient-descent update with global-norm gradient clipping.
        new_lr, lr_update: placeholder and assign op used by assign_lr.
    """

    def __init__(self,is_training,config,input_):
        """Build the model graph.

        Args:
            is_training: when true, dropout (if config.keep_prob < 1) and
                the training ops are added.
            config: object exposing hidden_size, vocab_size, keep_prob,
                num_layers and max_grad_norm.
            input_: object exposing batch_size, num_steps, input_data and
                targets.
        """
        self.input=input_
        batch_size=input_.batch_size
        num_steps=input_.num_steps
        size=config.hidden_size
        vocab_size=config.vocab_size

        def lstm_cell():
            # Pass `reuse` only when this BasicLSTMCell constructor lists it
            # among its arguments. getargspec returns a named tuple, so the
            # membership test has to look at its `.args` list.
            cell_args=inspect.getargspec(tf.contrib.rnn.BasicLSTMCell.__init__).args
            if 'reuse' in cell_args:
                # `reuse` is a property of the variable scope, not a method.
                return tf.contrib.rnn.BasicLSTMCell(size,
                                                    forget_bias=0.0,
                                                    state_is_tuple=True,
                                                    reuse=tf.get_variable_scope().reuse)
            return tf.contrib.rnn.BasicLSTMCell(size,forget_bias=0.0,state_is_tuple=True)

        attn_cell=lstm_cell
        if is_training and config.keep_prob<1:
            def attn_cell():
                # Apply dropout to each layer's output with the configured
                # keep probability (passed by keyword: the wrapper's first
                # positional probability is the input keep probability).
                return tf.contrib.rnn.DropoutWrapper(lstm_cell(),
                                                     output_keep_prob=config.keep_prob)

        cell=tf.contrib.rnn.MultiRNNCell([attn_cell() for _ in range(config.num_layers)],
                                         state_is_tuple=True)

        self.initial_state=cell.zero_state(batch_size,tf.float32)

        # The embedding table and lookup are pinned to the CPU.
        with tf.device('/cpu:0'):
            embedding=tf.get_variable('embedding',[vocab_size,size],dtype=tf.float32)
            inputs=tf.nn.embedding_lookup(embedding,input_.input_data)

        if is_training and config.keep_prob<1:
            inputs=tf.nn.dropout(inputs,config.keep_prob)

        outputs=[]

        # Manually unroll the recurrent cell, sharing weights after step 0.
        state=self.initial_state
        with tf.variable_scope('RNN'):
            for time_step in range(num_steps):
                if time_step>0:
                    tf.get_variable_scope().reuse_variables()
                cell_output,state=cell(inputs[:,time_step,:],state)
                outputs.append(cell_output)

        # Stack per-step outputs on axis 1, then flatten to rows of `size`.
        output=tf.reshape(tf.stack(axis=1,values=outputs),[-1,size])

        softmax_w=tf.get_variable('softmax_w',[size,vocab_size],dtype=tf.float32)
        softmax_b=tf.get_variable('softmax_b',[vocab_size],dtype=tf.float32)
        logits=tf.matmul(output,softmax_w)+softmax_b

        # Reshape logits to a 3-D tensor as expected by sequence_loss.
        logits=tf.reshape(logits,[batch_size,num_steps,vocab_size])

        # Uniform weights; average over the batch but keep one loss per step.
        loss=tf.contrib.seq2seq.sequence_loss(logits,
                                              input_.targets,
                                              tf.ones([batch_size,num_steps],dtype=tf.float32),
                                              average_across_timesteps=False,
                                              average_across_batch=True)

        # Sum the per-step losses into the scalar cost.
        self.cost=cost=tf.reduce_sum(loss)
        self.final_state=state

        # Evaluation-only models stop here: no optimizer ops are created.
        if not is_training:
            return

        self.lr=tf.Variable(0.0,trainable=False)
        tvars=tf.trainable_variables()
        grads,_=tf.clip_by_global_norm(tf.gradients(cost,tvars),
                                       config.max_grad_norm)
        optimizer=tf.train.GradientDescentOptimizer(self.lr)
        self.train_op=optimizer.apply_gradients(
            zip(grads,tvars),
            global_step=tf.contrib.framework.get_or_create_global_step())

        self.new_lr=tf.placeholder(tf.float32,shape=[],name='new_learning_rate')
        self.lr_update=tf.assign(self.lr,self.new_lr)

    def assign_lr(self,session,lr_value):
        """Set the learning-rate variable to `lr_value` using `session`."""
        session.run(self.lr_update,feed_dict={self.new_lr:lr_value})
        
    
class SmallConfig(object):
    """Hyper-parameters for the small model configuration."""

    # Half-width of the uniform range used for the variable initializer.
    init_scale = 0.1
    # Base learning rate, before the per-epoch decay factor is applied.
    learning_rate = 1.0
    # Global-norm threshold used when clipping gradients.
    max_grad_norm = 5
    # Number of stacked LSTM layers.
    num_layers = 2
    # Number of unrolled time steps per batch.
    num_steps = 20
    # LSTM unit count, also the embedding width.
    hidden_size = 200
    # Number of epochs trained before the learning rate starts decaying.
    max_epoch = 4
    # Total number of training epochs.
    max_max_epoch = 13
    # Dropout keep probability; dropout is only applied when this is < 1.
    keep_prob = 1.0
    # Multiplicative learning-rate decay applied per epoch after max_epoch.
    lr_delay = 0.5
    # Number of sequences per batch.
    batch_size = 20
    # Vocabulary size used for the embedding and softmax shapes.
    vocab_size = 10000

def run_epoch(session,model,eval_op=None,verbose=False):
    """Run `model` for one epoch and return the resulting perplexity.

    The recurrent state returned by each step is fed back in as the initial
    state of the next step.

    Args:
        session: object with a `run(fetches, feed_dict)` method.
        model: object exposing `initial_state`, `final_state`, `cost` and
            `input` (with `epoch_size`, `num_steps`, `batch_size`).
        eval_op: optional extra op fetched on every step (e.g. a train op).
        verbose: when true, print progress about ten times per epoch.

    Returns:
        exp(total cost / total number of time steps).
    """
    start_time = time.time()
    costs = 0.0
    iters = 0
    state = session.run(model.initial_state)

    fetches = {'cost': model.cost,
               'final_state': model.final_state}
    if eval_op is not None:
        fetches['eval_op'] = eval_op

    epoch_size = model.input.epoch_size
    # Report roughly ten times per epoch. Clamp to 1 so that epochs shorter
    # than ten steps do not cause a modulo-by-zero.
    report_every = max(epoch_size // 10, 1)

    for step in range(epoch_size):
        # Carry the previous step's state into this step, layer by layer.
        feed_dict = {}
        for i, (c, h) in enumerate(model.initial_state):
            feed_dict[c] = state[i].c
            feed_dict[h] = state[i].h

        vals = session.run(fetches, feed_dict)
        state = vals['final_state']

        costs += vals['cost']
        iters += model.input.num_steps

        if verbose and step % report_every == 0:
            # Guard against a zero reading from a coarse clock.
            elapsed = max(time.time() - start_time, 1e-12)
            print('%0.3f perplexity: %.3f speed: %.0f wps'%(step*1.0/epoch_size,
                                                           np.exp(costs/iters),
                                                           (iters*model.input.batch_size/elapsed)))

    return np.exp(costs/iters)



In [107]:
# Reload the PTB splits; the vocabulary size (fourth item) is discarded here.
raw_data=ptb_raw_data(dataDir)
train_data,valid_data,test_data,_=raw_data

In [None]:
# Training/validation settings, plus a copy whose batch_size and num_steps
# are overridden to 1 for the test model.
config=SmallConfig()
eval_config=SmallConfig()
eval_config.batch_size=1
eval_config.num_steps=1

with tf.Graph().as_default():
    initializer=tf.random_uniform_initializer(-config.init_scale,config.init_scale)

    with tf.name_scope('Train'):
        train_input=PIBInput(config=config,data=train_data,name='TrainInput')

        # First model instance creates the variables (reuse=None).
        with tf.variable_scope('Model',reuse=None,initializer=initializer):
            m=PTBModel(is_training=True,config=config,input_=train_input)
        tf.summary.scalar('Training_Loss',m.cost)
        tf.summary.scalar('Learning_Rate',m.lr)

    with tf.name_scope('Valid'):
        valid_input=PIBInput(config=config,data=valid_data,name='ValidInput')

        # Validation and test models share the training variables.
        with tf.variable_scope('Model',reuse=True,initializer=initializer):
            mvalid=PTBModel(is_training=False,config=config,input_=valid_input)
        tf.summary.scalar('Validation_Loss',mvalid.cost)

    with tf.name_scope('Test'):
        # NOTE(review): the test input is built from `config`, so the
        # batch_size/num_steps overrides on eval_config never reach mtest
        # (PTBModel reads both values from its input). Confirm whether
        # eval_config was intended here.
        test_input=PIBInput(config=config,data=test_data,name='TestInput')

        with tf.variable_scope('Model',reuse=True,initializer=initializer):
            mtest=PTBModel(is_training=False,config=eval_config,input_=test_input)

    sv=tf.train.Supervisor()
    with sv.managed_session() as session:
        for i in range(config.max_max_epoch):
            # Keep the base rate for the first max_epoch epochs, then
            # multiply by lr_delay once per additional epoch.
            lr_delay=config.lr_delay**max(i+1-config.max_epoch,0.0)
            m.assign_lr(session,config.learning_rate*lr_delay)

            print('Epoch: %d Learning rate:%.3f'%(i+1,session.run(m.lr)))

            train_perplexity=run_epoch(session,m,eval_op=m.train_op,verbose=True)

            print('Epoch: %d Training Perplexity: %0.3f'%(i+1,train_perplexity))

            valid_perplexity=run_epoch(session,mvalid)
            print('Epoch: %d Valid Perplexity: %0.3f'%(i+1,valid_perplexity))

            test_perplexity=run_epoch(session,mtest)
            # The two format arguments were missing their separating comma.
            print('Epoch: %d Testing Perplexity:%0.3f'%(i+1,test_perplexity))

In [11]:
## Testing tf.strided_slice

In [12]:
import tensorflow as tf
# A 3x3x5 constant: each of the three outer entries repeats one row three times.
t = tf.constant([
    [[11, 22, 33, 44, 55], [11, 22, 33, 44, 55], [11, 22, 33, 44, 55]],
    [[66, 77, 88, 99, 100], [66, 77, 88, 99, 100], [66, 77, 88, 99, 100]],
    [[111, 222, 333, 444, 555], [111, 222, 333, 444, 555], [111, 222, 333, 444, 555]],
])

# Slices that constrain the first one, two and three dimensions respectively.
z1 = tf.strided_slice(t, [1], [-1], [1])
z2 = tf.strided_slice(t, [1, 0], [-1, 2], [1, 1])
z3 = tf.strided_slice(t, [1, 0, 1], [-1, 2, 3], [1, 1, 1])

with tf.Session() as sess:
    # Evaluate all three slices and print them separated by one blank line.
    print(*sess.run([z1, z2, z3]), sep='\n\n')

[[[ 66  77  88  99 100]
  [ 66  77  88  99 100]
  [ 66  77  88  99 100]]]

[[[ 66  77  88  99 100]
  [ 66  77  88  99 100]]]

[[[77 88]
  [77 88]]]


In [13]:
## testing--tf.train.range_input_producer

In [2]:
# Number of elements sliced out per dequeue in input_producer.
batch_size = 6
# Range limit handed to tf.train.range_input_producer in input_producer.
NUM_EPOCHS = 5

def input_producer():
    """Return a tensor holding one `batch_size`-long slice of 1..30.

    The slice offset comes from a queue-backed range producer that yields
    0..NUM_EPOCHS-1 in order (shuffle disabled, a single pass).
    """
    values = np.arange(1, 31)
    index = tf.train.range_input_producer(NUM_EPOCHS, num_epochs=1, shuffle=False).dequeue()
    return tf.slice(values, [index * batch_size], [batch_size])

In [None]:
inputs=input_producer()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # num_epochs=1 makes the producer keep its epoch counter in a local
    # variable, which the global initializer does not cover.
    sess.run(tf.local_variables_initializer())

    # The producer is queue-based: without running queue-runner threads the
    # dequeue below would block forever.
    coord=tf.train.Coordinator()
    threads=tf.train.start_queue_runners(sess=sess,coord=coord)
    try:
        print(inputs.eval())
    finally:
        # Always shut the queue threads down, even if evaluation fails.
        coord.request_stop()
        coord.join(threads)