# MultiLSTM

In this Notebook we will build upon our vanilla RNN by learning how to use Tensorflow’s scan and dynamic_rnn models, upgrading the RNN cell and stacking multiple RNNs, and adding dropout. We will then use our upgraded RNN to generate some text, character by character.

<a href="https://r2rt.com/recurrent-neural-networks-in-tensorflow-ii">[Ref]</a>

In [1]:
import tensorflow as tf
import NLP_Utils as nlp
import time
import numpy as np

In [2]:
#Data
FILENAME = 'tiny_shakespeare.txt'

#Cell
STATE_SIZE = 100

#Batches & Vocab
BATCH_SIZE = 32 #Default: 64
NUM_STEPS = 80 #Default: 50
OVERLAP = NUM_STEPS #Default 25
VOCAB_STRATEGY = 'all'

#Test
TEST_SIZE = 300 #Default: 300

#Model
NUM_EPOCHS = 20
NUM_LAYERS = 3
LEARNING_RATE = 1e-4

In [3]:
batchManager = nlp.BatchManager()
batchManager.set_params(FILENAME,BATCH_SIZE,NUM_STEPS,OVERLAP,VOCAB_STRATEGY,mode='dense')
batchManager.get_params()

NUM_BATCHES = batchManager._num_batches
VOCAB_SIZE = batchManager._vocab_size
NUM_CLASSES = VOCAB_SIZE

DISPLAY_FREQ = NUM_BATCHES//10

@params:
[filename : "tiny_shakespeare.txt"]
[text : "First Citi"]
[text_size : 1115394]
[vocab : ['c', 'x', 'W', 'y', 'z']]
[vocab_size : 65]
[mode : dense]
[batch_size : 32]
[seq_length : 80]
[overlap : 80]
[num_batches : 435]


In [4]:
batchManager.stats()

The corpus has 1115394 characters
Configuration:
[batch_size : 32]
[seq_length : 80]
[overlap : 80]

The current configuration gives us 435 batches of 32 observations each one looking 80 steps in the past and overlapping 0 steps


In [5]:
with open(FILENAME,'r') as f:
    raw_data = f.read()
    print("Data length:", len(raw_data))

vocab = set(raw_data)
vocab_size = len(vocab)
idx_to_vocab = dict(enumerate(vocab))
vocab_to_idx = dict(zip(idx_to_vocab.values(), idx_to_vocab.keys()))

data = [vocab_to_idx[c] for c in raw_data]
del raw_data

Data length: 1115394


## Functions

In [6]:
def reset_graph():
    if 'sess' in globals() and sess:
        sess.close()
    tf.reset_default_graph()

## Model

In [7]:
def build_multilayer_lstm_graph_with_dynamic_rnn(state_size,num_classes,batch_size,num_steps,num_layers,learning_rate):
    
    reset_graph()
    
    #Placholders
    x = tf.placeholder(tf.int32,[batch_size,num_steps],name='input_placeholder')
    y= tf.placeholder(tf.int32,[batch_size,num_steps],name='labels_placeholder')
    
    embeddings = tf.get_variable('embedding_matrix',[num_classes,state_size])
    
    #rnn inputs is a tensor of [batch_size, num_steps, state_size]
    rnn_inputs = tf.nn.embedding_lookup(embeddings,x)
    
    #RNN Cell
    cell = tf.nn.rnn_cell.LSTMCell(state_size,state_is_tuple=True)
    cell = tf.nn.rnn_cell.MultiRNNCell([cell] * num_layers, state_is_tuple=True)
    init_state = cell.zero_state(batch_size,tf.float32)
    
    rnn_outputs,final_state = tf.nn.dynamic_rnn(cell,rnn_inputs,initial_state=init_state)
    
    with tf.variable_scope('softmax'):
        W = tf.get_variable('W',[state_size,num_classes])
        b = tf.get_variable('b',[num_classes],initializer=tf.constant_initializer(0.0))
        
    #Reshape rnn_inputs and y so we can get the logits in a single matmul
    rnn_outputs = tf.reshape(rnn_outputs,[-1,state_size])
    y_reshaped = tf.reshape(y,[-1])
        
    #Logits & predictions
    logits = tf.matmul(rnn_outputs,W) + b
    predictions = tf.nn.softmax(logits)
    
    total_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,labels=y_reshaped))
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(total_loss)
    
    saver = tf.train.Saver()
    
    ret_dict = dict(x=x, y=y, init_state=init_state, final_state=final_state, 
                    total_loss=total_loss, train_step=train_step,preds=predictions,saver=saver)
    
    return ret_dict

## Train function

In [8]:
def train_network(g,num_epochs,num_steps,batch_size,save='saves/last_model'):
    step = 0
    batchManager.generate_batches()
    
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        training_losses = []
        
        for X,Y,epoch in batchManager.generate_batches(num_epochs):
                        
            training_loss = 0
            training_state = None

            step += 1

            feed_dict = {g['x']:X,g['y']:Y}

            if training_state is not None:
                feed_dict[g['init_state']] = training_state
            training_loss_, training_state_, _ = sess.run([g['total_loss'],g['final_state'],g['train_step']],feed_dict)

            training_loss += training_loss_
                
            
            if(step % DISPLAY_FREQ == 0):
                print('Epoch %d - At step %d average training loss: %.3f'%(epoch,step,training_loss/step))
                training_losses.append(training_loss/step)
            
        step = 0
        if isinstance(save,str):
            g['saver'].save(sess,save)
            
        return training_losses

## Test function

In [9]:
def generate_characters(g,checkpoint,num_chars,prompt='A',pick_top_chars = None):
    
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        g['saver'].restore(sess,checkpoint)
        
        state = None
        current_char = batchManager.vocab_encode(prompt)[0]
        chars = [current_char]
        
        for i in range(num_chars):
            if state is not None:
                feed_dict = {g['x']:[[current_char]], g['init_state']:state}
            else:
                feed_dict = {g['x']:[[current_char]]}
                
            preds,state = sess.run([g['preds'],g['final_state']],feed_dict)
            
            if(pick_top_chars is not None):
                p = np.squeeze(preds)
                p[np.argsort(p)[:-pick_top_chars]] = 0
                p = p / np.sum(p)
                current_char = np.random.choice(vocab_size,1,p=p)[0]
            else:
                current_char = np.random.choice(vocab_size,1,p=np.squeeze(preds))[0]
                
            chars.append(current_char)
        
    chars = batchManager.vocab_decode(chars)
    print("".join(chars))
    return ("".join(chars))

## Train

In [10]:
g = build_multilayer_lstm_graph_with_dynamic_rnn(STATE_SIZE,NUM_CLASSES,BATCH_SIZE,NUM_STEPS,NUM_LAYERS,LEARNING_RATE)

In [11]:
training_losses = train_network(g,NUM_EPOCHS,NUM_STEPS,BATCH_SIZE)

Epoch 0 - At step 43 average training loss: 0.094
Epoch 0 - At step 86 average training loss: 0.041
Epoch 0 - At step 129 average training loss: 0.027
Epoch 0 - At step 172 average training loss: 0.020
Epoch 0 - At step 215 average training loss: 0.015
Epoch 0 - At step 258 average training loss: 0.013
Epoch 0 - At step 301 average training loss: 0.011
Epoch 0 - At step 344 average training loss: 0.010
Epoch 0 - At step 387 average training loss: 0.009
Epoch 0 - At step 430 average training loss: 0.008
Epoch 1 - At step 473 average training loss: 0.007
Epoch 1 - At step 516 average training loss: 0.007
Epoch 1 - At step 559 average training loss: 0.006
Epoch 1 - At step 602 average training loss: 0.006
Epoch 1 - At step 645 average training loss: 0.005
Epoch 1 - At step 688 average training loss: 0.005
Epoch 1 - At step 731 average training loss: 0.004
Epoch 1 - At step 774 average training loss: 0.004
Epoch 1 - At step 817 average training loss: 0.004
Epoch 1 - At step 860 average tra

Epoch 15 - At step 6794 average training loss: 0.000
Epoch 15 - At step 6837 average training loss: 0.000
Epoch 15 - At step 6880 average training loss: 0.000
Epoch 15 - At step 6923 average training loss: 0.000
Epoch 16 - At step 6966 average training loss: 0.000
Epoch 16 - At step 7009 average training loss: 0.000
Epoch 16 - At step 7052 average training loss: 0.000
Epoch 16 - At step 7095 average training loss: 0.000
Epoch 16 - At step 7138 average training loss: 0.000
Epoch 16 - At step 7181 average training loss: 0.000
Epoch 16 - At step 7224 average training loss: 0.000
Epoch 16 - At step 7267 average training loss: 0.000
Epoch 16 - At step 7310 average training loss: 0.000
Epoch 16 - At step 7353 average training loss: 0.000
Epoch 17 - At step 7396 average training loss: 0.000
Epoch 17 - At step 7439 average training loss: 0.000
Epoch 17 - At step 7482 average training loss: 0.000
Epoch 17 - At step 7525 average training loss: 0.000
Epoch 17 - At step 7568 average training loss:

## Test

In [12]:
g = build_multilayer_lstm_graph_with_dynamic_rnn(STATE_SIZE,NUM_CLASSES,batch_size=1,num_steps=1,num_layers=NUM_LAYERS,learning_rate=LEARNING_RATE)

In [13]:
generate_characters(g,checkpoint='saves/last_model',num_chars=750,prompt='A',pick_top_chars=5)

INFO:tensorflow:Restoring parameters from saves/last_model
Asd ant angen hase thint
I the the to tout at, the me asd.

ANELTI:
I thor wount thas sat ort as soush the
The wo with wish thus hit thing sour thim, asd and.

ANOOO:
Is shett tho te mather, hite are he the sond,
Wheth thin athar to mertest sive,
Thant sharl sond me han angent there thar ant sontile hind arl,
Ang that to shit withe, sour mesd and hict shang the thos ant masese sonten arlire as to sottere and,
Bons to sounthess hand afse so the shale to shour his
Thim ther hos thee horss and the thour thar the
shut asd whan ant me and her to cittis tering and matter the sher, mes sonss.

INTI I IRIO:
Woul sort mere asd,
Ang and. Insest to and so muul to toud thee sorder.

IONI:
A sons mo theud mour a sontithes werle to and, the ham mat thes.



'Asd ant angen hase thint\nI the the to tout at, the me asd.\n\nANELTI:\nI thor wount thas sat ort as soush the\nThe wo with wish thus hit thing sour thim, asd and.\n\nANOOO:\nIs shett tho te mather, hite are he the sond,\nWheth thin athar to mertest sive,\nThant sharl sond me han angent there thar ant sontile hind arl,\nAng that to shit withe, sour mesd and hict shang the thos ant masese sonten arlire as to sottere and,\nBons to sounthess hand afse so the shale to shour his\nThim ther hos thee horss and the thour thar the\nshut asd whan ant me and her to cittis tering and matter the sher, mes sonss.\n\nINTI I IRIO:\nWoul sort mere asd,\nAng and. Insest to and so muul to toud thee sorder.\n\nIONI:\nA sons mo theud mour a sontithes werle to and, the ham mat thes.\n'