# Identify tags in airline database

## Bidirectional recurrent neural network

    - Improving the model of: 00_identify_tags_in_airline_database_LSTM - SOLVED  


In [1]:
from __future__ import print_function

import os 
import numpy as np 
import tensorflow as tf 

print(tf.__version__)

0.12.head


## Dataset transformation


In [2]:
# Read data
import pickle
import sys

atis_file = '/home/ubuntu/data/training/text/atis/atis.pkl'

with open(atis_file,'rb') as f:
    if sys.version_info.major==2:
        train, test, dicts = pickle.load(f) #python2.7
    else:
        train, test, dicts = pickle.load(f, encoding='bytes') #python3

#Dictionaries and train test partition
w2idx, ne2idx, labels2idx = dicts[b'words2idx'], dicts[b'tables2idx'], dicts[b'labels2idx']
    
idx2w  = dict((v,k) for k,v in w2idx.items())
idx2la = dict((v,k) for k,v in labels2idx.items())

train_x, _, train_label = train
test_x,  _,  test_label  = test


# Max value of word coding to assign the ID_PAD
ID_PAD = np.max([np.max(tx) for tx in train_x]) + 1
print('ID_PAD: ', ID_PAD)

def context(l, size=3):
    l = list(l)
    lpadded = size // 2 * [ID_PAD] + l + size // 2 * [ID_PAD]
    out = [lpadded[i:(i + size)] for i in range(len(l))]
    return out


# Create train and test X y.
X_trn=[]
for s in train_x:
    X_trn += context(s,size=10)
X_trn = np.array(X_trn)

X_tst=[]
for s in test_x:
    X_tst += context(s,size=10)
X_tst = np.array(X_tst)
print('X trn shape: ', X_trn.shape)
print('X_tst shape: ',X_tst.shape)

y_trn=[]
for s in train_label:
    y_trn += list(s)
y_trn = np.array(y_trn)
print('y_trn shape: ',y_trn.shape)

y_tst=[]
for s in test_label:
    y_tst += list(s)
y_tst = np.array(y_tst)
print('y_tst shape: ',y_tst.shape)

print('Num labels: ',len(set(y_trn)))
print('Num words: ',len(set(idx2w)))

ID_PAD:  572
X trn shape:  (56590, 10)
X_tst shape:  (9198, 10)
y_trn shape:  (56590,)
y_tst shape:  (9198,)
Num labels:  121
Num words:  572


# Simpe LSTM model

## Architecture
    - tf.nn.embedding_lookup
    - tf.nn.dynamic_rnn layer
    - Dense layer: tf.nn.relu(tf.matmul(x, W) + b)
    
## Features
    - Dropout
    - Saver
    - Cross entropy with loss regularization
    - Score function

In [3]:
#General parameters
LOG_DIR = '/tmp/tensorboard/airline/BLSTM/'

# data attributes
input_seq_length = X_trn.shape[1]
input_vocabulary_size = len(set(idx2w)) + 1
output_length = 127

#Model parameters
embedding_size=64
num_hidden_lstm = 128


In [4]:
# Define the tensorflow graph

graph = tf.Graph()

with graph.as_default():
    # graph definition
    # Inputs
    with tf.name_scope('Inputs') as scope:
        x = tf.placeholder(tf.int32, shape=[None, input_seq_length], name='x')
        x_length = tf.placeholder(tf.int32, shape=[None], name='x_length')
        y = tf.placeholder(tf.int64, shape=[None], name='y')

    with tf.name_scope('Embeddings') as scope:
        W_embedding = tf.Variable(tf.random_uniform([input_vocabulary_size, embedding_size], -1.0, 1.0) ,name="W")
        embedding_layer = tf.nn.embedding_lookup(W_embedding, x)
        print('embedding_layer: ', embedding_layer)

    with tf.name_scope('BLSTM') as scope:
        # Dropout parameter
        keep_prob = tf.placeholder(tf.float32, name='keep_prob')
        
        # Forward RNN cell
        cell_fw = tf.contrib.rnn.LSTMCell(num_hidden_lstm, 
                                          initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=123))
        cell_fw = tf.contrib.rnn.DropoutWrapper(cell_fw, output_keep_prob=keep_prob)
        
        # Backward RNN cell
        cell_bw = tf.contrib.rnn.LSTMCell(num_hidden_lstm, 
                                          initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=113))
        cell_bw = tf.contrib.rnn.DropoutWrapper(cell_bw, output_keep_prob=keep_prob)
        
        # Bidirectional RNN: output: list of concatenate outputs of the fw and bw networks.
        lstm_outputs, _ = tf.nn.bidirectional_dynamic_rnn(cell_fw, cell_bw, embedding_layer,
                                                          sequence_length=x_length, dtype=tf.float32)
        print('lstm_outputs: ', lstm_outputs)
        
        # Concat the fw and bw outputs of the network.
        final_lstm_outputs = tf.concat([lstm_outputs[0][:,-1,:], lstm_outputs[1][:,-1,:]], 1)
        print('final_lstm_outputs: ', final_lstm_outputs)
   

    #Dense layer form RNN outs to prediction
    with tf.name_scope('Dense') as scope:
        W_dense = tf.Variable(tf.truncated_normal([num_hidden_lstm*2, output_length], stddev=0.1), name='W_dense')
        b_dense = tf.Variable(tf.constant(0.1, shape=[output_length]), name='b_dense')
        dense_output = tf.nn.relu(tf.matmul(final_lstm_outputs, W_dense) + b_dense)
        print('dense_output: ', dense_output)

        

    # Loss function
    with tf.name_scope("xent") as scope:
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=dense_output, 
                                                                       labels=y, name='cross_entropy')
        ce_summary = tf.summary.scalar("cross_entropy", tf.reduce_mean(cross_entropy))

    #Optimizer
    with tf.name_scope("train") as scope:
        optimizer = tf.train.AdamOptimizer(0.001)
        train_op = optimizer.minimize(cross_entropy, name='train_op')


    #Accuracy
    with tf.name_scope("test") as scope:
        #Prediction
        y_pred = tf.nn.softmax(dense_output, name='y_pred')
        #Accuracy
        correct_prediction = tf.equal(tf.argmax(dense_output,1), y)
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')
        accuracy_summary = tf.summary.scalar("accuracy", accuracy)


    # Merge all the summaries
    with tf.name_scope('summaries') as scope:
        merged = tf.summary.merge_all()
 

embedding_layer:  Tensor("Embeddings/embedding_lookup:0", shape=(?, 10, 64), dtype=float32)
lstm_outputs:  (<tf.Tensor 'BLSTM/bidirectional_rnn/fw/fw/transpose:0' shape=(?, 10, 128) dtype=float32>, <tf.Tensor 'BLSTM/ReverseSequence:0' shape=(?, 10, 128) dtype=float32>)
final_lstm_outputs:  Tensor("BLSTM/concat:0", shape=(?, 256), dtype=float32)
dense_output:  Tensor("Dense/Relu:0", shape=(?, 127), dtype=float32)


In [5]:
#batch generator
def batch_generator(x=X_trn, y=y_trn, batch_size=128):
    from sklearn.utils import shuffle
    x_shuffle, y_shuffle = shuffle(x, y, random_state=0)
    for i in range(0, x.shape[0]-batch_size, batch_size):
        x_batch = x_shuffle[i:i+batch_size,:]
        y_batch = y_shuffle[i:i+batch_size]
        x_length_batch = np.ones([batch_size])*10
        yield x_batch, x_length_batch, y_batch
    
seq = batch_generator(x=X_trn, y=y_trn, batch_size=20)
print(next(seq))

(array([[554, 241, 481, 165, 193, 197, 208, 379, 502,  64],
       [193, 514, 208,  77, 502, 137, 359, 544,  40, 481],
       [232, 331, 237, 358,  13, 193, 208,  77, 502, 137],
       [ 32, 194,  40, 183, 208, 137, 502, 415, 205, 572],
       [232, 331,  13, 277, 353, 194, 208, 452, 375, 195],
       [572, 193, 348, 208, 313, 502, 282,  71, 358, 249],
       [193, 208, 128, 502, 415, 205, 572, 572, 572, 572],
       [358, 481, 174, 353,  65, 524, 435, 572, 572, 572],
       [208, 481,  29, 234, 379, 502, 159, 572, 572, 572],
       [572, 572, 572, 439, 301, 481, 194, 208, 415, 205],
       [481, 265, 193, 208,  64, 502, 137, 358, 248, 435],
       [157,  37,  26, 221, 561,  13, 105, 353, 430, 111],
       [534, 358, 481, 190, 105,  37,  26, 193, 208, 376],
       [572, 572, 572, 383, 276, 530, 194,  73,  77,  40],
       [572, 572, 572, 572, 554, 194,  50, 389,  86,  37],
       [481, 193, 501, 481, 321, 358, 530,  26, 200, 426],
       [572, 572, 572, 572,  13, 190, 105, 193, 358,  3

In [6]:
import time

batch_size = 256
nEpochs = 20

start = time.time()

gpu_options = tf.GPUOptions(allow_growth = True)
with tf.Session(graph=graph, config=tf.ConfigProto(gpu_options=gpu_options)) as session:

    #Create sumaries writers
    train_writer = tf.summary.FileWriter(LOG_DIR + 'train', session.graph, flush_secs=2)
    test_writer  = tf.summary.FileWriter(LOG_DIR + 'test', flush_secs=2)

        
    print('Initializing')
    print('Epoch - Loss(trn) -  Acc(trn)   -   Loss(tst) -   Acc(tst)')
    session.run(tf.global_variables_initializer())
    for epoch in range(nEpochs):
        ce_c=[]
        acc_c=[]
        ce_c_tst=[]
        acc_c_tst=[]
        
        batch_list = batch_generator(x=X_trn, y=y_trn, batch_size=batch_size)
        for batch in batch_list:
            feedDict = {x: batch[0], x_length: batch[1], y: batch[2], keep_prob: 0.5} # dictionary of batch data to run the graph
            _, ce, acc = session.run([train_op, cross_entropy, accuracy], feed_dict=feedDict)
            ce_c += [ce]
            acc_c += [acc]
        # Sumaries train    
        summary_str_trn = session.run(merged, feedDict)
        train_writer.add_summary(summary_str_trn, epoch)            
            
        batch_list_tst = batch_generator(x=X_tst, y=y_tst, batch_size=batch_size)
        for x_batch, x_length_batch, y_batch in batch_list_tst:
            feedDict = {x: x_batch, x_length:x_length_batch, y: y_batch, keep_prob: 1} # dictionary of batch data to run the graph
            ce_tst, acc_tst = session.run([cross_entropy, accuracy], feed_dict=feedDict)
            ce_c_tst += [ce_tst]
            acc_c_tst += [acc_tst]
        # Sumaries test    
        summary_str_tst = session.run(merged, feedDict)
        test_writer.add_summary(summary_str_tst, epoch)            
                    
        print(epoch, np.mean(ce_c), np.mean(acc_c), np.mean(ce_c_tst), np.mean(acc_c_tst), sep='   -   ')
print('Time to train:', time.time() - start)
# 468 secs in CPU i7
# 244 secs in GPU Mac

Initializing
Epoch - Loss(trn) -  Acc(trn)   -   Loss(tst) -   Acc(tst)
0   -   1.80767   -   0.630798   -   1.41786   -   0.662165
1   -   1.07813   -   0.745139   -   0.887994   -   0.806808
2   -   0.666591   -   0.853701   -   0.61383   -   0.877679
3   -   0.446592   -   0.90671   -   0.444665   -   0.908594
4   -   0.322   -   0.934195   -   0.364356   -   0.929129
5   -   0.251194   -   0.948689   -   0.31351   -   0.937612
6   -   0.20607   -   0.959753   -   0.283727   -   0.945759
7   -   0.171239   -   0.966841   -   0.259671   -   0.95067
8   -   0.153001   -   0.970341   -   0.243757   -   0.955469
9   -   0.133434   -   0.97483   -   0.240453   -   0.95692
10   -   0.122681   -   0.97605   -   0.23278   -   0.957143
11   -   0.103522   -   0.980274   -   0.223035   -   0.961049
12   -   0.0977799   -   0.980946   -   0.222208   -   0.959375
13   -   0.0908937   -   0.982519   -   0.215212   -   0.962165
14   -   0.0817911   -   0.983986   -   0.219331   -   0.961049
15   