# Identify tags in airline database

## Minimal code

    - Read dataset
    - transform data
    - Minimal model
        - Embedings
        - Dense
        


In [2]:
from __future__ import print_function

import os 
import numpy as np 

import tensorflow as tf 
print(tf.__version__)


0.12.0-rc0


## Dataset

ATIS (Airline Travel Information System) dataset. Available in: https://github.com/mesnilgr/is13/blob/master/data/load.py

### Example:

Input (words)	show	flights	from	Boston	to	New	York	today

Output (labels)	O	O	O	B-dept	O	B-arr	I-arr	B-date




In [3]:
# Read data
import pickle

atis_file = '/Users/jorge/data/training/text/atis/atis.pkl'
with open(atis_file,'rb') as f:
    #train, test, dicts = pickle.load(f, encoding='bytes') #python3
    train, test, dicts = pickle.load(f)


## train / test sets:
    - X: list of input sequences
    - label: List of target labels asociated to each word in each sentence.
## Dictionaries
    - labels2idx:  To decode the labels
    - words2idx: To decode the sentences

In [4]:
#Dictionaries and train test partition
w2idx, ne2idx, labels2idx = dicts[b'words2idx'], dicts[b'tables2idx'], dicts[b'labels2idx']
    
idx2w  = dict((v,k) for k,v in w2idx.items())
idx2la = dict((v,k) for k,v in labels2idx.items())

train_x, _, train_label = train
test_x,  _,  test_label  = test



# Visualize data
wlength = 35
for e in ['train','test']:
    print(e)
    for sw, sl in zip(eval(e+'_x')[:2], eval(e+'_label')[:2]):
        print( 'WORD'.rjust(wlength), 'LABEL'.rjust(wlength))
        for wx, la in zip(sw, sl): print( idx2w[wx].rjust(wlength), idx2la[la].rjust(wlength))
        print( '\n'+'**'*30+'\n')


train
                               WORD                               LABEL
                                  i                                   O
                               want                                   O
                                 to                                   O
                                fly                                   O
                               from                                   O
                             boston                 B-fromloc.city_name
                                 at                                   O
                    DIGITDIGITDIGIT                  B-depart_time.time
                                 am                  I-depart_time.time
                                and                                   O
                             arrive                                   O
                                 in                                   O
                             denver                   B-to

In [5]:
#Select words for the label 48: b'B-fromloc.city_name' in train and test to check that are different:
for e in ['train','test']:
    print(e)
    print('---------')
    for sw, sl in zip(eval(e+'_x')[:5], eval(e+'_label')[:5]):
        for wx, la in zip(sw, sl): 
            if la==48:
                print( idx2w[wx])
    print('\n')


train
---------
boston
pittsburgh
san
washington
tacoma
pittsburgh


test
---------
charlotte
tacoma
phoenix
phoenix
orlando




## Data transformation
    - Convert the list of sequences of words into an array of words x characteristics.
    - The characteristics are the context of the word in the sentence.
        - For each word in the sentence, generate the context with the previous and the next words in the sentence.
        - For words at the beggining and the end, use padding to complete the context.

In [8]:
# Max value of word coding to assign the ID_PAD
print('Min val: ', np.min([np.min(tx) for tx in train_x]))
ID_PAD = np.max([np.max(tx) for tx in train_x]) + 1
print('Max val (ID_PAD): ', ID_PAD)

def context(l, size=3):
    l = list(l)
    lpadded = size // 2 * [ID_PAD] + l + size // 2 * [ID_PAD]
    out = [lpadded[i:(i + size)] for i in range(len(l))]
    return out

x = np.array([0, 1, 2, 3, 4], dtype=np.int32)
print('Context vectors sample: ', context(x))

Min val:  0
Max val (ID_PAD):  572
Context vectors sample:  [[572, 0, 1], [0, 1, 2], [1, 2, 3], [2, 3, 4], [3, 4, 572]]


In [9]:
# Create train and test X y.
X_trn=[]
for s in train_x:
    X_trn += context(s,size=10)
X_trn = np.array(X_trn)

X_tst=[]
for s in test_x:
    X_tst += context(s,size=10)
X_tst = np.array(X_tst)

print('X trn shape: ', X_trn.shape)
print('X_tst shape: ',X_tst.shape)


y_trn=[]
for s in train_label:
    y_trn += list(s)
y_trn = np.array(y_trn)
print('y_trn shape: ',y_trn.shape)

y_tst=[]
for s in test_label:
    y_tst += list(s)
y_tst = np.array(y_tst)
print('y_tst shape: ',y_tst.shape)


X trn shape:  (56590, 10)
X_tst shape:  (9198, 10)
y_trn shape:  (56590,)
y_tst shape:  (9198,)


In [10]:
# Encode words wiht dummy vars
X_trn_enc = np.zeros([X_trn.shape[0], X_trn.shape[1], ID_PAD+1])
for ib, b in enumerate(X_trn):
    for iw, w in enumerate(b):
        X_trn_enc[ib, iw, X_trn[ib,iw]] = 1


X_tst_enc = np.zeros([X_tst.shape[0], X_tst.shape[1], ID_PAD+1])
for ib, b in enumerate(X_tst):
    for iw, w in enumerate(b):
        X_tst_enc[ib, iw, X_tst[ib,iw]] = 1


print(X_trn_enc.shape, X_tst_enc.shape)
print(X_trn[0])
print(X_trn_enc[0,0])

(56590, 10, 573) (9198, 10, 573)
[572 572 572 572 572 232 542 502 196 208]
[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0. 

In [11]:
print('Num labels: ',len(set(y_trn)))
print('Num words: ',len(set(idx2w)))

Num labels:  121
Num words:  572


# First model

## Architecture
    - tf.nn.embedding_lookup
    - tf.nn.dynamic_rnn layer
    - Dense layer: tf.nn.relu(tf.matmul(x, W) + b)
    
## Features
    - Dropout
    - Saver
    - Cross entropy with loss regularization
    - Score function

In [12]:
#General parameters
LOG_DIR = '/tmp/airline/dummy/'

# data attributes
input_seq_length = X_trn.shape[1]
output_length = 127

#Model parameters
embedding_size=64


In [20]:
from tensorflow.contrib.tensorboard.plugins import projector

# Define the tensorflow graph

graph = tf.Graph()
with graph.as_default():
    # graph definition
    # Inputs
    with tf.name_scope('Inputs') as scope:
        x = tf.placeholder(tf.float32, shape=[None, input_seq_length, ID_PAD+1], name='x')
        y = tf.placeholder(tf.int64, shape=[None], name='y')

        x_flatten = tf.reshape(x, [-1,input_seq_length*(ID_PAD+1)])

    
    #Dense layer form RNN outs to prediction
    with tf.name_scope('Dense') as scope:
        W_dense = tf.Variable(tf.truncated_normal([input_seq_length*(ID_PAD+1), output_length], stddev=0.1), name='W_dense')
        b_dense = tf.Variable(tf.constant(0.1, shape=[output_length]), name='b_dense')
        dense_output = tf.nn.relu(tf.matmul(x_flatten, W_dense) + b_dense)
        print('dense_output: ', dense_output)

        
    #Prediction
    y_pred = tf.nn.softmax(dense_output, name='y_pred')

    # Loss function
    with tf.name_scope("xent") as scope:
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(dense_output, y, name='cross_entropy')

    #Optimizer
    with tf.name_scope("train") as scope:
        optimizer = tf.train.AdamOptimizer(0.001)
        train_op = optimizer.minimize(cross_entropy, name='train_op')


    #Accuracy
    with tf.name_scope("test") as scope:
        correct_prediction = tf.equal(tf.argmax(dense_output,1), y)
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')

        

dense_output:  Tensor("Dense/Relu:0", shape=(?, 127), dtype=float32)


In [17]:
#batch generator
def batch_generator(x, y, batch_size=256):
    for i in range(0, x.shape[0]-batch_size, batch_size):
        x_batch = x[i:i+batch_size,:]
        y_batch = y[i:i+batch_size]
        yield x_batch, y_batch
    
seq = batch_generator(X_trn_enc, y_trn, batch_size=20)
print(next(seq))

(array([[[ 0.,  0.,  0., ...,  0.,  0.,  1.],
        [ 0.,  0.,  0., ...,  0.,  0.,  1.],
        [ 0.,  0.,  0., ...,  0.,  0.,  1.],
        ..., 
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.]],

       [[ 0.,  0.,  0., ...,  0.,  0.,  1.],
        [ 0.,  0.,  0., ...,  0.,  0.,  1.],
        [ 0.,  0.,  0., ...,  0.,  0.,  1.],
        ..., 
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.]],

       [[ 0.,  0.,  0., ...,  0.,  0.,  1.],
        [ 0.,  0.,  0., ...,  0.,  0.,  1.],
        [ 0.,  0.,  0., ...,  0.,  0.,  1.],
        ..., 
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.]],

       ..., 
       [[ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,

In [21]:
# Execute the graph to train a network
batch_size = 64
nEpochs = 10

with tf.Session(graph=graph) as session:
    print('Initializing')
    print('Epoch - Loss(trn) -  Acc(trn)   -   Loss(tst) -   Acc(tst)')
    session.run(tf.initialize_all_variables())
    for epoch in range(nEpochs):
        ce_c=[]
        acc_c=[]
        ce_c_tst=[]
        acc_c_tst=[]
        
        batch_list = batch_generator(X_trn_enc,y_trn, batch_size=batch_size)
        for i, batch in enumerate(batch_list):
            feedDict = {x: batch[0], y: batch[1]} # dictionary of batch data to run the graph
            _, ce, acc = session.run([train_op, cross_entropy, accuracy], feed_dict=feedDict)
            ce_c += [ce]
            acc_c += [acc]
            
        batch_list_tst = batch_generator(X_tst_enc, y_tst, batch_size=batch_size)
        for x_batch, y_batch in batch_list_tst:
            feedDict = {x: x_batch, y: y_batch} # dictionary of batch data to run the graph
            ce_tst, acc_tst = session.run([cross_entropy, accuracy], feed_dict=feedDict)
            ce_c_tst += [ce_tst]
            acc_c_tst += [acc_tst]
        
        print(epoch, np.mean(ce_c), np.mean(acc_c), np.mean(ce_c_tst), np.mean(acc_c_tst), sep='   -   ')

Initializing
Epoch - Loss(trn) -  Acc(trn)   -   Loss(tst) -   Acc(tst)
Instructions for updating:
Use `tf.global_variables_initializer` instead.


ResourceExhaustedError: OOM when allocating tensor with shape[5730,127]
	 [[Node: train/Dense/W_dense/Adam/Assign = Assign[T=DT_FLOAT, _class=["loc:@Dense/W_dense"], use_locking=true, validate_shape=true, _device="/job:localhost/replica:0/task:0/gpu:0"](train/Dense/W_dense/Adam, train/zeros)]]

Caused by op u'train/Dense/W_dense/Adam/Assign', defined at:
  File "/Users/jorge/anaconda3/envs/py27/lib/python2.7/runpy.py", line 162, in _run_module_as_main
    "__main__", fname, loader, pkg_name)
  File "/Users/jorge/anaconda3/envs/py27/lib/python2.7/runpy.py", line 72, in _run_code
    exec code in run_globals
  File "/Users/jorge/anaconda3/envs/py27/lib/python2.7/site-packages/ipykernel/__main__.py", line 3, in <module>
    app.launch_new_instance()
  File "/Users/jorge/anaconda3/envs/py27/lib/python2.7/site-packages/traitlets/config/application.py", line 653, in launch_instance
    app.start()
  File "/Users/jorge/anaconda3/envs/py27/lib/python2.7/site-packages/ipykernel/kernelapp.py", line 474, in start
    ioloop.IOLoop.instance().start()
  File "/Users/jorge/anaconda3/envs/py27/lib/python2.7/site-packages/zmq/eventloop/ioloop.py", line 162, in start
    super(ZMQIOLoop, self).start()
  File "/Users/jorge/anaconda3/envs/py27/lib/python2.7/site-packages/tornado/ioloop.py", line 887, in start
    handler_func(fd_obj, events)
  File "/Users/jorge/anaconda3/envs/py27/lib/python2.7/site-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/Users/jorge/anaconda3/envs/py27/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/Users/jorge/anaconda3/envs/py27/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/Users/jorge/anaconda3/envs/py27/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/Users/jorge/anaconda3/envs/py27/lib/python2.7/site-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/Users/jorge/anaconda3/envs/py27/lib/python2.7/site-packages/ipykernel/kernelbase.py", line 276, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/Users/jorge/anaconda3/envs/py27/lib/python2.7/site-packages/ipykernel/kernelbase.py", line 228, in dispatch_shell
    handler(stream, idents, msg)
  File "/Users/jorge/anaconda3/envs/py27/lib/python2.7/site-packages/ipykernel/kernelbase.py", line 390, in execute_request
    user_expressions, allow_stdin)
  File "/Users/jorge/anaconda3/envs/py27/lib/python2.7/site-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/Users/jorge/anaconda3/envs/py27/lib/python2.7/site-packages/ipykernel/zmqshell.py", line 501, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/Users/jorge/anaconda3/envs/py27/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2717, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/Users/jorge/anaconda3/envs/py27/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2821, in run_ast_nodes
    if self.run_code(code, result):
  File "/Users/jorge/anaconda3/envs/py27/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2881, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-20-ce749c280b25>", line 34, in <module>
    train_op = optimizer.minimize(cross_entropy, name='train_op')
  File "/Users/jorge/anaconda3/envs/py27/lib/python2.7/site-packages/tensorflow/python/training/optimizer.py", line 279, in minimize
    name=name)
  File "/Users/jorge/anaconda3/envs/py27/lib/python2.7/site-packages/tensorflow/python/training/optimizer.py", line 393, in apply_gradients
    self._create_slots(var_list)
  File "/Users/jorge/anaconda3/envs/py27/lib/python2.7/site-packages/tensorflow/python/training/adam.py", line 119, in _create_slots
    self._zeros_slot(v, "m", self._name)
  File "/Users/jorge/anaconda3/envs/py27/lib/python2.7/site-packages/tensorflow/python/training/optimizer.py", line 593, in _zeros_slot
    named_slots[var] = slot_creator.create_zeros_slot(var, op_name)
  File "/Users/jorge/anaconda3/envs/py27/lib/python2.7/site-packages/tensorflow/python/training/slot_creator.py", line 108, in create_zeros_slot
    colocate_with_primary=colocate_with_primary)
  File "/Users/jorge/anaconda3/envs/py27/lib/python2.7/site-packages/tensorflow/python/training/slot_creator.py", line 86, in create_slot
    return _create_slot_var(primary, val, scope)
  File "/Users/jorge/anaconda3/envs/py27/lib/python2.7/site-packages/tensorflow/python/training/slot_creator.py", line 50, in _create_slot_var
    slot = variables.Variable(val, name=scope, trainable=False)
  File "/Users/jorge/anaconda3/envs/py27/lib/python2.7/site-packages/tensorflow/python/ops/variables.py", line 224, in __init__
    expected_shape=expected_shape)
  File "/Users/jorge/anaconda3/envs/py27/lib/python2.7/site-packages/tensorflow/python/ops/variables.py", line 360, in _init_from_args
    validate_shape=validate_shape).op
  File "/Users/jorge/anaconda3/envs/py27/lib/python2.7/site-packages/tensorflow/python/ops/gen_state_ops.py", line 47, in assign
    use_locking=use_locking, name=name)
  File "/Users/jorge/anaconda3/envs/py27/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 759, in apply_op
    op_def=op_def)
  File "/Users/jorge/anaconda3/envs/py27/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2240, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/Users/jorge/anaconda3/envs/py27/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1128, in __init__
    self._traceback = _extract_stack()

ResourceExhaustedError (see above for traceback): OOM when allocating tensor with shape[5730,127]
	 [[Node: train/Dense/W_dense/Adam/Assign = Assign[T=DT_FLOAT, _class=["loc:@Dense/W_dense"], use_locking=true, validate_shape=true, _device="/job:localhost/replica:0/task:0/gpu:0"](train/Dense/W_dense/Adam, train/zeros)]]
