In [1]:
!git clone https://github.com/chsasank/ATIS.keras.git
!cd ATIS.keras

Cloning into 'ATIS.keras'...
remote: Enumerating objects: 30, done.[K
remote: Total 30 (delta 0), reused 0 (delta 0), pack-reused 30[K
Unpacking objects: 100% (30/30), done.


In [2]:
!pwd

/content


In [3]:
!cp -r ATIS.keras/* ./

In [4]:
import numpy as np
import data.load

In [5]:
train_set, valid_set, dicts = data.load.atisfull()
w2idx, labels2idx = dicts['words2idx'], dicts['labels2idx']

In [6]:
train_x, _, train_label = train_set
val_x, _, val_label = valid_set

In [7]:
# Create index to word/label dicts
idx2w  = {w2idx[k]:k for k in w2idx}
idx2la = {labels2idx[k]:k for k in labels2idx}

In [8]:
# For conlleval script
words_train = [ list(map(lambda x: idx2w[x], w)) for w in train_x]
labels_train = [ list(map(lambda x: idx2la[x], y)) for y in train_label]
words_val = [ list(map(lambda x: idx2w[x], w)) for w in val_x]
labels_val = [ list(map(lambda x: idx2la[x], y)) for y in val_label]

In [9]:
n_classes = len(idx2la)
n_vocab = len(idx2w)

In [10]:
print("Example sentence : {}".format(words_train[0]))
print("Encoded form: {}".format(train_x[0]))
print()
print("It's label : {}".format(labels_train[0]))
print("Encoded form: {}".format(train_label[0]))

Example sentence : ['i', 'want', 'to', 'fly', 'from', 'boston', 'at', 'DIGITDIGITDIGIT', 'am', 'and', 'arrive', 'in', 'denver', 'at', 'DIGITDIGITDIGITDIGIT', 'in', 'the', 'morning']
Encoded form: [232 542 502 196 208  77  62  10  35  40  58 234 137  62  11 234 481 321]

It's label : ['O', 'O', 'O', 'O', 'O', 'B-fromloc.city_name', 'O', 'B-depart_time.time', 'I-depart_time.time', 'O', 'O', 'O', 'B-toloc.city_name', 'O', 'B-arrive_time.time', 'O', 'O', 'B-arrive_time.period_of_day']
Encoded form: [126 126 126 126 126  48 126  35  99 126 126 126  78 126  14 126 126  12]


In [11]:
from keras.models import Sequential
from keras.layers.embeddings import Embedding
from keras.layers.recurrent import SimpleRNN
from keras.layers.core import Dense, Dropout
from keras.layers.wrappers import TimeDistributed
from keras.layers import Convolution1D

model = Sequential()
model.add(Embedding(n_vocab,100))
model.add(Dropout(0.25))
model.add(SimpleRNN(100,return_sequences=True))
model.add(TimeDistributed(Dense(n_classes, activation='softmax')))
model.compile('rmsprop', 'categorical_crossentropy')

In [13]:
print(model.summary())

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, None, 100)         57200     
_________________________________________________________________
dropout (Dropout)            (None, None, 100)         0         
_________________________________________________________________
simple_rnn (SimpleRNN)       (None, None, 100)         20100     
_________________________________________________________________
time_distributed (TimeDistri (None, None, 127)         12827     
Total params: 90,127
Trainable params: 90,127
Non-trainable params: 0
_________________________________________________________________
None


In [14]:
import progressbar
n_epochs = 3

for i in range(n_epochs):
    print("Training epoch {}".format(i))
    
    bar = progressbar.ProgressBar(max_value=len(train_x))
    for n_batch, sent in bar(enumerate(train_x)):
        label = train_label[n_batch]
        # Make labels one hot
        label = np.eye(n_classes)[label][np.newaxis,:] 
        # View each sentence as a batch
        sent = sent[np.newaxis,:]
        
        if sent.shape[1] > 1: #ignore 1 word sentences
            model.train_on_batch(sent, label)

  0% (6 of 4978) |                       | Elapsed Time: 0:00:00 ETA:   0:01:29

Training epoch 0


100% (4978 of 4978) |####################| Elapsed Time: 0:01:15 Time:  0:01:15
  0% (7 of 4978) |                       | Elapsed Time: 0:00:00 ETA:   0:01:16

Training epoch 1


100% (4978 of 4978) |####################| Elapsed Time: 0:01:16 Time:  0:01:16
  0% (7 of 4978) |                       | Elapsed Time: 0:00:00 ETA:   0:01:18

Training epoch 2


100% (4978 of 4978) |####################| Elapsed Time: 0:01:16 Time:  0:01:16


In [15]:
from metrics.accuracy import conlleval

labels_pred_val = []

bar = progressbar.ProgressBar(max_value=len(val_x))
for n_batch, sent in bar(enumerate(val_x)):
    label = val_label[n_batch]
    label = np.eye(n_classes)[label][np.newaxis,:]
    sent = sent[np.newaxis,:]

    pred = model.predict_on_batch(sent)
    pred = np.argmax(pred,-1)[0]
    labels_pred_val.append(pred)

labels_pred_val = [ list(map(lambda x: idx2la[x], y)) \
                                    for y in labels_pred_val]
con_dict = conlleval(labels_pred_val, labels_val, 
                            words_val, 'measure.txt')

print('Precision = {}, Recall = {}, F1 = {}'.format(
            con_dict['r'], con_dict['p'], con_dict['f1']))

100% (893 of 893) |######################| Elapsed Time: 0:00:06 Time:  0:00:06


Precision = 86.32, Recall = 88.28, F1 = 87.29
