In [3]:
import data.load
import numpy as np

In [4]:
train_set, valid_set, dicts = data.load.atisfull()
w2idx, labels2idx = dicts['words2idx'], dicts['labels2idx']

In [5]:
train_x, _, train_label = train_set
val_x, _, val_label = valid_set

In [6]:
# Create index to word/label dicts
idx2w  = {w2idx[k]:k for k in w2idx}
idx2la = {labels2idx[k]:k for k in labels2idx}


In [7]:
# For conlleval script
words_train = [ list(map(lambda x: idx2w[x], w)) for w in train_x]
labels_train = [ list(map(lambda x: idx2la[x], y)) for y in train_label]
words_val = [ list(map(lambda x: idx2w[x], w)) for w in val_x]
labels_val = [ list(map(lambda x: idx2la[x], y)) for y in val_label]


In [41]:
n_classes = len(idx2la)
n_vocab = len(idx2w)
print(n_vocab)

572


In [9]:
print("Example sentence : {}".format(words_train[1]))
print("Encoded form: {}".format(train_x[1]))
print()
print("It's label : {}".format(labels_train[1]))
print("Encoded form: {}".format(train_label[1]))

Example sentence : ['what', 'flights', 'are', 'available', 'from', 'pittsburgh', 'to', 'baltimore', 'on', 'thursday', 'morning']
Encoded form: [554 194  50  66 208 379 502  69 358 496 321]

It's label : ['O', 'O', 'O', 'O', 'O', 'B-fromloc.city_name', 'O', 'B-toloc.city_name', 'O', 'B-depart_date.day_name', 'B-depart_time.period_of_day']
Encoded form: [126 126 126 126 126  48 126  78 126  26  33]


In [29]:
import keras
from keras.models import Sequential
from keras.layers.embeddings import Embedding
from keras.layers.recurrent import SimpleRNN, GRU, LSTM
from keras.layers.core import Dense, Dropout
from keras.layers.wrappers import TimeDistributed
from keras.layers import Convolution1D
from keras.callbacks import ModelCheckpoint, TensorBoard

In [30]:
model = Sequential()
model.add(Embedding(n_vocab,100))
model.add(Convolution1D(188,5,border_mode='same', activation='relu'))
model.add(Dropout(0.25))
model.add(LSTM(100,return_sequences=True))
model.add(TimeDistributed(Dense(n_classes, activation='softmax')))
model.compile('rmsprop', 'categorical_crossentropy')

  app.launch_new_instance()


In [31]:
import progressbar

In [43]:
n_epochs = 30

#Callbacks
path = "output/"
checkpointer = ModelCheckpoint(path+"ATIS_LSTM-"+str(i)+".h5", verbose=0,
                          save_best_only=True)
tensorboard = TensorBoard(log_dir=path, write_images=True, 
                      write_graph=True, histogram_freq=0)

In [42]:
for i in range(n_epochs):
    print("Training epoch {}".format(i))
    
    bar = progressbar.ProgressBar(max_value=len(train_x))
    for n_batch, sent in bar(enumerate(train_x)):
        label = train_label[n_batch]
        # Make labels one hot
        label = np.eye(n_classes)[label][np.newaxis,:] 
        # View each sentence as a batch
        sent = sent[np.newaxis,:]
        
        if sent.shape[1] > 1: #ignore 1 word sentences
            model.train_on_batch(sent, label)     
            
    from metrics.accuracy import conlleval
    labels_pred_val = []
    bar = progressbar.ProgressBar(max_value=len(val_x))
    for n_batch, sent in bar(enumerate(val_x)):
        label = val_label[n_batch]
        label = np.eye(n_classes)[label][np.newaxis,:]
        sent = sent[np.newaxis,:]

        pred = model.predict_on_batch(sent)
        pred = np.argmax(pred,-1)[0]
        labels_pred_val.append(pred)

    labels_pred_val = [ list(map(lambda x: idx2la[x], y)) \
                                        for y in labels_pred_val]
    con_dict = conlleval(labels_pred_val, labels_val, 
                                words_val, 'measure.txt')

    print('Precision = {}, Recall = {}, F1 = {}'.format(
                con_dict['r'], con_dict['p'], con_dict['f1']))
    
    
    
    model.save_weights(path+"ATIS_LSTM-"+str(i)+".h5")
    model_json = model.to_json()
    with open(path+"model_lstm.json","w") as jf:
        jf.write(model_json)


  0% (12 of 4978) |                       | Elapsed Time: 0:00:00 ETA:  0:00:42

Training epoch 0


100% (4978 of 4978) |#####################| Elapsed Time: 0:00:31 Time: 0:00:31
100% (893 of 893) |#######################| Elapsed Time: 0:00:01 Time: 0:00:01
  0% (14 of 4978) |                       | Elapsed Time: 0:00:00 ETA:  0:00:36

Precision = 92.7, Recall = 93.43, F1 = 93.06
Training epoch 1


100% (4978 of 4978) |#####################| Elapsed Time: 0:00:35 Time: 0:00:35
100% (893 of 893) |#######################| Elapsed Time: 0:00:01 Time: 0:00:01


Precision = 93.16, Recall = 93.86, F1 = 93.51
