**Modification:**
- Use Bidirectional LSTM 

```l1 = Bidirectional(LSTM(100, consume_less='gpu', init='glorot_uniform', return_sequences=True, dropout_W=0.2))(main_input)```

- Replace `target_chars` with `targetchartoindice` (same length, but `target_chars` does not exist in this notebook)

```act_output = Dense(len(targetchartoindice), activation='softmax', init='glorot_uniform', name='act_output')(b2_1)```

# Load data

In [1]:
import os
import argparse
import pandas as pd
import numpy as np
import pickle

In [2]:
from keras.models import Sequential, Model
from keras.layers.core import Dense
from keras.layers.recurrent import LSTM, GRU, SimpleRNN
from keras.layers import Input, merge
from keras.layers.wrappers import Bidirectional
from keras.utils.data_utils import get_file
#from keras.regularizers import WeightRegularizer
from keras import optimizers
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras.layers.normalization import BatchNormalization
from theano.ifelse import ifelse

Using Theano backend.


In [3]:
# Dataset identifier; it selects the input and output sub-directories below.
name = 'helpdesk'

# Directory settings, exposed as attributes (args.inputdir / args.outputdir).
args = argparse.Namespace(
    inputdir='../input/{}/'.format(name),
    outputdir='./output_files/{}/'.format(name),
)

In [4]:
# Create the output directory (including missing parents) on first use.
output_dir_missing = not os.path.isdir(args.outputdir)
if output_dir_missing:
    os.makedirs(args.outputdir)

In [5]:
# parameters.pkl holds six objects pickled one after another; they are read
# back in that same order.
# NOTE(review): pickle.load can execute arbitrary code - only open files you trust.
parameters_path = args.inputdir + 'parameters.pkl'
with open(parameters_path, "rb") as f:
    (maxlen, num_features, chartoindice,
     targetchartoindice, divisor, divisor2) = [pickle.load(f) for _n in range(6)]

In [6]:
# preprocessed_data.pkl holds six objects pickled one after another: the
# training triple (X, y_a, y_t) followed by the test triple.
# NOTE(review): pickle.load can execute arbitrary code - only open files you trust.
data_path = args.inputdir + 'preprocessed_data.pkl'
with open(data_path, "rb") as f:
    (X, y_a, y_t,
     X_test, y_a_test, y_t_test) = [pickle.load(f) for _n in range(6)]

# Model

In [7]:
print('Build model BLSTM...')

# Keyword arguments common to every LSTM layer of the network.
lstm_common = dict(kernel_initializer="glorot_uniform", implementation=2)

# Network input: one (maxlen, num_features) matrix per sample.
main_input = Input(shape=(maxlen, num_features), name='main_input')

# Shared trunk: a sequence-returning bidirectional LSTM that feeds both heads.
l1 = Bidirectional(LSTM(50, return_sequences=True, **lstm_common))(main_input)
b1 = BatchNormalization()(l1)

# Activity head (dropout 0.2): reduces the sequence to a single vector.
l2_1 = Bidirectional(LSTM(50, return_sequences=False, dropout=0.2, **lstm_common))(b1)
b2_1 = BatchNormalization()(l2_1)

# Time head (dropout 0.5): same structure, also fed from the shared trunk.
l2_2 = Bidirectional(LSTM(50, return_sequences=False, dropout=0.5, **lstm_common))(b1)
b2_2 = BatchNormalization()(l2_2)

# Outputs: a softmax with one unit per entry of targetchartoindice, and a
# single unit (no activation specified) for the time target.
act_output = Dense(
    len(targetchartoindice),
    activation='softmax',
    kernel_initializer='glorot_uniform',
    name='act_output',
)(b2_1)
time_output = Dense(1, kernel_initializer='glorot_uniform', name='time_output')(b2_2)

model = Model(inputs=[main_input], outputs=[act_output, time_output])

# Compilation: one loss per named output, optimised with Nadam.
opt = optimizers.Nadam(
    lr=0.002,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-08,
    schedule_decay=0.004,
    clipvalue=3,
)
losses = {
    'act_output': 'categorical_crossentropy',
    'time_output': 'mean_absolute_error',
}
model.compile(loss=losses, optimizer=opt)

# Callbacks: all three watch the combined validation loss.
early_stopping = EarlyStopping(monitor='val_loss', patience=83)
model_checkpoint = ModelCheckpoint(
    args.outputdir + 'model_{epoch:02d}-{val_loss:.2f}.h5',
    monitor='val_loss',
    verbose=0,
    save_best_only=True,
    save_weights_only=False,
    mode='auto',
)
lr_reducer = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=10,
    verbose=0,
    mode='auto',
    epsilon=0.0001,
    cooldown=0,
    min_lr=0,
)
callbacks = [early_stopping, model_checkpoint, lr_reducer]

# Fit: targets are keyed by output-layer name; validation_split=0.2 reserves
# part of the training samples for validation.
targets = {'act_output': y_a, 'time_output': y_t}
model.fit(
    X,
    targets,
    validation_split=0.2,
    verbose=2,
    callbacks=callbacks,
    batch_size=16,
    epochs=500,
)

Build model BLSTM...
Train on 7344 samples, validate on 1837 samples
Epoch 1/500
21s - loss: 2.0070 - act_output_loss: 0.7981 - time_output_loss: 1.2089 - val_loss: 1.6749 - val_act_output_loss: 0.6253 - val_time_output_loss: 1.0496
Epoch 2/500
22s - loss: 1.6433 - act_output_loss: 0.6108 - time_output_loss: 1.0325 - val_loss: 1.5531 - val_act_output_loss: 0.5734 - val_time_output_loss: 0.9797
Epoch 3/500
22s - loss: 1.6073 - act_output_loss: 0.5981 - time_output_loss: 1.0092 - val_loss: 1.5958 - val_act_output_loss: 0.6015 - val_time_output_loss: 0.9942
Epoch 4/500
21s - loss: 1.5915 - act_output_loss: 0.5891 - time_output_loss: 1.0024 - val_loss: 1.5700 - val_act_output_loss: 0.5977 - val_time_output_loss: 0.9722
Epoch 5/500
22s - loss: 1.5888 - act_output_loss: 0.5904 - time_output_loss: 0.9983 - val_loss: 1.5370 - val_act_output_loss: 0.5666 - val_time_output_loss: 0.9704
Epoch 6/500
22s - loss: 1.5887 - act_output_loss: 0.5944 - time_output_loss: 0.9944 - val_loss: 1.6063 - val_ac

<keras.callbacks.History at 0x7ff11cadee10>

In [8]:
# Print the layer-by-layer summary (output shapes, parameter counts, connections)
# of the model currently bound to `model`.
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
main_input (InputLayer)          (None, 15, 14)        0                                            
____________________________________________________________________________________________________
bidirectional_1 (Bidirectional)  (None, 15, 100)       26000       main_input[0][0]                 
____________________________________________________________________________________________________
batch_normalization_1 (BatchNorm (None, 15, 100)       400         bidirectional_1[0][0]            
____________________________________________________________________________________________________
bidirectional_2 (Bidirectional)  (None, 100)           60400       batch_normalization_1[0][0]      
___________________________________________________________________________________________

# Model

In [None]:
def reshapeOutput(y_a, width=None):
    """Zero-pad the columns of ``y_a`` and insert a length-1 middle axis.

    Parameters
    ----------
    y_a : array-like of shape (n_samples, n_columns)
        Two-dimensional target matrix.
    width : int, optional
        Number of columns after padding. Defaults to the notebook-level
        ``num_features``.

    Returns
    -------
    numpy.ndarray of shape (n_samples, 1, width)
        ``y_a`` right-padded with zeros along its last axis, then reshaped.

    Raises
    ------
    ValueError
        If ``y_a`` is not two-dimensional or has more than ``width`` columns.
    """
    if width is None:
        # Fall back to the notebook global only when no explicit width is given.
        width = num_features

    y_a = np.asarray(y_a)
    if y_a.ndim != 2:
        raise ValueError(
            "y_a must be 2-D, got an array with {} dimension(s)".format(y_a.ndim))

    n_samples, n_columns = y_a.shape
    if n_columns > width:
        raise ValueError(
            "y_a has {} columns, which exceeds the target width {}".format(n_columns, width))

    # Pad only on the right of the column axis; rows are left untouched.
    pad_output = np.pad(y_a, [(0, 0), (0, width - n_columns)],
                        mode='constant', constant_values=0)
    # Reshape the padded array (the original code reshaped an undefined name `b`).
    return pad_output.reshape((n_samples, 1, width))

In [None]:
# Pass the activity targets y_a through reshapeOutput and keep the result as `a`.
a = reshapeOutput(y_a)

In [None]:
# Display the shape of the reshaped targets.
a.shape

In [None]:
# `Concatenate` is never imported in this notebook, and X / a are NumPy arrays
# rather than Keras tensors, so the arrays are joined with NumPy instead.
# Assumes X is (n_samples, maxlen, num_features) and a is
# (n_samples, 1, num_features), so axis 1 is the only axis on which they can
# be joined - TODO confirm this is the intended axis.
X_with_act = np.concatenate([X, a], axis=1)
X_with_act.shape

In [None]:
from keras.layers.core import Reshape
from keras.layers.convolutional import ZeroPadding1D
import keras

In [None]:
from keras.layers import Input, Embedding, LSTM, Dense
from keras.models import Model

# Main input: integer (int32) sequences of length 100. The layer is named
# explicitly through its "name" argument.
main_input = Input(shape=(100,), dtype='int32', name='main_input')

# Embed every integer of the sequence (input_dim=10000) as a dense
# 512-dimensional vector.
x = Embedding(output_dim=512, input_dim=10000, input_length=100)(main_input)

# Reduce the embedded sequence to a single 32-unit vector.
lstm_out = LSTM(32)(x)

# Auxiliary sigmoid output taken directly from the LSTM vector.
auxiliary_output = Dense(1, activation='sigmoid', name='aux_output')(lstm_out)

# Second input with 5 features, joined to the LSTM vector.
auxiliary_input = Input(shape=(5,), name='aux_input')
x = keras.layers.concatenate([lstm_out, auxiliary_input])

# Three stacked 64-unit ReLU layers on top of the joined features.
for _layer in range(3):
    x = Dense(64, activation='relu')(x)

# Main sigmoid output, then the two-input / two-output model.
main_output = Dense(1, activation='sigmoid', name='main_output')(x)
model = Model(
    inputs=[main_input, auxiliary_input],
    outputs=[main_output, auxiliary_output],
)

In [None]:
# Print the layer-by-layer summary of the model currently bound to `model`.
model.summary()