**Modification:**
- Use Bidirectional LSTM 

```l1 = Bidirectional(LSTM(100, consume_less='gpu', init='glorot_uniform', return_sequences=True, dropout_W=0.2))(main_input)```

- Change from target_chars to targetchartoindice (same len but target_chars not exist) 

```act_output = Dense(len(targetchartoindice), activation='softmax', init='glorot_uniform', name='act_output')(b2_1)```

# Load data

In [1]:
import os
import argparse
import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

In [2]:
from keras.models import Sequential, Model
from keras.layers.core import Dense
from keras.layers.recurrent import LSTM, GRU, SimpleRNN
from keras.layers import Input, merge
from keras.layers.wrappers import Bidirectional
from keras import optimizers
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, History
from keras.layers.normalization import BatchNormalization

Using Theano backend.


In [3]:
#name = 'bpi_12_w'
name = 'helpdesk'
args = {
    'inputdir': '../input/{}/'.format(name),   
    'outputdir': './output_files/{}/'.format(name)
}

args = argparse.Namespace(**args)

In [4]:
if not os.path.isdir(args.outputdir):
    os.makedirs(args.outputdir)

In [5]:
with open(args.inputdir + 'parameters.pkl', "rb") as f:
    maxlen = pickle.load(f)
    num_features = pickle.load(f)
    chartoindice = pickle.load(f)
    targetchartoindice = pickle.load(f)
    divisor = pickle.load(f)
    divisor2 = pickle.load(f)

In [6]:
with open(args.inputdir + 'preprocessed_data.pkl', "rb") as f:
    X = pickle.load(f)
    y_a = pickle.load(f)
    y_t = pickle.load(f)
    X_test = pickle.load(f)
    y_a_test = pickle.load(f)
    y_t_test = pickle.load(f)

# Model

In [7]:
print('Build model BLSTM...')

main_input = Input(shape=(maxlen, num_features), name='main_input')

# shared layer
l1 = Bidirectional(LSTM(50, return_sequences=True, kernel_initializer="glorot_uniform", implementation=2))(main_input) # the shared layer
b1 = BatchNormalization()(l1)

# layers
l2_1 = Bidirectional(LSTM(50, return_sequences=False, kernel_initializer="glorot_uniform", implementation=2, dropout=0.2))(b1) # the layer specialized in activity prediction
b2_1 = BatchNormalization()(l2_1)

l2_2 = Bidirectional(LSTM(50, return_sequences=False, kernel_initializer="glorot_uniform", implementation=2))(b1) # the layer specialized in time prediction
b2_2 = BatchNormalization()(l2_2)

act_output = Dense(len(targetchartoindice), kernel_initializer='glorot_uniform', activation='softmax', name='act_output')(b2_1)
time_output = Dense(1, kernel_initializer='glorot_uniform', name='time_output')(b2_2)

model = Model(inputs=[main_input], outputs=[act_output, time_output])

#compilations
opt = optimizers.Nadam(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-08, schedule_decay=0.004, clipvalue=3)
model.compile(loss={'act_output':'categorical_crossentropy', 'time_output':'mean_absolute_error'}, 
              optimizer=opt, metrics=['accuracy'])

#callbacks
early_stopping = EarlyStopping(monitor='val_loss', patience=500)
model_checkpoint = ModelCheckpoint(args.outputdir + 'model_{epoch:02d}-{val_loss:.2f}.h5', 
                                   monitor='val_loss', verbose=0, save_best_only=True, 
                                   save_weights_only=False, mode='auto')
lr_reducer = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, 
                               verbose=0, mode='auto', epsilon=0.0001, cooldown=0, min_lr=0)
history = History()

#fit
model.fit(X, {'act_output':y_a, 'time_output':y_t}, validation_split=0.2, verbose=2, 
          callbacks=[early_stopping, model_checkpoint, lr_reducer, history], batch_size=16, epochs=10000)

Build model BLSTM...
Train on 7344 samples, validate on 1837 samples
Epoch 1/10000
20s - loss: 1.9370 - act_output_loss: 0.8260 - time_output_loss: 1.1110 - act_output_acc: 0.7726 - time_output_acc: 0.2470 - val_loss: 1.7186 - val_act_output_loss: 0.6477 - val_time_output_loss: 1.0709 - val_act_output_acc: 0.8187 - val_time_output_acc: 0.2842
Epoch 2/10000
23s - loss: 1.6423 - act_output_loss: 0.6116 - time_output_loss: 1.0307 - act_output_acc: 0.8107 - time_output_acc: 0.2801 - val_loss: 1.5850 - val_act_output_loss: 0.5737 - val_time_output_loss: 1.0113 - val_act_output_acc: 0.8214 - val_time_output_acc: 0.2842
Epoch 3/10000
21s - loss: 1.6097 - act_output_loss: 0.5999 - time_output_loss: 1.0098 - act_output_acc: 0.8166 - time_output_acc: 0.2815 - val_loss: 1.5894 - val_act_output_loss: 0.5896 - val_time_output_loss: 0.9998 - val_act_output_acc: 0.8236 - val_time_output_acc: 0.2852
Epoch 4/10000
20s - loss: 1.6033 - act_output_loss: 0.5968 - time_output_loss: 1.0065 - act_output_acc:

KeyboardInterrupt: 

In [None]:
model.summary()

In [None]:
history.history.keys()

In [None]:
# summarize history for activity accuracy
plt.plot(history.history['act_output_acc'])
plt.plot(history.history['val_act_output_acc'])
plt.title('Activity accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'val'], loc='upper right')
plt.show()

In [None]:
# summarize history for activity loss
plt.plot(history.history['act_output_loss'])
plt.plot(history.history['val_act_output_loss'])
plt.title('Activity loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'val'], loc='upper right')
plt.show()

In [None]:
# summarize history for time loss
plt.plot(history.history['time_output_loss'])
plt.plot(history.history['val_time_output_loss'])
plt.title('Time loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'val'], loc='upper right')
plt.show()

In [None]:
# summarize history for model loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'val'], loc='upper right')
plt.show()