**Modification:**
- Use a bidirectional LSTM:

```l1 = Bidirectional(LSTM(100, consume_less='gpu', init='glorot_uniform', return_sequences=True, dropout_W=0.2))(main_input)```

- Replace `target_chars` with `targetchartoindice` (same length, but `target_chars` does not exist in this notebook):

```act_output = Dense(len(targetchartoindice), activation='softmax', init='glorot_uniform', name='act_output')(b2_1)```

# Load data

In [1]:
import os
import argparse
import pandas as pd
import numpy as np
import pickle

In [2]:
from keras.models import Sequential, Model
from keras.layers.core import Dense
from keras.layers.recurrent import LSTM, GRU, SimpleRNN
from keras.layers import Input
from keras.layers.wrappers import Bidirectional
from keras.utils.data_utils import get_file
from keras.regularizers import WeightRegularizer
from keras.optimizers import Nadam
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras.layers.normalization import BatchNormalization
from theano.ifelse import ifelse

Using Theano backend.
ERROR (theano.sandbox.cuda): Failed to compile cuda_ndarray.cu: libcublas.so.8.0: cannot open shared object file: No such file or directory


In [3]:
# Dataset identifier; both the input and the output directory are derived from it.
name = 'helpdesk'

# Notebook-wide settings, exposed as attributes (args.inputdir, args.outputdir).
args = argparse.Namespace(
    inputdir='../input/{}/'.format(name),
    outputdir='./output_files/{}/'.format(name)
)

In [4]:
# Make sure the output directory (and any missing parents) exists before
# anything is written into it.
output_dir_exists = os.path.isdir(args.outputdir)
if not output_dir_exists:
    os.makedirs(args.outputdir)

In [5]:
# Restore the six objects stored back-to-back in parameters.pkl; they are read
# in the same order in which they were dumped.
#   maxlen, num_features -> define the model's input shape
#   targetchartoindice   -> its length sizes the activity output layer
#   chartoindice, divisor, divisor2 -> not used in the cells shown here;
#       presumably the input symbol index and time-normalisation constants
#       (TODO confirm against the preprocessing step)
with open(args.inputdir + 'parameters.pkl', "rb") as f:
    (maxlen,
     num_features,
     chartoindice,
     targetchartoindice,
     divisor,
     divisor2) = tuple(pickle.load(f) for _ in range(6))

In [6]:
# Restore the six arrays stored back-to-back in preprocessed_data.pkl.
#   X          -> model input passed to model.fit
#   y_a, y_t   -> targets for the 'act_output' and 'time_output' heads
#   *_test     -> counterparts of the above, presumably the held-out split
#                 (not used in the cells shown here -- TODO confirm)
with open(args.inputdir + 'preprocessed_data.pkl', "rb") as f:
    (X,
     y_a,
     y_t,
     X_test,
     y_a_test,
     y_t_test) = tuple(pickle.load(f) for _ in range(6))

In [10]:
# Build, compile and train a two-output network (Keras 1.x API):
# one shared bidirectional LSTM feeding two specialised bidirectional LSTM
# branches, one for activity prediction and one for time prediction.
print('Build model...')
main_input = Input(shape=(maxlen, num_features), name='main_input')

# Keyword arguments common to every LSTM in the network.
lstm_options = dict(consume_less='gpu', init='glorot_uniform')

# Shared layer: returns the full sequence so both branches can consume it.
# NOTE(review): the notebook's intro markdown shows dropout_W=0.2 on this
# layer, but none is set here -- confirm which one is intended.
l1 = Bidirectional(LSTM(100, return_sequences=True, **lstm_options))(main_input)
b1 = BatchNormalization()(l1)

# Branch specialised in activity prediction.
l2_1 = Bidirectional(
    LSTM(100, return_sequences=False, dropout_W=0.2, **lstm_options)
)(b1)
b2_1 = BatchNormalization()(l2_1)

# Branch specialised in time prediction.
l2_2 = Bidirectional(
    LSTM(75, return_sequences=False, dropout_W=0.5, **lstm_options)
)(b1)
b2_2 = BatchNormalization()(l2_2)

# Output heads: a softmax with one unit per entry of targetchartoindice, and a
# single unit (no activation specified) for the time target.
act_output = Dense(
    len(targetchartoindice),
    activation='softmax',
    init='glorot_uniform',
    name='act_output'
)(b2_1)
time_output = Dense(1, init='glorot_uniform', name='time_output')(b2_2)

model = Model(input=[main_input], output=[act_output, time_output])

opt = Nadam(
    lr=0.002,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-08,
    schedule_decay=0.004,
    clipvalue=3
)

# One loss per head, keyed by output layer name.
losses = {'act_output':'categorical_crossentropy', 'time_output':'mae'}
model.compile(loss=losses, optimizer=opt)

# All three callbacks watch the validation loss.
early_stopping = EarlyStopping(monitor='val_loss', patience=83)
# Keeps only checkpoints that improve val_loss; saves the full model, not just weights.
checkpoint_path = args.outputdir + 'model_{epoch:02d}-{val_loss:.2f}.h5'
model_checkpoint = ModelCheckpoint(
    checkpoint_path,
    monitor='val_loss',
    verbose=0,
    save_best_only=True,
    save_weights_only=False,
    mode='auto'
)
# Halves the learning rate after 10 epochs without improvement.
lr_reducer = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=10,
    verbose=0,
    mode='auto',
    epsilon=0.0001,
    cooldown=0,
    min_lr=0
)

# NOTE(review): batch_size is tied to maxlen (the sequence length) -- confirm
# this coupling is intentional rather than a leftover.
targets = {'act_output':y_a, 'time_output':y_t}
model.fit(
    X,
    targets,
    validation_split=0.2,
    verbose=2,
    callbacks=[early_stopping, model_checkpoint, lr_reducer],
    batch_size=maxlen,
    nb_epoch=500
)

Build model...
Train on 7344 samples, validate on 1837 samples
Epoch 1/500
28s - loss: 2.0344 - act_output_loss: 0.8110 - time_output_loss: 1.2234 - val_loss: 1.6764 - val_act_output_loss: 0.6359 - val_time_output_loss: 1.0405
Epoch 2/500
32s - loss: 1.6682 - act_output_loss: 0.6292 - time_output_loss: 1.0390 - val_loss: 1.6836 - val_act_output_loss: 0.6735 - val_time_output_loss: 1.0100
Epoch 3/500
32s - loss: 1.6325 - act_output_loss: 0.6145 - time_output_loss: 1.0180 - val_loss: 1.5922 - val_act_output_loss: 0.5998 - val_time_output_loss: 0.9923
Epoch 4/500
32s - loss: 1.6175 - act_output_loss: 0.6051 - time_output_loss: 1.0124 - val_loss: 1.6028 - val_act_output_loss: 0.6078 - val_time_output_loss: 0.9950
Epoch 5/500
32s - loss: 1.6186 - act_output_loss: 0.6111 - time_output_loss: 1.0074 - val_loss: 1.5900 - val_act_output_loss: 0.6158 - val_time_output_loss: 0.9742
Epoch 6/500
32s - loss: 1.6033 - act_output_loss: 0.5975 - time_output_loss: 1.0058 - val_loss: 1.5905 - val_act_outp

21s - loss: 1.5081 - act_output_loss: 0.5392 - time_output_loss: 0.9688 - val_loss: 1.5134 - val_act_output_loss: 0.5700 - val_time_output_loss: 0.9435
Epoch 52/500
21s - loss: 1.5083 - act_output_loss: 0.5382 - time_output_loss: 0.9701 - val_loss: 1.5097 - val_act_output_loss: 0.5690 - val_time_output_loss: 0.9407
Epoch 53/500
21s - loss: 1.4973 - act_output_loss: 0.5325 - time_output_loss: 0.9647 - val_loss: 1.5057 - val_act_output_loss: 0.5656 - val_time_output_loss: 0.9401
Epoch 54/500
21s - loss: 1.5052 - act_output_loss: 0.5353 - time_output_loss: 0.9699 - val_loss: 1.5048 - val_act_output_loss: 0.5632 - val_time_output_loss: 0.9416
Epoch 55/500
21s - loss: 1.4956 - act_output_loss: 0.5283 - time_output_loss: 0.9673 - val_loss: 1.5076 - val_act_output_loss: 0.5675 - val_time_output_loss: 0.9400
Epoch 56/500
20s - loss: 1.5050 - act_output_loss: 0.5319 - time_output_loss: 0.9730 - val_loss: 1.5103 - val_act_output_loss: 0.5679 - val_time_output_loss: 0.9424
Epoch 57/500
21s - loss

20s - loss: 1.4809 - act_output_loss: 0.5128 - time_output_loss: 0.9681 - val_loss: 1.5102 - val_act_output_loss: 0.5718 - val_time_output_loss: 0.9384
Epoch 102/500
20s - loss: 1.4854 - act_output_loss: 0.5171 - time_output_loss: 0.9682 - val_loss: 1.5133 - val_act_output_loss: 0.5730 - val_time_output_loss: 0.9403
Epoch 103/500
20s - loss: 1.4749 - act_output_loss: 0.5075 - time_output_loss: 0.9674 - val_loss: 1.5118 - val_act_output_loss: 0.5725 - val_time_output_loss: 0.9393
Epoch 104/500
20s - loss: 1.4795 - act_output_loss: 0.5133 - time_output_loss: 0.9662 - val_loss: 1.5132 - val_act_output_loss: 0.5727 - val_time_output_loss: 0.9404
Epoch 105/500
20s - loss: 1.4818 - act_output_loss: 0.5132 - time_output_loss: 0.9686 - val_loss: 1.5140 - val_act_output_loss: 0.5735 - val_time_output_loss: 0.9405


<keras.callbacks.History at 0x7ff1c44f3c50>

In [11]:
# Print a per-layer table of the trained model: layer type, output shape,
# parameter count and the layer(s) each one is connected to.
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
main_input (InputLayer)          (None, 15, 14)        0                                            
____________________________________________________________________________________________________
bidirectional_10 (Bidirectional) (None, 15, 200)       92000       main_input[0][0]                 
____________________________________________________________________________________________________
batchnormalization_10 (BatchNorm (None, 15, 200)       800         bidirectional_10[0][0]           
____________________________________________________________________________________________________
bidirectional_11 (Bidirectional) (None, 200)           240800      batchnormalization_10[0][0]      
___________________________________________________________________________________________