This notebook trains the original model. It was run under Python 2 with Keras on the Theano backend.

# Load data

In [1]:
import os
import argparse
import pandas as pd
import numpy as np
import pickle

In [2]:
from keras.models import Sequential, Model
from keras.layers.core import Dense
from keras.layers.recurrent import LSTM, GRU, SimpleRNN
from keras.layers import Input
from keras.layers.wrappers import Bidirectional
from keras.utils.data_utils import get_file
from keras.regularizers import WeightRegularizer
from keras.optimizers import Nadam
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras.layers.normalization import BatchNormalization
from theano.ifelse import ifelse

Using Theano backend.


In [3]:
# Dataset identifier; it selects both the input and the output sub-directory.
name = 'helpdesk'

# Directory settings, exposed through attribute access (args.inputdir,
# args.outputdir). Both paths end with a trailing '/'.
args = argparse.Namespace(
    inputdir='../input/{}/'.format(name),
    outputdir='./output_files/{}/'.format(name)
)

In [4]:
# Make sure the output directory exists before anything is written to it.
# os.makedirs also creates missing parent directories; the isdir guard avoids
# the OSError it raises when the directory is already present.
if not os.path.isdir(args.outputdir):
    os.makedirs(args.outputdir)

In [5]:
# Load the preprocessing parameters from parameters.pkl.
#
# The file holds six objects pickled one after another, so they must be read
# back in exactly this order. The path is built with os.path.join so that it
# is correct whether or not args.inputdir ends with a path separator.
#
# NOTE: pickle.load can execute arbitrary code contained in the file; only
# load files produced by your own preprocessing run.
with open(os.path.join(args.inputdir, 'parameters.pkl'), "rb") as f:
    maxlen = pickle.load(f)              # number of time steps in the model input
    num_features = pickle.load(f)        # number of features per time step
    chartoindice = pickle.load(f)        # presumably input symbol -> index mapping; TODO confirm
    targetchartoindice = pickle.load(f)  # its length sizes the activity output layer
    divisor = pickle.load(f)             # presumably a time normalisation constant; TODO confirm
    divisor2 = pickle.load(f)            # presumably a second normalisation constant; TODO confirm

In [6]:
# Load the preprocessed arrays from preprocessed_data.pkl.
#
# The file holds six objects pickled one after another, so they must be read
# back in exactly this order. The path is built with os.path.join so that it
# is correct whether or not args.inputdir ends with a path separator.
#
# NOTE: pickle.load can execute arbitrary code contained in the file; only
# load files produced by your own preprocessing run.
with open(os.path.join(args.inputdir, 'preprocessed_data.pkl'), "rb") as f:
    X = pickle.load(f)         # training inputs passed to model.fit
    y_a = pickle.load(f)       # training targets for the 'act_output' head
    y_t = pickle.load(f)       # training targets for the 'time_output' head
    X_test = pickle.load(f)    # test-set counterparts (by name); not used during training
    y_a_test = pickle.load(f)
    y_t_test = pickle.load(f)

# Model

In [7]:
# Build, compile and train the two-headed model: one shared LSTM layer feeds
# two task-specific LSTM branches, one predicting the activity (softmax) and
# one predicting a single time value (linear Dense unit).
print('Build model...')
main_input = Input(shape=(maxlen, num_features), name='main_input')

# train a 2-layer LSTM with one shared layer
# return_sequences=True: the shared layer emits one vector per time step so the
# second-level LSTMs can consume the whole sequence.
l1 = LSTM(100, consume_less='gpu', init='glorot_uniform', return_sequences=True, dropout_W=0.2)(main_input) # the shared layer
b1 = BatchNormalization()(l1)

# Both branches read the same normalised shared representation (b1) and return
# only their final state (return_sequences=False).
l2_1 = LSTM(100, consume_less='gpu', init='glorot_uniform', return_sequences=False, dropout_W=0.2)(b1) # the layer specialized in activity prediction
b2_1 = BatchNormalization()(l2_1)
l2_2 = LSTM(100, consume_less='gpu', init='glorot_uniform', return_sequences=False, dropout_W=0.2)(b1) # the layer specialized in time prediction
b2_2 = BatchNormalization()(l2_2)

# Output heads: one softmax unit per entry of targetchartoindice, and a single
# unit with no activation for the time regression.
act_output = Dense(len(targetchartoindice), activation='softmax', init='glorot_uniform', name='act_output')(b2_1)
time_output = Dense(1, init='glorot_uniform', name='time_output')(b2_2)

model = Model(input=[main_input], output=[act_output, time_output])

# clipvalue=3 clips each gradient component to [-3, 3].
opt = Nadam(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-08, schedule_decay=0.004, clipvalue=3)

# One loss per head, keyed by output-layer name. No loss_weights are given, so
# the reported total loss is the plain sum of the two head losses.
model.compile(loss={'act_output':'categorical_crossentropy', 'time_output':'mae'}, optimizer=opt)

# All callbacks watch the combined validation loss:
#  - stop training after 42 epochs without improvement;
#  - save the full model (not just weights) whenever val_loss improves;
#  - halve the learning rate after 10 epochs without improvement.
early_stopping = EarlyStopping(monitor='val_loss', patience=42)
# os.path.join keeps the checkpoint inside args.outputdir even if that path has
# no trailing separator. The {epoch}/{val_loss} fields are filled in by Keras.
model_checkpoint = ModelCheckpoint(os.path.join(args.outputdir, 'model_{epoch:02d}-{val_loss:.2f}.h5'),
                                   monitor='val_loss', verbose=0, save_best_only=True,
                                   save_weights_only=False, mode='auto')
lr_reducer = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, verbose=0,
                               mode='auto', epsilon=0.0001, cooldown=0, min_lr=0)

# 20% of the training samples are held out for validation.
# NOTE(review): batch_size is tied to maxlen (the sequence length); confirm
# this coupling is intentional rather than a stand-in for a tuned batch size.
model.fit(X, {'act_output':y_a, 'time_output':y_t}, validation_split=0.2, verbose=2,
          callbacks=[early_stopping, model_checkpoint, lr_reducer], batch_size=maxlen, nb_epoch=500)

Build model...
Train on 7344 samples, validate on 1837 samples
Epoch 1/500
17s - loss: 2.1357 - act_output_loss: 0.9691 - time_output_loss: 1.1666 - val_loss: 1.7252 - val_act_output_loss: 0.6687 - val_time_output_loss: 1.0565
Epoch 2/500
18s - loss: 1.7551 - act_output_loss: 0.7126 - time_output_loss: 1.0425 - val_loss: 1.5890 - val_act_output_loss: 0.6077 - val_time_output_loss: 0.9813
Epoch 3/500
18s - loss: 1.7108 - act_output_loss: 0.6927 - time_output_loss: 1.0181 - val_loss: 1.6556 - val_act_output_loss: 0.6539 - val_time_output_loss: 1.0017
Epoch 4/500
17s - loss: 1.6910 - act_output_loss: 0.6768 - time_output_loss: 1.0142 - val_loss: 1.5662 - val_act_output_loss: 0.5818 - val_time_output_loss: 0.9845
Epoch 5/500
17s - loss: 1.6850 - act_output_loss: 0.6777 - time_output_loss: 1.0073 - val_loss: 1.5739 - val_act_output_loss: 0.5804 - val_time_output_loss: 0.9935
Epoch 6/500
17s - loss: 1.6667 - act_output_loss: 0.6627 - time_output_loss: 1.0040 - val_loss: 1.5550 - val_act_outp

17s - loss: 1.5935 - act_output_loss: 0.6070 - time_output_loss: 0.9865 - val_loss: 1.5225 - val_act_output_loss: 0.5657 - val_time_output_loss: 0.9568
Epoch 52/500
17s - loss: 1.5912 - act_output_loss: 0.6052 - time_output_loss: 0.9860 - val_loss: 1.5263 - val_act_output_loss: 0.5673 - val_time_output_loss: 0.9590
Epoch 53/500
17s - loss: 1.5929 - act_output_loss: 0.6085 - time_output_loss: 0.9844 - val_loss: 1.5223 - val_act_output_loss: 0.5660 - val_time_output_loss: 0.9562
Epoch 54/500
17s - loss: 1.5886 - act_output_loss: 0.6077 - time_output_loss: 0.9809 - val_loss: 1.5192 - val_act_output_loss: 0.5657 - val_time_output_loss: 0.9535
Epoch 55/500
17s - loss: 1.5928 - act_output_loss: 0.6064 - time_output_loss: 0.9863 - val_loss: 1.5185 - val_act_output_loss: 0.5644 - val_time_output_loss: 0.9541
Epoch 56/500
20s - loss: 1.5965 - act_output_loss: 0.6095 - time_output_loss: 0.9871 - val_loss: 1.5271 - val_act_output_loss: 0.5738 - val_time_output_loss: 0.9533
Epoch 57/500
18s - loss

17s - loss: 1.5770 - act_output_loss: 0.5974 - time_output_loss: 0.9796 - val_loss: 1.5172 - val_act_output_loss: 0.5672 - val_time_output_loss: 0.9500
Epoch 102/500
18s - loss: 1.5825 - act_output_loss: 0.5968 - time_output_loss: 0.9857 - val_loss: 1.5178 - val_act_output_loss: 0.5678 - val_time_output_loss: 0.9500
Epoch 103/500
17s - loss: 1.5809 - act_output_loss: 0.5963 - time_output_loss: 0.9846 - val_loss: 1.5167 - val_act_output_loss: 0.5662 - val_time_output_loss: 0.9504
Epoch 104/500
17s - loss: 1.5816 - act_output_loss: 0.6010 - time_output_loss: 0.9806 - val_loss: 1.5172 - val_act_output_loss: 0.5667 - val_time_output_loss: 0.9505
Epoch 105/500
17s - loss: 1.5731 - act_output_loss: 0.5921 - time_output_loss: 0.9810 - val_loss: 1.5161 - val_act_output_loss: 0.5657 - val_time_output_loss: 0.9504
Epoch 106/500
17s - loss: 1.5765 - act_output_loss: 0.5969 - time_output_loss: 0.9797 - val_loss: 1.5160 - val_act_output_loss: 0.5666 - val_time_output_loss: 0.9494
Epoch 107/500
18s 

<keras.callbacks.History at 0x7f75fcee1950>

In [8]:
# Print the layer-by-layer overview of the model: output shapes, parameter
# counts and which layer each one is connected to.
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
main_input (InputLayer)          (None, 15, 14)        0                                            
____________________________________________________________________________________________________
lstm_1 (LSTM)                    (None, 15, 100)       46000       main_input[0][0]                 
____________________________________________________________________________________________________
batchnormalization_1 (BatchNorma (None, 15, 100)       400         lstm_1[0][0]                     
____________________________________________________________________________________________________
lstm_2 (LSTM)                    (None, 100)           80400       batchnormalization_1[0][0]       
___________________________________________________________________________________________