**Modification:**
- Apply attention for time

In [1]:
import os
import argparse
import pandas as pd
import numpy as np
import pickle

In [2]:
from keras.models import Sequential, Model
from keras.layers.core import Dense
from keras.layers.recurrent import LSTM
from keras.layers import Input, Reshape, Flatten, merge
#from keras.utils.data_utils import get_file
from keras import optimizers
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras.layers.normalization import BatchNormalization
#from theano.ifelse import ifelse

Using Theano backend.


In [3]:
name = 'helpdesk'
sub_name = 'at' #activity-time
args = {
    'inputdir': '../input/{}/'.format(name),   
    'outputdir': './output_files/{0}_{1}/'.format(name, sub_name)
}

args = argparse.Namespace(**args)

In [4]:
if not os.path.isdir(args.outputdir):
    os.makedirs(args.outputdir)

In [5]:
with open(args.inputdir + 'parameters.pkl', "rb") as f:
    maxlen = pickle.load(f)
    num_features = pickle.load(f)
    chartoindice = pickle.load(f)
    targetchartoindice = pickle.load(f)
    divisor = pickle.load(f)
    divisor2 = pickle.load(f)

In [6]:
with open(args.inputdir + 'preprocessed_data.pkl', "rb") as f:
    X = pickle.load(f)
    y_a = pickle.load(f)
    y_t = pickle.load(f)
    X_test = pickle.load(f)
    y_a_test = pickle.load(f)
    y_t_test = pickle.load(f)

In [7]:
from keras import backend as K
from keras.engine.topology import Layer, InputSpec
from keras import initializers, regularizers, constraints

In [8]:
class AttLayer(Layer):
    def __init__(self, **kwargs):
        self.init = initializers.get('normal')
        #self.input_spec = [InputSpec(ndim=3)]
        super(AttLayer, self).__init__(** kwargs)

    def build(self, input_shape):
        assert len(input_shape)==3
        #self.W = self.init((input_shape[-1],1))
        self.W = self.init((input_shape[-1],))
        #self.input_spec = [InputSpec(shape=input_shape)]
        self.trainable_weights = [self.W]
        super(AttLayer, self).build(input_shape)  # be sure you call this somewhere!

    def call(self, x, mask=None):
        eij = K.tanh(K.dot(x, self.W))

        ai = K.exp(eij)
        weights = ai/K.sum(ai, axis=1).dimshuffle(0,'x')

        weighted_input = x*weights.dimshuffle(0,1,'x')
        return weighted_input.sum(axis=1)

    def compute_output_shape(self, input_shape):
        return (input_shape[0], input_shape[-1])

In [9]:
# build the model: 
print('Build model attention for activity and output...')
main_input = Input(shape=(maxlen, num_features), name='main_input')

# one shared layer
l1 = LSTM(100, implementation=2, kernel_initializer="glorot_uniform", dropout=0.2, return_sequences=True)(main_input)
b1 = BatchNormalization()(l1)

# the layer specialized in activity prediction
l2_1 = LSTM(100, implementation=2, kernel_initializer="glorot_uniform", dropout=0.2, return_sequences=True)(b1) 
b2_1 = BatchNormalization()(l2_1)
att_1 = AttLayer()(b2_1)
# the layer specialized in time prediction
l2_2 = LSTM(100, implementation=2, kernel_initializer="glorot_uniform", dropout=0.2, return_sequences=True)(l1) 
b2_2 = BatchNormalization()(l2_2)
# attention
att_2 = AttLayer()(b2_2)

# last activation

act_output = Dense(len(targetchartoindice), activation="softmax", kernel_initializer="glorot_uniform", name="act_output")(att_1)
time_output = Dense(1, kernel_initializer="glorot_uniform", name="time_output")(att_2)



model = Model(inputs=[main_input], outputs=[act_output, time_output])

#compilations
opt = optimizers.Nadam(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-08, schedule_decay=0.004, clipvalue=3)
model.compile(loss={'act_output':'categorical_crossentropy', 'time_output':'mean_absolute_error'}, optimizer=opt)

#callbacks
early_stopping = EarlyStopping(monitor='val_loss', patience=83)
model_checkpoint = ModelCheckpoint(args.outputdir + 'model_{epoch:02d}-{val_loss:.2f}.h5', 
                                   monitor='val_loss', verbose=0, save_best_only=True, 
                                   save_weights_only=False, mode='auto')
lr_reducer = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, 
                               verbose=0, mode='auto', epsilon=0.0001, cooldown=0, min_lr=0)

#fit
model.fit(X, {'act_output':y_a, 'time_output':y_t}, validation_split=0.2, verbose=2, 
          callbacks=[early_stopping, model_checkpoint, lr_reducer], batch_size=16, epochs=500)

Build model attention for activity and output...


AttributeError: 'TensorVariable' object has no attribute 'get_value'

In [None]:
model.summary()