In [1]:
import os
import pickle

import numpy as np
import pandas as pd

from utils import * 

In [2]:
from keras.models import Sequential, Model
from keras.layers.core import Dense
from keras.layers.recurrent import LSTM, GRU, SimpleRNN
from keras.layers import Input
from keras.layers.wrappers import Bidirectional
from keras.utils.data_utils import get_file
from keras.regularizers import WeightRegularizer
from keras.optimizers import Nadam
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras.layers.normalization import BatchNormalization
from theano.ifelse import ifelse

Using Theano backend.


In [3]:
# Restore the preprocessing parameters. The six objects were pickled back to
# back into one file, so they are read with six consecutive pickle.load calls
# and bound, in that exact order, to the names below.
# NOTE(review): pickle.load can execute arbitrary code -- only point this at
# files produced by the project's own preprocessing step.
with open('../input/parameters.pkl', "rb") as f:
    (
        maxlen,
        num_features,
        chartoindice,
        targetchartoindice,
        divisor,
        divisor2,
    ) = (pickle.load(f) for _ in range(6))

In [4]:
# Restore the preprocessed arrays. The file holds six consecutive pickles:
# the training inputs and the two training targets, followed by the same three
# objects for the test set. They are read back in that exact order.
# NOTE(review): pickle.load can execute arbitrary code -- only point this at
# files produced by the project's own preprocessing step.
with open('../input/preprocessed_data.pkl', "rb") as f:
    (
        X,
        y_a,
        y_t,
        X_test,
        y_a_test,
        y_t_test,
    ) = (pickle.load(f) for _ in range(6))

In [5]:
# Build and train a multi-task LSTM: a single network with two heads, one for
# activity prediction (softmax classification) and one for time prediction
# (single-unit regression), both fed from the same stack of shared layers.
print('Build model...')

# ModelCheckpoint does not create missing directories; without this, the first
# checkpoint save on a fresh checkout fails only after an epoch of training.
checkpoint_path = 'output_files/models/model_{epoch:02d}-{val_loss:.2f}.h5'
checkpoint_dir = os.path.dirname(checkpoint_path)
if not os.path.isdir(checkpoint_dir):
    os.makedirs(checkpoint_dir)

main_input = Input(shape=(maxlen, num_features), name='main_input')

# Two stacked LSTM layers shared by both tasks. Both return the full sequence
# so the next recurrent layer receives one vector per timestep.
l = LSTM(100, consume_less='gpu', init='glorot_uniform', return_sequences=True, dropout_W=0.2)(main_input) # first shared layer
b = BatchNormalization()(l)
l1 = LSTM(100, consume_less='gpu', init='glorot_uniform', return_sequences=True, dropout_W=0.2)(b) # second shared layer
b1 = BatchNormalization()(l1)

# Task-specific branches: each one reads the shared representation `b1` and
# returns only its final state (return_sequences=False).
l2_1 = LSTM(100, consume_less='gpu', init='glorot_uniform', return_sequences=False, dropout_W=0.2)(b1) # the layer specialized in activity prediction
b2_1 = BatchNormalization()(l2_1)

l2_2 = LSTM(100, consume_less='gpu', init='glorot_uniform', return_sequences=False, dropout_W=0.2)(b1) # the layer specialized in time prediction
b2_2 = BatchNormalization()(l2_2)

# Output heads: a softmax with one unit per entry of `targetchartoindice`, and
# a single unit (no activation specified) for the time target.
act_output = Dense(len(targetchartoindice), activation='softmax', init='glorot_uniform', name='act_output')(b2_1)
time_output = Dense(1, init='glorot_uniform', name='time_output')(b2_2)

model = Model(input=[main_input], output=[act_output, time_output])

opt = Nadam(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-08, schedule_decay=0.004, clipvalue=3)

# One loss per head, keyed by output-layer name. No loss_weights are passed,
# so the reported total `loss` is the plain sum of the two.
model.compile(loss={'act_output':'categorical_crossentropy', 'time_output':'mae'}, optimizer=opt)

# All three callbacks watch the combined validation loss: stop after 42 epochs
# without improvement, keep only improving checkpoints (full model, not just
# weights), and halve the learning rate after 10 stagnant epochs.
early_stopping = EarlyStopping(monitor='val_loss', patience=42)
model_checkpoint = ModelCheckpoint(checkpoint_path, monitor='val_loss', verbose=0, save_best_only=True, save_weights_only=False, mode='auto')
lr_reducer = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, verbose=0, mode='auto', epsilon=0.0001, cooldown=0, min_lr=0)

# validation_split=0.2 holds out part of (X, y_a, y_t) for the val_* metrics.
# NOTE(review): batch_size is tied to `maxlen` (the sequence length), which
# looks incidental rather than intentional -- confirm before tuning.
model.fit(X, {'act_output':y_a, 'time_output':y_t}, validation_split=0.2, verbose=2, callbacks=[early_stopping, model_checkpoint, lr_reducer], batch_size=maxlen, nb_epoch=500)

Build model...
Train on 7344 samples, validate on 1837 samples
Epoch 1/500
24s - loss: 2.2196 - act_output_loss: 1.0416 - time_output_loss: 1.1780 - val_loss: 1.8102 - val_act_output_loss: 0.7149 - val_time_output_loss: 1.0953
Epoch 2/500
24s - loss: 1.7545 - act_output_loss: 0.7206 - time_output_loss: 1.0340 - val_loss: 1.5865 - val_act_output_loss: 0.5773 - val_time_output_loss: 1.0092
Epoch 3/500
25s - loss: 1.7045 - act_output_loss: 0.6914 - time_output_loss: 1.0131 - val_loss: 1.6175 - val_act_output_loss: 0.6147 - val_time_output_loss: 1.0028
Epoch 4/500
24s - loss: 1.6851 - act_output_loss: 0.6751 - time_output_loss: 1.0100 - val_loss: 1.5607 - val_act_output_loss: 0.5878 - val_time_output_loss: 0.9729
Epoch 5/500
24s - loss: 1.6816 - act_output_loss: 0.6742 - time_output_loss: 1.0074 - val_loss: 1.5588 - val_act_output_loss: 0.5859 - val_time_output_loss: 0.9730
Epoch 6/500
25s - loss: 1.6808 - act_output_loss: 0.6752 - time_output_loss: 1.0056 - val_loss: 1.5515 - val_act_outp

28s - loss: 1.6137 - act_output_loss: 0.6282 - time_output_loss: 0.9855 - val_loss: 1.5157 - val_act_output_loss: 0.5654 - val_time_output_loss: 0.9503
Epoch 52/500
28s - loss: 1.6135 - act_output_loss: 0.6260 - time_output_loss: 0.9874 - val_loss: 1.5141 - val_act_output_loss: 0.5613 - val_time_output_loss: 0.9528
Epoch 53/500
34s - loss: 1.6149 - act_output_loss: 0.6270 - time_output_loss: 0.9879 - val_loss: 1.5134 - val_act_output_loss: 0.5661 - val_time_output_loss: 0.9473
Epoch 54/500
32s - loss: 1.6027 - act_output_loss: 0.6148 - time_output_loss: 0.9880 - val_loss: 1.5279 - val_act_output_loss: 0.5714 - val_time_output_loss: 0.9565
Epoch 55/500
32s - loss: 1.6059 - act_output_loss: 0.6177 - time_output_loss: 0.9882 - val_loss: 1.5249 - val_act_output_loss: 0.5685 - val_time_output_loss: 0.9564
Epoch 56/500
28s - loss: 1.6153 - act_output_loss: 0.6267 - time_output_loss: 0.9886 - val_loss: 1.5149 - val_act_output_loss: 0.5658 - val_time_output_loss: 0.9491
Epoch 57/500
25s - loss

25s - loss: 1.5883 - act_output_loss: 0.6028 - time_output_loss: 0.9855 - val_loss: 1.5125 - val_act_output_loss: 0.5620 - val_time_output_loss: 0.9504
Epoch 102/500
26s - loss: 1.5819 - act_output_loss: 0.6031 - time_output_loss: 0.9788 - val_loss: 1.5113 - val_act_output_loss: 0.5617 - val_time_output_loss: 0.9496
Epoch 103/500
25s - loss: 1.5876 - act_output_loss: 0.6080 - time_output_loss: 0.9796 - val_loss: 1.5127 - val_act_output_loss: 0.5624 - val_time_output_loss: 0.9503
Epoch 104/500
25s - loss: 1.5902 - act_output_loss: 0.6058 - time_output_loss: 0.9843 - val_loss: 1.5110 - val_act_output_loss: 0.5618 - val_time_output_loss: 0.9492
Epoch 105/500
25s - loss: 1.5899 - act_output_loss: 0.6052 - time_output_loss: 0.9847 - val_loss: 1.5120 - val_act_output_loss: 0.5617 - val_time_output_loss: 0.9503
Epoch 106/500
25s - loss: 1.5889 - act_output_loss: 0.6076 - time_output_loss: 0.9813 - val_loss: 1.5153 - val_act_output_loss: 0.5622 - val_time_output_loss: 0.9531
Epoch 107/500
25s 

<keras.callbacks.History at 0x7fef807548d0>

In [6]:
# Print a per-layer table of the trained model: layer name and type, output
# shape, parameter count, and the layer each one is connected to.
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
main_input (InputLayer)          (None, 15, 14)        0                                            
____________________________________________________________________________________________________
lstm_1 (LSTM)                    (None, 15, 100)       46000       main_input[0][0]                 
____________________________________________________________________________________________________
batchnormalization_1 (BatchNorma (None, 15, 100)       400         lstm_1[0][0]                     
____________________________________________________________________________________________________
lstm_2 (LSTM)                    (None, 15, 100)       80400       batchnormalization_1[0][0]       
___________________________________________________________________________________________