In [1]:
import keras
import pickle
import random
import numpy as np
import pandas as pd
import re
import multiprocessing
import os

from tqdm import *
from keras.models import Sequential, Model
from keras.layers import Dense, Embedding, Input, Reshape, concatenate, Flatten, Activation, LSTM, Dropout, Lambda
from keras.utils import np_utils

# import multi_gpu_utils2 as multi_gpu_utils

Using TensorFlow backend.


In [2]:
##### CONFIGURATION SETUP ####

data_path = "../logs/bpic2011.xes"
traces_finalpath = data_path.replace(".xes", "_traces_encoded.pickled")
traces_dictionarypath = data_path.replace(".xes", "_dictionaries.pickled")
n_sp2_features = 624
n_pfs_features = 25

traces = pickle.load(open(traces_finalpath, "rb"))
feature_dict = pickle.load(open(traces_dictionarypath, "rb"))
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

### CONFIGURATION SETUP END ###

In [3]:
# shuffle complete traces and create test and training set
random.shuffle(traces)
sep_idx = 900 #int(0.8*len(traces))

# extract the feature indices
# data is organized like this: ordinal features | categorical features | SP2 features | PFS features | TARGET features
# needed as every of these features will get its own layer
feature_names  = traces[0].columns
trace_columns = list(map(lambda e: bool(re.match('^TARGET$', e)), feature_names))
target_col_start_index = trace_columns.index(True)

categorical_feature_names = feature_dict.keys()
pfs_col_start_index = target_col_start_index - n_pfs_features
sp2_col_start_index = pfs_col_start_index - n_sp2_features
cat_col_start_index = sp2_col_start_index - len(categorical_feature_names)

ordinal_feature_names     = feature_names[0:cat_col_start_index]

In [None]:
def generate_input_name(var_name):
    return "input_{0}".format(''.join(c for c in var_name if c.isalnum()))

In [59]:
def stagger_sequence(s):
    for idx in range(1,len(s)):
        x = s["concept:name"][0:idx]
        y = s["TARGET"][idx-1]
        yield x,y

## Model creation

In [85]:
batch_size = None # None translates to unknown batch size
# [samples, time steps, features]
il = Input(batch_shape=(batch_size,None,1))
# main_output = il
main_output = Embedding(624, 500)(il)
main_output = Reshape(target_shape=(-1,500))(main_output) # reshape layer does not need to know BATCH SIZE!!!

# sizes should be multiple of 32 since it trains faster due to np.float32
main_output = LSTM(500,
                   batch_input_shape=(batch_size,None,1),
                   stateful=False,
                   return_sequences=True,
                   unroll=False,
                   kernel_initializer=keras.initializers.Zeros())(main_output)
main_output = LSTM(500,
                   stateful=False,
                   return_sequences=True,
                   unroll=False,
                   kernel_initializer=keras.initializers.Zeros())(main_output)

main_output = Dense(len(feature_dict["concept:name"]["to_int"]), activation='softmax', name='dense_final')(main_output)

full_model = Model(inputs=[il], outputs=[main_output])
optimizerator = keras.optimizers.adam()

full_model.compile(loss='categorical_crossentropy', optimizer=optimizerator, metrics=['accuracy'])

for l in full_model.layers:
    print(l.name, "--> input_shape={}".format(l.input_shape), "output_shape={}".format(l.output_shape))

input_33 --> input_shape=(None, None, 1) output_shape=(None, None, 1)
embedding_20 --> input_shape=(None, None, 1) output_shape=(None, None, 1, 500)
reshape_19 --> input_shape=(None, None, 1, 500) output_shape=(None, None, 500)
lstm_25 --> input_shape=(None, None, 500) output_shape=(None, None, 500)
lstm_26 --> input_shape=(None, None, 500) output_shape=(None, None, 500)
dense_final --> input_shape=(None, None, 500) output_shape=(None, None, 625)


In [61]:
n_epochs = 1
for epoch in range(1,n_epochs+1):
    mean_tr_acc  = []
    mean_tr_loss = []

#     for t in tqdm(traces[:200], desc="Epoch {0}/{1}".format(epoch,n_epochs)):
    for t in traces:
        batch_x = []
        batch_y = []
        
        for x,y in stagger_sequence(t):
            y = np_utils.to_categorical(y, num_classes=625)
            batch_x.append(x)
            batch_y.append(y)
            
        batch_x = np.array(batch_x).reshape((len(t)-1, -1, 1))
        batch_y = np.array(batch_y)

        tr_loss, tr_acc = full_model.train_on_batch(batch_x, batch_y)
        mean_tr_acc.append(tr_acc)
        mean_tr_loss.append(tr_loss)
        break

    mean_tr_acc = np.mean(mean_tr_acc)
    print('Epoch {0} -- loss = {1:.5f} -- acc = {2:.5f}'.format(epoch,np.mean(mean_tr_loss), np.mean(mean_tr_acc)))

    if best_acc < int(mean_tr_acc):
        best_acc = int(mean_tr_acc)
        full_model.save('evermann_baseline_e{0}_acc{1:.3f}.h5'.format(epoch,best_acc))

ValueError: setting an array element with a sequence.

In [8]:
full_model = keras.models.load_model("evermann_baseline_e91.h5")

In [10]:
full_model.predict(np.array([batch_x[0:1]]))

array([[[5.79371024e-03, 2.87678123e-01, 2.24107155e-09, 2.40011854e-04,
         5.39651432e-04, 6.72204187e-04, 3.25576542e-03, 5.66909574e-02,
         3.24004889e-03, 1.52331265e-02, 1.20667368e-02, 1.37805268e-02,
         4.36056741e-02, 1.00682508e-02, 1.23797087e-02, 1.29512139e-02,
         2.94028167e-02, 1.01537124e-04, 3.46898800e-03, 1.32629136e-03,
         1.46427308e-03, 1.64528738e-03, 1.19171664e-02, 4.15197797e-02,
         5.31600369e-03, 4.70651686e-03, 1.11822579e-02, 5.35978284e-03,
         9.06471396e-05, 6.94872928e-04, 4.43080382e-04, 5.16606633e-05,
         5.72092496e-02, 4.81083960e-04, 2.37390968e-10, 8.23457260e-04,
         2.33945996e-03, 6.40116070e-07, 1.16019719e-03, 2.60202400e-02,
         3.63005302e-03, 3.83877242e-03, 4.23235586e-04, 4.53943041e-15,
         4.11286135e-04, 1.53355161e-03, 2.32875743e-03, 3.16886377e-04,
         3.86347981e-10, 1.44132297e-03, 6.61979416e-10, 3.90624069e-02,
         1.32486282e-03, 6.28044145e-05, 9.93238336

In [11]:
for t in traces[sep_idx: sep_idx+1]:
    batch_x = t["concept:name"].values.reshape((-1,1))
    batch_y = t["TARGET"].values
    
    print("Successive testing")
    for i in range(1,len(batch_x)):
        yhat = full_model.predict(np.array([batch_x[0:i]]))
        yhat = [np.argmax(yh) for yh in yhat[0]][-1]
    
        print("{0:2.2f}% ==>".format(i / len(batch_x) * 100), batch_y[0:i][-1], yhat)

Successive testing
0.66% ==> 7 624
1.32% ==> 7 624
1.99% ==> 74 1
2.65% ==> 75 1
3.31% ==> 76 32
3.97% ==> 8 32
4.64% ==> 9 1
5.30% ==> 10 1
5.96% ==> 77 624
6.62% ==> 11 11
7.28% ==> 12 1
7.95% ==> 24 1
8.61% ==> 25 32
9.27% ==> 26 624
9.93% ==> 39 1
10.60% ==> 40 1
11.26% ==> 13 32
11.92% ==> 14 624
12.58% ==> 78 1
13.25% ==> 79 1
13.91% ==> 67 32
14.57% ==> 80 32
15.23% ==> 81 1
15.89% ==> 49 1
16.56% ==> 69 32
17.22% ==> 15 32
17.88% ==> 82 1
18.54% ==> 0 1
19.21% ==> 1 1
19.87% ==> 32 624
20.53% ==> 212 1
21.19% ==> 70 75
21.85% ==> 16 1
22.52% ==> 7 1
23.18% ==> 7 1
23.84% ==> 7 624
24.50% ==> 9 1
25.17% ==> 10 1
25.83% ==> 11 624
26.49% ==> 12 624
27.15% ==> 39 1
27.81% ==> 40 1
28.48% ==> 44 32
29.14% ==> 45 32
29.80% ==> 49 1
30.46% ==> 15 1
31.13% ==> 22 624
31.79% ==> 23 624
32.45% ==> 51 1
33.11% ==> 16 1
33.77% ==> 253 624
34.44% ==> 7 32
35.10% ==> 7 1
35.76% ==> 7 1
36.42% ==> 7 32
37.09% ==> 76 624
37.75% ==> 88 1
38.41% ==> 88 32
39.07% ==> 88 1
39.74% ==> 143 32
40.40

In [None]:
prediction_index = [ np.argmax(p) for p in prediction ]

In [None]:
sum([prediction_index[i] == dataY[i] for i in prediction_index]) / len(prediction_index)