In [None]:
from opyenxes.model.XLog import XLog
from opyenxes.data_in.XUniversalParser import XUniversalParser
from opyenxes.classification.XEventAttributeClassifier import XEventAttributeClassifier

#from keras.models import Sequential
#from keras.layers import Dense
#from keras.layers import Dropout
#from keras.layers import LSTM
#from keras.callbacks import ModelCheckpoint
#from keras.utils import np_utils
import keras

import numpy as np
import itertools

In [None]:
# Location of the BPIC 2011 event log (XES file), relative to this notebook.
bpic_2011_path = "../logs/bpic2011.xes"

# The parser result is indexable; only its first element is kept as the log.
with open(bpic_2011_path) as bpic2011_file:
    parsed_logs = XUniversalParser().parse(bpic2011_file)
    bpic2011_log = parsed_logs[0]

# Drill down to the first trace and its first event for interactive inspection.
raw_trace = bpic2011_log[0]
raw_event = raw_trace[0]
raw_attributes = raw_event.get_attributes()



In [None]:
# Read the "concept:name" attribute value of every event, walking the log
# trace by trace, and collect all of them into one flat list.
# NOTE(review): the flat list carries no trace boundaries, so anything that
# slices it into windows can span two traces -- confirm this is intended.
event_traces = [
    event.get_attributes()["concept:name"].get_value()
    for trace in bpic2011_log
    for event in trace
]

In [None]:
# Vocabulary: the distinct event labels in sorted order, plus lookup tables
# in both directions (label -> integer id, integer id -> label).
events = sorted(set(event_traces))
event_to_int = {label: idx for idx, label in enumerate(events)}
int_to_event = dict(enumerate(events))

n_events = len(event_traces)
n_vocab = len(events)

print("Total events:", n_events)
print("Total number of event types:", n_vocab)

In [None]:
# Build integer-encoded (input, target) pairs with a sliding window:
# every run of `seq_length` consecutive events is one input, and the event
# immediately following that run is its target.
seq_length = 5

# Encode the whole stream once, then slice the encoded list.
encoded_events = [event_to_int[label] for label in event_traces]
window_starts = range(n_events - seq_length)

dataX = [encoded_events[start:start + seq_length] for start in window_starts]
dataY = [encoded_events[start + seq_length] for start in window_starts]

n_patterns = len(dataX)
print("Total Patterns:", n_patterns)

In [None]:
# Reshape the windows to [samples, time steps, features] and scale the
# integer ids by the vocabulary size (each value becomes id / n_vocab).
X = np.reshape(dataX, (n_patterns, seq_length, 1)) / float(n_vocab)

# One-hot encode the targets through the public `keras.utils.to_categorical`
# entry point (`keras.utils.np_utils` is not available in newer Keras).
# `num_classes` is pinned to the vocabulary size so that the width of `y`
# always matches `event_to_int` / `int_to_event`, even if the highest id
# never occurs as a target.
y = keras.utils.to_categorical(dataY, num_classes=n_vocab)

In [None]:
# LSTM classifier: one 256-unit LSTM layer reading (time steps, features),
# 20% dropout, then a softmax layer with one output per column of `y`.
n_timesteps, n_features = X.shape[1], X.shape[2]

model = keras.models.Sequential([
    keras.layers.LSTM(256, input_shape=(n_timesteps, n_features)),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(y.shape[1], activation='softmax'),
])
model.compile(loss='categorical_crossentropy', optimizer='adam')

In [None]:
# Checkpoint callback: monitors the training loss (mode 'min') with
# save_best_only enabled; the file name template embeds the epoch number
# and the loss value.
filepath = "weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = keras.callbacks.ModelCheckpoint(
    filepath,
    monitor='loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
callbacks_list = [checkpoint]

In [None]:
# Train for 2 epochs with batches of 128 samples, running the checkpoint
# callback(s) during training.
model.fit(
    X,
    y,
    batch_size=128,
    epochs=2,
    callbacks=callbacks_list,
)

In [None]:
# Load checkpointed weights back into the model.
# The checkpoint file name embeds the loss of the epoch that produced it,
# which differs from run to run, so the file is looked up by pattern instead
# of being hard-coded. The checkpoint callback is configured with
# save_best_only=True, so every file it writes is an improvement over the
# previous one; the most recently modified match is therefore the best
# checkpoint of the latest training run.
import glob
import os

checkpoint_files = glob.glob("weights-improvement-*.hdf5")
if not checkpoint_files:
    raise FileNotFoundError(
        "No 'weights-improvement-*.hdf5' checkpoint found; "
        "run the training cell first."
    )

model_filename = max(checkpoint_files, key=os.path.getmtime)
model.load_weights(model_filename)

In [None]:
# Run the model on every input window; the final layer is a softmax, so each
# row of the result holds one score per class.
prediction = model.predict(x=X)

In [None]:
# Index of the highest-scoring class for each sample (argmax along the
# class axis), kept as a plain list.
prediction_index = list(np.argmax(prediction, axis=1))

In [None]:
# Accuracy: fraction of samples whose predicted class id equals the target id.
# Predictions and targets are paired position by position; using the
# predicted class ids themselves as positions would compare the wrong
# samples.
# NOTE: `prediction` was computed on X, the same data the model was fitted
# on, so this is training accuracy rather than held-out accuracy.
accuracy = sum(
    predicted == actual for predicted, actual in zip(prediction_index, dataY)
) / len(prediction_index)
accuracy

In [None]:
# Test prefixspan here
from prefixspan import PrefixSpan

# One list of "concept:name" labels per trace (trace boundaries preserved),
# then the same sequences with every label replaced by its integer id.
event_sequences = [
    [event.get_attributes()["concept:name"].get_value() for event in trace]
    for trace in bpic2011_log
]
translated_sequences = [
    [event_to_int[label] for label in labels]
    for labels in event_sequences
]

ps = PrefixSpan(translated_sequences)

# presumably the argument is the minimum support threshold -- confirm
# against the prefixspan documentation
frequent_patterns = ps.frequent(3)
print(frequent_patterns)