In [1]:
import numpy as np
import chainer
from chainer.backends import cuda
from chainer import Function, gradient_check, report, training, utils, Variable
from chainer import datasets, iterators, optimizers, serializers
from chainer import Link, Chain, ChainList
import chainer.functions as F
import chainer.links as L
from chainer.training import extensions
import random
import itertools

from opyenxes.model.XLog import XLog
from opyenxes.data_in.XUniversalParser import XUniversalParser
from opyenxes.classification.XEventAttributeClassifier import XEventAttributeClassifier

In [13]:
# Select the array backend: CuPy on GPU `gpu_no` when CUDA is usable, NumPy otherwise.
gpu_no = 0
try:
    cuda.check_cuda_available()
    # get_device_from_id replaces the deprecated cuda.get_device for integer device ids.
    cuda.get_device_from_id(gpu_no).use()
    import cupy  # imported lazily: CuPy may not be installed on CPU-only machines
except Exception as gpu_error:
    # Catch ordinary failures only (missing CUDA / CuPy, bad device id) so that
    # KeyboardInterrupt and SystemExit still propagate, and report the fallback
    # instead of switching to the CPU silently.
    print("GPU unavailable, falling back to NumPy: " + repr(gpu_error))
    xp = np
else:
    print("GPU :"+str(gpu_no))
    xp = cupy

GPU :0


In [3]:
# Relative location of the BPIC 2011 event log (XES format).
bpic_2011_path = "../logs/bpic2011.xes"

# Parse the whole file; the parser hands back a sequence of logs.
with open(bpic_2011_path, "r") as bpic2011_file:
    bpic2011_rlog = XUniversalParser().parse(bpic2011_file)

# Only the first parsed log is used; per the original author's note the
# sequence has length 1 for this file, so nothing is discarded.
bpic2011_log = bpic2011_rlog[0]

Unknown extension: http://www.xes-standard.org/meta_time.xesext
Unknown extension: http://www.xes-standard.org/meta_life.xesext
Unknown extension: http://www.xes-standard.org/meta_org.xesext
Unknown extension: http://www.xes-standard.org/meta_concept.xesext
Unknown extension: http://www.xes-standard.org/meta_3TU.xesext
Unknown extension: http://www.xes-standard.org/meta_general.xesext


## Dataset and Iterator Setup

In [14]:
# use a generator for the generation of every sample
def window_features(traces, windowsize, mapping=None):
    """Yield every sliding window of consecutive events, encoded as integer IDs.

    Args:
        traces: iterable of indexable sequences of event names.
        windowsize: number of consecutive events in each window.
        mapping: optional dict from event name to integer ID. When omitted,
            the module-level ``event_to_int`` is used; it is looked up when
            iteration starts, not when the generator is created.

    Yields:
        A list of ``windowsize`` integer IDs for each window, in trace order.
        A trace shorter than ``windowsize`` contributes no windows.

    Raises:
        KeyError: if an event name inside a window is missing from the mapping.
    """
    if mapping is None:
        mapping = event_to_int
    for trace in traces:
        for start in range(len(trace) - windowsize + 1):
            yield [mapping[trace[i]] for i in range(start, start + windowsize)]
            
# Tunable constants for the dataset construction below.
SPLIT_SEED = 42        # seed for the trace shuffle, so the train/test split is reproducible
TRAIN_FRACTION = .8    # share of traces that goes into the training set
WINDOW_SIZE = 5        # events per window: WINDOW_SIZE-1 inputs followed by 1 label
BATCH_SIZE = 100

# Extract the event names of every trace and wrap each trace in begin/end markers.
event_traces = [[ev.get_attributes()["concept:name"].get_value() for ev in trace] for trace in bpic2011_log]
event_traces = [['<bos>'] + names + ['<eos>'] for names in event_traces]

# Map every distinct event name to an integer ID and back; sorting keeps the
# numbering stable across runs.
events       = sorted(set(itertools.chain.from_iterable(event_traces)))
event_to_int = {name: idx for idx, name in enumerate(events)}
int_to_event = dict(enumerate(events))

# Shuffle whole traces (not windows) with a dedicated seeded generator, so the
# split is reproducible without touching the global `random` state.
random.Random(SPLIT_SEED).shuffle(event_traces)
split_at = int(TRAIN_FRACTION * len(event_traces))
train_traces = event_traces[:split_at]
test_traces  = event_traces[split_at:]

# From here on train_traces / test_traces hold 2-D arrays of windows
# (one row per window, WINDOW_SIZE integer IDs stored as float32), no longer lists of traces.
trace_dt = xp.float32
train_traces = xp.array(list(window_features(train_traces, WINDOW_SIZE)), dtype=trace_dt)
test_traces  = xp.array(list(window_features(test_traces, WINDOW_SIZE)),  dtype=trace_dt)

# Inputs are the first WINDOW_SIZE-1 columns of each window; the label is the
# last column (the event that follows them), cast to int32 class IDs.
train_x = train_traces[:, :WINDOW_SIZE - 1]
train_y = train_traces[:, WINDOW_SIZE - 1].astype(xp.int32)

test_x = test_traces[:, :WINDOW_SIZE - 1]
test_y = test_traces[:, WINDOW_SIZE - 1].astype(xp.int32)

train_ds = datasets.TupleDataset(train_x, train_y)
test_ds  = datasets.TupleDataset(test_x, test_y)

# NOTE(review): the training iterator does not shuffle, so batches follow the
# window order produced above — confirm this is intended for SGD training.
train_iter = chainer.iterators.SerialIterator(train_ds, BATCH_SIZE, repeat=True, shuffle=False)
test_iter  = chainer.iterators.SerialIterator(test_ds,  BATCH_SIZE, repeat=False, shuffle=False)

## Neural Network Modeling

In [16]:
class RNN(Chain):
    """Stack of two (Linear -> ReLU) blocks followed by a final Linear layer.

    Despite the class name, the layer stack built here contains no recurrent
    link; each call maps its input straight through the stack.
    """

    def __init__(self, n_in, n_out):
        """Build the layer stack.

        Args:
            n_in: the single size argument given to each hidden ``L.Linear``.
                With one size argument Chainer treats it as the layer's output
                size and infers the input size on first use, so both hidden
                layers are ``n_in`` units wide.
            n_out: output size of the final ``L.Linear`` layer.
        """
        super(RNN, self).__init__()
        hidden_block = chainer.Sequential(L.Linear(n_in), F.relu)
        with self.init_scope():
            # Two repetitions of the hidden block, then the output layer.
            self.model = hidden_block.repeat(2)
            self.model.append(L.Linear(n_out))

    def __call__(self, x):
        """Feed ``x`` through the layer stack and return the final layer's output."""
        return self.model(x)

def dummy(x, t):
    """Return ``F.accuracy(x, t)`` unchanged.

    Pass-through wrapper around the accuracy function; the original carried a
    commented-out print of its arguments, so it presumably served as a
    debugging hook.
    """
    accuracy = F.accuracy(x, t)
    return accuracy
    
# Network with n_in=4 and one output unit per distinct event name.
rnn = RNN(n_in=4, n_out=len(events))

# Wrap the network in a classifier that reports accuracy via F.accuracy.
model = L.Classifier(rnn, accfun=F.accuracy)

# Plain SGD with default hyperparameters, attached to the classifier.
optimizer = optimizers.SGD()
optimizer.setup(model)

## Linking Iterator and Optimizer Together

In [17]:
# Train on GPU 0 only when the backend-selection cell picked CuPy; -1 is
# Chainer's id for the CPU. This keeps the cell runnable after a CPU fallback
# instead of failing on a hard-coded GPU id.
device_id = 0 if xp is not np else -1

# The updater pulls batches from train_iter and applies the optimizer;
# the trainer stops after 50 epochs and writes its log under 'result'.
updater = training.StandardUpdater(train_iter, optimizer, device=device_id)
trainer = training.Trainer(updater, (50, 'epoch'), out='result')

trainer.extend(extensions.LogReport())
trainer.extend(extensions.PrintReport(
    ['epoch', 'main/loss', 'validation/main/loss',
     'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))
# Evaluate on the held-out windows; results are reported under 'validation/...'.
trainer.extend(extensions.Evaluator(test_iter, model, device=device_id))

# Move the model to the GPU only when one is in use.
if device_id >= 0:
    model.to_gpu()
trainer.run()

epoch       main/loss   validation/main/loss  main/accuracy  validation/main/accuracy  elapsed_time
[J1           6.31406     6.07823               0.0967788      0.0916153                 5.52641       
[J2           5.89565     5.74788               0.0969078      0.0916153                 10.941        
[J3           5.58184     5.47222               0.0969407      0.0916153                 16.379        
[J4           5.33415     5.2672                0.0969242      0.0916153                 21.787        
[J5           5.15595     5.12208               0.0969104      0.0916153                 27.2307       
[J6           5.02793     5.01637               0.0969159      0.0916153                 32.667        
[J7           4.93254     4.9351                0.0969325      0.0916153                 38.1077       
[J8           4.8573      4.86951               0.0969407      0.0916153                 43.473        
[J9           4.79547     4.81482               0.096916   