In [1]:
import numpy as np
from pm4py.algo.simulation.tree_generator import factory as tree_gen_factory
from pm4py.objects.process_tree import semantics
from pm4py.visualization.process_tree import factory as pt_vis_factory
import copy, random
from random import choice, randint
import gensim

from gensim.corpora.dictionary import Dictionary
from pyemd import emd

# Definition of noise function

In [15]:
def add_noise_name(logdummy, noise_level, amount):
    """Return a copy of ``logdummy`` with randomly replaced activity names.

    A fraction ``noise_level`` of the traces is picked at random. In each
    picked trace, up to ``amount`` distinct positions are overwritten with an
    activity name drawn uniformly from the vocabulary of the whole log.

    Parameters
    ----------
    logdummy : sequence of sequences
        The log; each trace is an indexable sequence of hashable activity
        names. It is not modified.
    noise_level : float
        Fraction of traces that receive noise; the number of noisy traces is
        ``int(noise_level * len(logdummy))``.
    amount : int
        Number of positions to overwrite per noisy trace. If a trace is
        shorter than ``amount``, every position of that trace is overwritten
        instead of raising ``ValueError``.

    Returns
    -------
    A deep copy of ``logdummy`` with the noise applied.

    Notes
    -----
    The replacement is drawn from the full vocabulary, so it can coincide with
    the name that was already at that position.
    """
    changelog = copy.deepcopy(logdummy)

    # A set keeps only the unique activity names, which form the vocabulary
    # from which replacement names are drawn.
    actions = list({action for trace in logdummy for action in trace})

    amount_of_noisy_traces = int(noise_level * len(logdummy))
    noisy_examples = random.sample(range(len(logdummy)), amount_of_noisy_traces)

    # Visit the selected traces in ascending index order so the sequence of
    # random draws matches a plain scan over the log.
    for i in sorted(noisy_examples):
        trace_length = len(logdummy[i])
        # random.sample raises ValueError when asked for more items than the
        # population holds, so cap the request at the trace length.
        positions = random.sample(range(trace_length), min(amount, trace_length))
        for x in positions:
            changelog[i][x] = random.choice(actions)
    return changelog

# Definitions of the log generation helpers

In [6]:
def log_converter(logdummy):
    """Flatten a log of event mappings into a list of lists of activity names.

    For every event, the value stored under ``'concept:name'`` is taken and
    all space characters are removed from it. The result has one inner list
    per trace, in the original order.
    """
    return [
        [event['concept:name'].replace(" ", "") for event in trace]
        for trace in logdummy
    ]

In [7]:
def Gen_Log(sizelog, mode, minimum, maximum, sequence, choice, parallel, loops):
    """Generate a process tree and a converted log of ``sizelog`` traces from it.

    The arguments ``mode`` through ``loops`` are packed into the parameter
    dictionary handed to ``tree_gen_factory.apply``; their exact meaning is
    defined by that generator (presumably size bounds and operator
    probabilities -- confirm against the pm4py documentation).

    Returns a tuple ``(log, tree)`` where ``log`` is the output of
    ``log_converter`` applied to the generated log and ``tree`` is the
    generated process tree.
    """
    tree_parameters = {
        "mode": mode,
        "min": minimum,
        "max": maximum,
        "sequence": sequence,
        "choice": choice,
        "parallel": parallel,
        "loop": loops,
    }
    generated_tree = tree_gen_factory.apply(parameters=tree_parameters)
    raw_log = semantics.generate_log(generated_tree, no_traces=sizelog)
    return log_converter(raw_log), generated_tree

# import scripts

In [8]:
import WMD
import TraceDist
import ICT

# Test definition

In [9]:
def get_average(list):
    """Return the mean length of the traces in ``list``.

    Parameters
    ----------
    list : iterable of sized objects
        The log; ``len()`` is taken of every item. (The parameter name shadows
        the builtin ``list``; it is kept so existing callers keep working.)

    Returns
    -------
    float
        The arithmetic mean of the item lengths, or ``0.0`` for an empty log
        instead of raising ``ZeroDivisionError``.
    """
    lengths = [len(trace) for trace in list]
    if not lengths:
        # An empty log has no traces to average over.
        return 0.0
    return sum(lengths) / len(lengths)

Add noise to 40% of the traces, but vary the number of replaced events per noisy trace (1, 3, 5, ..., 15); noise is applied only to the activity names.

In [16]:
def test(sizelog, mode, minimum, maximum, sequence, choice, parallel, loops, windowsize, k,
         noise_level=0.4, amounts=(1, 3, 5, 7, 9, 11, 13, 15), min_avg_length=20):
    """Run the name-noise experiment and print the WMD, ICT and trace distances.

    A log is generated with ``Gen_Log`` and regenerated until its average
    trace length (as computed by ``get_average``) is at least
    ``min_avg_length``. Then, for every entry of ``amounts``, a noisy copy of
    the log is created with ``add_noise_name`` and the first value returned by
    ``WMD.get_dist``, ``ICT.get_dist`` and ``TraceDist.get_dist`` is printed.

    Parameters
    ----------
    sizelog, mode, minimum, maximum, sequence, choice, parallel, loops
        Forwarded unchanged to ``Gen_Log``.
    windowsize
        Forwarded as third argument to each ``get_dist`` call.
    k
        Currently unused; kept so existing calls remain valid.
    noise_level : float, optional
        Fraction of traces that receive noise (default 0.4).
    amounts : iterable of int, optional
        Numbers of replaced events per noisy trace to evaluate
        (default 1, 3, ..., 15).
    min_avg_length : number, optional
        Minimum average trace length a generated log must reach to be
        accepted (default 20).

    Returns
    -------
    None. All results are printed.
    """
    print("Tree parameters", sizelog, mode, minimum, maximum, sequence, choice, parallel, loops)

    # Regenerate until the log's traces are long enough on average; at least
    # one log is always generated.
    while True:
        log, _tree = Gen_Log(sizelog, mode, minimum, maximum, sequence, choice, parallel, loops)
        avg_trace_length = get_average(log)
        if not avg_trace_length < min_avg_length:
            break
    print(avg_trace_length)

    for amount in amounts:
        print("Amount:", amount)
        noise_log = add_noise_name(log, noise_level, amount)

        # Each get_dist call returns a pair; only the first value is reported.
        wmd_first, _ = WMD.get_dist(log, noise_log, windowsize)
        print("WMD:")
        print(wmd_first)

        ict_first, _ = ICT.get_dist(log, noise_log, windowsize)
        print("ICT:")
        print(ict_first)

        trace_first, _ = TraceDist.get_dist(log, noise_log, windowsize)
        print("Trace:")
        print(trace_first)
        
        
    

In [17]:
test(1000, 40, 30, 50, 0.75, 0.0, 0.25, 0.0, 3, 3)

Tree parameters 1000 40 30 50 0.75 0.0 0.25 0.0
20.0
Amount: 1
WMD:
0.13726965822065632
ICT:
0.09560022566318511
Trace:
0.01615916895866394
Amount: 3
WMD:
0.3305165688873459
ICT:
0.2640739498376846
Trace:
0.0814462187886238
Amount: 5
WMD:
0.45836320497511507
ICT:
0.3750931152701378
Trace:
0.11554007959365845
Amount: 7
WMD:
0.552583649279077
ICT:
0.45815116891860963
Trace:
0.13119874835014345
Amount: 9
WMD:
0.6123702112836079
ICT:
0.523273820745945
Trace:
0.141084980905056
Amount: 11
WMD:
0.6562140328564082
ICT:
0.5524276784300803
Trace:
0.14447655528783798
Amount: 13
WMD:
0.7044362549916796
ICT:
0.5689537046313287
Trace:
0.15018631184101106
Amount: 15
WMD:
0.6955279130833164
ICT:
0.5769097446680068
Trace:
0.1525792546272278


In [18]:
test(1000, 40, 30, 50, 0.75, 0.0, 0.25, 0.0, 3, 3)

Tree parameters 1000 40 30 50 0.75 0.0 0.25 0.0
21.0
Amount: 1
WMD:
0.13523769680768782
ICT:
0.08900576369535355
Trace:
0.012726124346256256
Amount: 3
WMD:
0.33088360491804414
ICT:
0.23955742829186574
Trace:
0.07134131014347077
Amount: 5
WMD:
0.4610437214269308
ICT:
0.3582148595423925
Trace:
0.11917846137285233
Amount: 7
WMD:
0.5471662018343123
ICT:
0.41685656873952776
Trace:
0.1449382627606392
Amount: 9
WMD:
0.6252741337486564
ICT:
0.4833781558331989
Trace:
0.16094442933797837
Amount: 11
WMD:
0.6647908610513532
ICT:
0.5423104055722554
Trace:
0.16728853127360344
Amount: 13
WMD:
0.6874581641549871
ICT:
0.6007332346893491
Trace:
0.17172490972280502
Amount: 15
WMD:
0.7069902123073994
ICT:
0.5685142933187031
Trace:
0.1732353201210499


In [19]:
test(1000, 40, 30, 50, 0.75, 0.0, 0.25, 0.0, 3, 3)

Tree parameters 1000 40 30 50 0.75 0.0 0.25 0.0
20.0
Amount: 1
WMD:
0.13418577982258045
ICT:
0.09088048753738404
Trace:
0.013443546772003174
Amount: 3
WMD:
0.3364926843201421
ICT:
0.24294777113199234
Trace:
0.0813536986708641
Amount: 5
WMD:
0.46286394202080827
ICT:
0.36235085930824285
Trace:
0.12032774162292481
Amount: 7
WMD:
0.539880412777628
ICT:
0.42474388332366947
Trace:
0.1329399157166481
Amount: 9
WMD:
0.6166462413415337
ICT:
0.5046461262941361
Trace:
0.14164431923627854
Amount: 11
WMD:
0.6497517057148972
ICT:
0.5463467827200889
Trace:
0.14765985286235808
Amount: 13
WMD:
0.68698920226461
ICT:
0.5499811377286912
Trace:
0.15145663845539092
Amount: 15
WMD:
0.699783573385282
ICT:
0.5576075226783753
Trace:
0.1546110220849514


In [20]:
test(1000, 40, 30, 50, 0.75, 0.0, 0.25, 0.0, 3, 3)

Tree parameters 1000 40 30 50 0.75 0.0 0.25 0.0
23.0
Amount: 1
WMD:
0.13002340331579632
ICT:
0.07849906183325726
Trace:
0.016797055304050446
Amount: 3
WMD:
0.30168493176435984
ICT:
0.2136051716182543
Trace:
0.08122156971693038
Amount: 5
WMD:
0.4193737576897929
ICT:
0.31009822210021637
Trace:
0.11547820788621903
Amount: 7
WMD:
0.5245140731467943
ICT:
0.38974817246976107
Trace:
0.13077948236465453
Amount: 9
WMD:
0.5917542828508128
ICT:
0.46714399750336355
Trace:
0.13922576075792312
Amount: 11
WMD:
0.6491258733933715
ICT:
0.5147613747016244
Trace:
0.14702946466207503
Amount: 13
WMD:
0.6813894102922444
ICT:
0.5320060480936714
Trace:
0.14848645317554474
Amount: 15
WMD:
0.6915713275279323
ICT:
0.5472002580165863
Trace:
0.15031073653697968


In [21]:
test(1000, 40, 30, 50, 0.75, 0.0, 0.25, 0.0, 3, 3)

Tree parameters 1000 40 30 50 0.75 0.0 0.25 0.0
21.0
Amount: 1
WMD:
0.13149053717663678
ICT:
0.0975241230896541
Trace:
0.013998520493507385
Amount: 3
WMD:
0.3280726781345882
ICT:
0.23111718893618807
Trace:
0.08139564508199691
Amount: 5
WMD:
0.45245733552319733
ICT:
0.31415348047301883
Trace:
0.11734218287467957
Amount: 7
WMD:
0.5465370952787542
ICT:
0.41401342465763996
Trace:
0.13630035364627838
Amount: 9
WMD:
0.6093432933969299
ICT:
0.47752404804456794
Trace:
0.14549883794784546
Amount: 11
WMD:
0.6747170558015958
ICT:
0.5365220506304786
Trace:
0.1508444475233555
Amount: 13
WMD:
0.6979630704943418
ICT:
0.5653535479023344
Trace:
0.1565405780673027
Amount: 15
WMD:
0.7164635804715033
ICT:
0.6013191231659479
Trace:
0.1550714755654335
