In [1]:
import numpy as np
from pm4py.algo.simulation.tree_generator import factory as tree_gen_factory
from pm4py.objects.process_tree import semantics
from pm4py.visualization.process_tree import factory as pt_vis_factory
import copy, random
from random import choice, randint
import gensim

from gensim.corpora.dictionary import Dictionary
from pyemd import emd

# Definitions of all noise functions

In [2]:
def add_noise_name(logdummy, noise_level):
    """Return a copy of the log in which some events are relabelled at random.

    ``int(noise_level * len(logdummy))`` traces are selected without
    replacement. In each selected, non-empty trace one randomly chosen
    position is overwritten with an activity drawn uniformly from the
    vocabulary of the whole log. The drawn activity may equal the one it
    replaces, in which case that trace ends up unchanged.

    Args:
        logdummy: Log as a sequence of traces, each a sequence of hashable
            activity labels. It is not modified.
        noise_level: Fraction of traces to perturb.

    Returns:
        A deep copy of ``logdummy`` with the replacements applied.

    Raises:
        ValueError: If the number of traces to perturb is negative or larger
            than the log (raised by ``random.sample``).
    """
    changelog = copy.deepcopy(logdummy)

    # Vocabulary of unique activities. dict.fromkeys keeps first-seen order,
    # so a seeded run yields the same replacements every time; the iteration
    # order of a set of strings can differ between interpreter runs.
    actions = list(dict.fromkeys(act for trace in logdummy for act in trace))

    amount_of_noisy_traces = int(noise_level * len(logdummy))
    noisy_examples = random.sample(range(len(logdummy)), amount_of_noisy_traces)

    # Walk only the selected indices, in ascending order, instead of testing
    # every index for membership in the sample list.
    for i in sorted(noisy_examples):
        trace_length = len(logdummy[i])
        if trace_length == 0:
            # Nothing to overwrite; randint(0, -1) would raise ValueError.
            continue
        random_act = random.randint(0, trace_length - 1)
        changelog[i][random_act] = random.choice(actions)
    return changelog

In [3]:
def add_noise_delete(logdummy, noise_level):
    """Return a copy of the log in which some traces lose one random event.

    ``int(noise_level * len(logdummy))`` traces are selected without
    replacement. One randomly chosen position is removed from each selected
    trace; selected traces that are already empty are left as they are.

    Args:
        logdummy: Log as a sequence of traces (each a list of activities).
            It is not modified.
        noise_level: Fraction of traces to perturb.

    Returns:
        A deep copy of ``logdummy`` with the deletions applied.

    Raises:
        ValueError: If the number of traces to perturb is negative or larger
            than the log (raised by ``random.sample``).
    """
    changelog = copy.deepcopy(logdummy)
    amount_of_noisy_traces = int(noise_level * len(logdummy))
    noisy_examples = random.sample(range(len(logdummy)), amount_of_noisy_traces)

    # Walk only the selected indices, in ascending order, instead of testing
    # every index for membership in the sample list.
    for i in sorted(noisy_examples):
        trace_length = len(logdummy[i])
        if trace_length == 0:
            # Nothing to delete; randint(0, -1) would raise ValueError.
            continue
        random_act = random.randint(0, trace_length - 1)
        # Work on a fresh copy of the trace so the deletion can never leak
        # into another entry of the result that shares the same list object.
        newtrace = copy.deepcopy(logdummy[i])
        del newtrace[random_act]
        changelog[i] = newtrace
    return changelog

In [4]:
def add_noise_order(logdummy, noise_level):
    """Return a copy of the log in which some traces have two events swapped.

    ``int(noise_level * len(logdummy))`` traces are selected without
    replacement. In each selected, non-empty trace two positions are drawn
    independently and their events exchanged. The two positions may coincide
    (always so for a trace of length one), in which case the trace stays
    unchanged.

    Args:
        logdummy: Log as a sequence of traces (each a list of activities).
            It is not modified.
        noise_level: Fraction of traces to perturb.

    Returns:
        A deep copy of ``logdummy`` with the swaps applied.

    Raises:
        ValueError: If the number of traces to perturb is negative or larger
            than the log (raised by ``random.sample``).
    """
    changelog = copy.deepcopy(logdummy)
    amount_of_noisy_traces = int(noise_level * len(logdummy))
    noisy_examples = random.sample(range(len(logdummy)), amount_of_noisy_traces)

    # Walk only the selected indices, in ascending order, instead of testing
    # every index for membership in the sample list.
    for i in sorted(noisy_examples):
        trace = changelog[i]
        trace_length = len(trace)
        if trace_length == 0:
            # Nothing to swap; randint(0, -1) would raise ValueError.
            continue
        random_act = random.randint(0, trace_length - 1)
        random_act2 = random.randint(0, trace_length - 1)
        # Swap inside the copy so the result never holds references to the
        # input log's own event objects.
        trace[random_act], trace[random_act2] = trace[random_act2], trace[random_act]
    return changelog

In [5]:
def add_noise(logdummy, noise_level):
    """Apply all three noise types to a log, one after the other.

    The log is passed through add_noise_name, then add_noise_order, then
    add_noise_delete; each stage receives the output of the previous one and
    the same ``noise_level``.

    Args:
        logdummy: Log as a sequence of traces.
        noise_level: Fraction of traces handed to every noise stage.

    Returns:
        The log returned by the last stage.
    """
    noisy_log = logdummy
    for perturb in (add_noise_name, add_noise_order, add_noise_delete):
        noisy_log = perturb(noisy_log, noise_level)
    return noisy_log

# Definitions for generating trees and logs

In [6]:
def log_converter(logdummy):
    """Convert an event log into nested lists of activity names.

    Each event is read through its ``'concept:name'`` entry and every space
    character is removed from that name.

    Args:
        logdummy: Sequence of traces, each a sequence of mapping-like events
            that carry a ``'concept:name'`` string.

    Returns:
        A list with one list of space-free activity names per trace.
    """
    return [
        [event['concept:name'].replace(" ", "") for event in trace]
        for trace in logdummy
    ]

In [7]:
def Gen_Log(sizelog, mode, minimum, maximum, sequence, choice, parallel, loops):
    """Generate a random process tree and a log of traces played out from it.

    The tree-shape arguments are handed to the tree generator unchanged under
    the keys ``mode``, ``min``, ``max``, ``sequence``, ``choice``, ``parallel``
    and ``loop``.
    NOTE(review): their exact meaning is defined by the pm4py tree generator;
    confirm against its documentation.

    Args:
        sizelog: Number of traces requested from the playout (``no_traces``).
        mode, minimum, maximum, sequence, choice, parallel, loops: Tree
            generator parameters, see above.

    Returns:
        A ``(log, tree)`` tuple: the playout converted by ``log_converter``
        and the generated tree.
    """
    generator_settings = {
        "mode": mode,
        "min": minimum,
        "max": maximum,
        "sequence": sequence,
        "choice": choice,
        "parallel": parallel,
        "loop": loops,
    }
    tree = tree_gen_factory.apply(parameters=generator_settings)
    played_out = semantics.generate_log(tree, no_traces=sizelog)
    return log_converter(played_out), tree

# Import the distance scripts (WMD, TraceDist, ICT)

In [8]:
import WMD
import TraceDist
import ICT

# Define Test

In [9]:
def get_average(list):
    """Return the mean length of the items in ``list``.

    Note that the parameter name shadows the built-in ``list`` inside this
    function.

    Args:
        list: Iterable of sized items (e.g. traces).

    Returns:
        The arithmetic mean of ``len(item)`` over all items, as a float.

    Raises:
        ZeroDivisionError: If ``list`` contains no items.
    """
    lengths = [len(trace) for trace in list]
    return sum(lengths) / len(lengths)

In [10]:
def test(sizelog, mode, minimum, maximum, sequence, choice, parallel, loops, windowsize, k,
         noises=(0.1, 0.2, 0.3, 0.4, 0.5)):
    """Generate a log, add noise at several levels and print three distances.

    A log is generated with ``Gen_Log`` and regenerated until its average
    trace length is at least ``minimum / 2``. For every noise level a noisy
    version is built with ``add_noise`` and compared with the clean log using
    ``WMD.get_dist``, ``ICT.get_dist`` and ``TraceDist.get_dist``. Only the
    first element of each returned pair is printed.

    Args:
        sizelog: Number of traces to generate.
        mode, minimum, maximum, sequence, choice, parallel, loops: Tree
            generator parameters, forwarded to ``Gen_Log``.
        windowsize: Forwarded as third argument to every ``get_dist`` call.
        k: Not used by this function; kept so existing calls keep working.
        noises: Noise levels to evaluate, in order. Defaults to the five
            levels 0.1 to 0.5.

    Returns:
        None. All results are printed.
    """
    print("Tree parameters", sizelog, mode, minimum, maximum, sequence, choice, parallel, loops)

    # Generate first, check afterwards. Seeding the average with a dummy value
    # would skip generation entirely whenever minimum / 2 is not above that
    # value, leaving `log` undefined for the noise loop below.
    while True:
        log, tree = Gen_Log(sizelog, mode, minimum, maximum, sequence, choice, parallel, loops)
        av_num_traces = get_average(log)
        if av_num_traces >= minimum / 2:
            break
    print(av_num_traces)

    for noise in noises:
        print("Noise level:", noise)
        noise_log = add_noise(log, noise)
        # Each get_dist returns a pair; only its first element is reported.
        WMD1, _ = WMD.get_dist(log, noise_log, windowsize)
        print("WMD:")
        print(WMD1)
        ICT1, _ = ICT.get_dist(log, noise_log, windowsize)
        print("ICT:")
        print(ICT1)
        T1, _ = TraceDist.get_dist(log, noise_log, windowsize)
        print("Trace:")
        print(T1)
        
        
    

# example test

In [11]:
# Example run: 1000 traces; sequence and choice weights 0.75 / 0.25,
# parallel and loop weights 0.0; window size 3.
test(
    sizelog=1000,
    mode=10,
    minimum=5,
    maximum=15,
    sequence=0.75,
    choice=0.25,
    parallel=0.0,
    loops=0.0,
    windowsize=3,
    k=3,
)

Tree parameters 1000 10 5 15 0.75 0.25 0.0 0.0
6.0
Noise level: 0.1


KeyboardInterrupt: 