In [24]:
#needed to import PN
from pm4py.objects.petri.importer import importer as pnml_importer
#visualize PN
from pm4py.visualization.petrinet import visualizer as pn_visualizer
#playout PN
from pm4py.simulation.playout import simulator

from pm4py.algo.filtering.log.end_activities import end_activities_filter

import pandas as pd

import json, copy

import random

In [25]:
def import_net(filename):
    net, im , fm = pnml_importer.apply(filename)
    return net,im,fm

def gen_log_from_net(net, im, num_traces):
    log = simulator.apply(net, im, variant=simulator.Variants.BASIC_PLAYOUT,
                          parameters={simulator.Variants.BASIC_PLAYOUT.value.Parameters.NO_TRACES: num_traces})
    return log

def get_alphabet(net):
    activities = list({a.label for a in net.transitions if a.label and not '_' in a.label})
    return activities

def get_integer_map_net(net):
    return {x: i+1 for i,x in enumerate(get_alphabet(net))}

def apply_integer_map(log, map):
    return [[map[a['concept:name']] for a in t] for t in log]

def get_variants(net, im, max_loops, maxlen): #get all variants from a petri net
    variants = simulator.apply(net, im, variant=simulator.Variants.EXTENSIVE, 
                               parameters={simulator.Variants.EXTENSIVE.value.Parameters.MAX_TRACE_LENGTH: 100,
                                           simulator.Variants.EXTENSIVE.value.Parameters.MAX_MARKING_OCC: max_loops+1,
                                           simulator.Variants.EXTENSIVE.value.Parameters.MAX_TRACE_LENGTH: maxlen})
    return variants

def save_log(loglist, filename): #save a list of lists 
    df = pd.DataFrame.from_records(loglist)
    df.to_csv(filename, index=False)
    
def delete_variant_withtest(log, variant): #remove a variant from a log and return new log
    return([trace for trace in log if trace != variant], [trace for trace in log if trace == variant])

def delete_multiple_variants(log, variants, how_many):
    log = copy.deepcopy(log)
    testlog = []
    which = random.sample(range(0, len(variants)), how_many)
    print(which)
    for i in which:
        log, extratest = delete_variant_withtest(log, variants[i])
        testlog = testlog + extratest
    return(log, testlog)

In [26]:
def delete_too_much_loop(variants, max_occ, loop_act):
    new_variants = []
    for i in range(len(variants)):
        if variants[i].count(loop_act) <= max_occ:
            new_variants.append(variants[i])
    return(new_variants)

In [27]:
modelname = 'MoreComplex'

amount = 132

In [28]:
net, im, fm = import_net(modelname+'.pnml')

#get number encoding we are going to use
mapping = get_integer_map_net(net)

#save number mapping
mappingfilename = 'Mappings/mapping_'+modelname+'.txt'  
with open(mappingfilename, 'w') as f:
    f.write(json.dumps(mapping))

#get all variants
variants = apply_integer_map(get_variants(net, im, 2, 50), mapping)


  variants = simulator.apply(net, im, variant=simulator.Variants.EXTENSIVE,


In [29]:
print(len(variants))

8448


In [30]:
print(variants)

[[12, 8, 7, 1, 13, 2, 3, 4, 10, 6, 11, 5], [12, 8, 7, 1, 13, 2, 4, 10, 3, 6, 11, 5], [12, 8, 7, 1, 13, 2, 4, 3, 10, 6, 11, 5], [12, 8, 7, 1, 13, 9, 3, 4, 10, 6, 11, 5], [12, 8, 7, 1, 13, 9, 4, 10, 3, 6, 11, 5], [12, 8, 7, 1, 13, 9, 4, 3, 10, 6, 11, 5], [12, 8, 7, 1, 13, 3, 2, 4, 10, 6, 11, 5], [12, 8, 7, 1, 13, 3, 9, 4, 10, 6, 11, 5], [12, 8, 7, 1, 13, 3, 4, 2, 10, 6, 11, 5], [12, 8, 7, 1, 13, 3, 4, 9, 10, 6, 11, 5], [12, 8, 7, 1, 13, 3, 4, 10, 2, 6, 11, 5], [12, 8, 7, 1, 13, 3, 4, 10, 9, 6, 11, 5], [12, 8, 7, 1, 13, 4, 2, 10, 3, 6, 11, 5], [12, 8, 7, 1, 13, 4, 2, 3, 10, 6, 11, 5], [12, 8, 7, 1, 13, 4, 9, 10, 3, 6, 11, 5], [12, 8, 7, 1, 13, 4, 9, 3, 10, 6, 11, 5], [12, 8, 7, 1, 13, 4, 3, 2, 10, 6, 11, 5], [12, 8, 7, 1, 13, 4, 3, 9, 10, 6, 11, 5], [12, 8, 7, 1, 13, 4, 3, 10, 2, 6, 11, 5], [12, 8, 7, 1, 13, 4, 3, 10, 9, 6, 11, 5], [12, 8, 7, 1, 13, 4, 10, 2, 3, 6, 11, 5], [12, 8, 7, 1, 13, 4, 10, 9, 3, 6, 11, 5], [12, 8, 7, 1, 13, 4, 10, 3, 2, 6, 11, 5], [12, 8, 7, 1, 13, 4, 10, 3, 9, 6,

In [31]:
#for some reason pm4py playout if failing to limit occurences
variants = delete_too_much_loop(variants, 3, 3) #change second 3 into the mapping of the loop activity

varname = 'Variants/variants_' + modelname+'.csv'
save_log(variants, varname)

traintestlog = apply_integer_map(gen_log_from_net(net, im, 66000), mapping)

trtename = 'Train+Test_sets/Log_' + modelname +'.csv'
save_log(traintestlog, trtename)

train, test = delete_multiple_variants(traintestlog, variants, amount)

train_filename = "Train_sets/log_" + modelname +'.csv'
save_log(train, train_filename)

test_filename = "Test_sets/log_" + modelname +'.csv'
save_log(test, test_filename)

  log = simulator.apply(net, im, variant=simulator.Variants.BASIC_PLAYOUT,


[493, 346, 659, 287, 209, 630, 165, 613, 394, 181, 317, 240, 621, 176, 412, 652, 288, 551, 175, 210, 29, 26, 93, 385, 0, 146, 342, 607, 431, 1, 284, 262, 258, 390, 433, 50, 521, 265, 591, 173, 281, 312, 595, 505, 157, 495, 60, 170, 260, 563, 276, 25, 594, 600, 459, 401, 425, 605, 271, 17, 475, 552, 399, 100, 471, 290, 634, 432, 194, 321, 506, 626, 94, 221, 483, 86, 453, 473, 71, 227, 369, 115, 104, 544, 253, 24, 601, 11, 449, 426, 481, 102, 245, 583, 623, 283, 336, 239, 339, 212, 526, 636, 139, 361, 266, 494, 118, 373, 484, 511, 213, 466, 196, 331, 313, 257, 380, 335, 218, 14, 82, 428, 289, 501, 490, 140, 19, 353, 233, 377, 658, 434]


In [32]:
print(len(variants))

660


In [33]:
print(len(traintestlog))

66000


In [21]:
modelname = 'Model1'
amount = 24

net, im, fm = import_net(modelname+'.pnml')

#get number encoding we are going to use
mapping = get_integer_map_net(net)
#save number mapping
mappingfilename = 'Mappings/mapping_'+modelname+'.txt'  
with open(mappingfilename, 'w') as f:
    f.write(json.dumps(mapping))
    
#get all variants
variants = apply_integer_map(get_variants(net, im, 2, 50), mapping)

varname = 'Variants/variants_' + modelname+'.csv'
save_log(variants, varname)

traintestlog = apply_integer_map(gen_log_from_net(net, im, 12000), mapping)

trtename = 'Train+Test_sets/Log_' + modelname +'.csv'
save_log(traintestlog, trtename)

train, test = delete_multiple_variants(traintestlog, variants, amount)

train_filename = "Train_sets/log_" + modelname +'.csv'
save_log(train, train_filename)

test_filename = "Test_sets/log_" + modelname +'.csv'
save_log(test, test_filename)

  variants = simulator.apply(net, im, variant=simulator.Variants.EXTENSIVE,
  log = simulator.apply(net, im, variant=simulator.Variants.BASIC_PLAYOUT,


[79, 20, 38, 67, 43, 111, 102, 47, 55, 37, 57, 95, 74, 84, 58, 73, 3, 61, 89, 26, 71, 90, 24, 92]


In [22]:
modelname = 'Model2' #XOR
amount = 26

net, im, fm = import_net(modelname+'.pnml')

#get number encoding we are going to use
mapping = get_integer_map_net(net)
#save number mapping
mappingfilename = 'Mappings/mapping_'+modelname+'.txt'  
with open(mappingfilename, 'w') as f:
    f.write(json.dumps(mapping))
    
#get all variants
variants = apply_integer_map(get_variants(net, im, 2, 50), mapping)

varname = 'Variants/variants_' + modelname+'.csv'
save_log(variants, varname)

traintestlog = apply_integer_map(gen_log_from_net(net, im, 12000), mapping)

trtename = 'Train+Test_sets/Log_' + modelname +'.csv'
save_log(traintestlog, trtename)

train, test = delete_multiple_variants(traintestlog, variants, amount)

train_filename = "Train_sets/log_" + modelname +'.csv'
save_log(train, train_filename)

test_filename = "Test_sets/log_" + modelname +'.csv'
save_log(test, test_filename)

  variants = simulator.apply(net, im, variant=simulator.Variants.EXTENSIVE,
  log = simulator.apply(net, im, variant=simulator.Variants.BASIC_PLAYOUT,


[99, 95, 123, 106, 10, 27, 18, 14, 23, 41, 48, 22, 46, 47, 24, 86, 68, 79, 21, 76, 97, 73, 30, 55, 83, 25]


In [23]:
modelname = 'Model3' #XOR+Dep
amount = 26

net, im, fm = import_net(modelname+'.pnml')

#get number encoding we are going to use
mapping = get_integer_map_net(net)
#save number mapping
mappingfilename = 'Mappings/mapping_'+modelname+'.txt'  
with open(mappingfilename, 'w') as f:
    f.write(json.dumps(mapping))
    
#get all variants
variants = apply_integer_map(get_variants(net, im, 2, 50), mapping)

varname = 'Variants/variants_' + modelname+'.csv'
save_log(variants, varname)

traintestlog = apply_integer_map(gen_log_from_net(net, im, 12000), mapping)

trtename = 'Train+Test_sets/Log_' + modelname +'.csv'
save_log(traintestlog, trtename)

train, test = delete_multiple_variants(traintestlog, variants, amount)

train_filename = "Train_sets/log_" + modelname +'.csv'
save_log(train, train_filename)

test_filename = "Test_sets/log_" + modelname +'.csv'
save_log(test, test_filename)

  variants = simulator.apply(net, im, variant=simulator.Variants.EXTENSIVE,
  log = simulator.apply(net, im, variant=simulator.Variants.BASIC_PLAYOUT,


[8, 90, 81, 65, 100, 119, 1, 2, 50, 120, 52, 41, 32, 88, 11, 42, 73, 23, 53, 56, 34, 72, 31, 104, 102, 103]


In [24]:
def fix_variants_IOR(variants):
    filtered_log = end_activities_filter.apply(variants, ["O"])
    return(filtered_log)

In [25]:
modelname = 'Model4' #IOR
amount = 13

net, im, fm = import_net(modelname+'.pnml')

#get number encoding we are going to use
mapping = get_integer_map_net(net)
#save number mapping
mappingfilename = 'Mappings/mapping_'+modelname+'.txt'  
with open(mappingfilename, 'w') as f:
    f.write(json.dumps(mapping))
    
#get all variants
#need to do extra steps because play-out allow for incomplete traces
variantdum  = get_variants(net, im, 3, 50) 
print(len(variantdum))
variantdumfiltered = fix_variants_IOR(variantdum)
print(len(variantdumfiltered))
#get all variants
variants = apply_integer_map(variantdumfiltered, mapping)


varname = 'Variants/variants_' + modelname+'.csv'
save_log(variants, varname)

#need to do extra steps because play-out allow for incomplete traces
traintestdum = gen_log_from_net(net, im, 50000)
traintestdum = fix_variants_IOR(traintestdum)
print(len(traintestdum))
traintestdum = traintestdum[0:12000]
traintestlog = apply_integer_map(traintestdum, mapping)

trtename = 'Train+Test_sets/Log_' + modelname +'.csv'
save_log(traintestlog, trtename)

train, test = delete_multiple_variants(traintestlog, variants, amount)

train_filename = "Train_sets/log_" + modelname +'.csv'
save_log(train, train_filename)

test_filename = "Test_sets/log_" + modelname +'.csv'
save_log(test, test_filename)

85
64


  variants = simulator.apply(net, im, variant=simulator.Variants.EXTENSIVE,
  log = simulator.apply(net, im, variant=simulator.Variants.BASIC_PLAYOUT,


21186
[26, 50, 4, 16, 27, 18, 7, 37, 57, 60, 53, 63, 40]


In [26]:
modelname = 'Model5' #Parallel 2
amount = 25

net, im, fm = import_net(modelname+'.pnml')

#get number encoding we are going to use
mapping = get_integer_map_net(net)
#save number mapping
mappingfilename = 'Mappings/mapping_'+modelname+'.txt'  
with open(mappingfilename, 'w') as f:
    f.write(json.dumps(mapping))
    
#get all variants
variants = apply_integer_map(get_variants(net, im, 2, 50), mapping)

varname = 'Variants/variants_' + modelname+'.csv'
save_log(variants, varname)

traintestlog = apply_integer_map(gen_log_from_net(net, im, 12000), mapping)

trtename = 'Train+Test_sets/Log_' + modelname +'.csv'
save_log(traintestlog, trtename)

train, test = delete_multiple_variants(traintestlog, variants, amount)

train_filename = "Train_sets/log_" + modelname +'.csv'
save_log(train, train_filename)

test_filename = "Test_sets/log_" + modelname +'.csv'
save_log(test, test_filename)

  variants = simulator.apply(net, im, variant=simulator.Variants.EXTENSIVE,
  log = simulator.apply(net, im, variant=simulator.Variants.BASIC_PLAYOUT,


[18, 73, 52, 11, 79, 45, 14, 94, 68, 105, 76, 37, 50, 17, 30, 20, 0, 48, 69, 80, 57, 91, 61, 28, 81]


In [27]:
modelname = 'Model6' #loop model
amount = 5

net, im, fm = import_net(modelname+'.pnml')

#get number encoding we are going to use
mapping = get_integer_map_net(net)
#save number mapping
mappingfilename = 'Mappings/mapping_'+modelname+'.txt'  
with open(mappingfilename, 'w') as f:
    f.write(json.dumps(mapping))
    
#get all variants
variants = apply_integer_map(get_variants(net, im, 2, 50), mapping) #max 2 times taken, or three times in total

varname = 'Variants/variants_' + modelname+'.csv'
save_log(variants, varname)

traintestlog = apply_integer_map(gen_log_from_net(net, im, 12000), mapping)

trtename = 'Train+Test_sets/Log_' + modelname +'.csv'
save_log(traintestlog, trtename)

train, test = delete_multiple_variants(traintestlog, variants, amount)

train_filename = "Train_sets/log_" + modelname +'.csv'
save_log(train, train_filename)

test_filename = "Test_sets/log_" + modelname +'.csv'
save_log(test, test_filename)

  variants = simulator.apply(net, im, variant=simulator.Variants.EXTENSIVE,
  log = simulator.apply(net, im, variant=simulator.Variants.BASIC_PLAYOUT,


[2, 19, 23, 15, 13]
