In [1]:
import json, copy
import os

import pandas as pd

#needed to import PN
from pm4py.objects.petri.importer import importer as pnml_importer
#visualize PN
from pm4py.visualization.petrinet import visualizer as pn_visualizer
#playout PN
from pm4py.simulation.playout import simulator



In [2]:
def import_net(filename):
    """Read a Petri net from the PNML file ``filename``.

    Returns the three values produced by ``pnml_importer.apply`` as a tuple
    (the net, its initial marking and its final marking).
    """
    petri_net, initial_marking, final_marking = pnml_importer.apply(filename)
    return petri_net, initial_marking, final_marking

def gen_log_from_net(net, im, num_traces):
    """Simulate ``num_traces`` traces of ``net`` starting from marking ``im``.

    Runs pm4py's ``BASIC_PLAYOUT`` simulator variant and returns whatever log
    object that simulator produces.
    """
    playout = simulator.Variants.BASIC_PLAYOUT
    settings = {playout.value.Parameters.NO_TRACES: num_traces}
    return simulator.apply(net, im, variant=playout, parameters=settings)

def get_alphabet(net):
    """Return the distinct activity labels of ``net`` as a sorted list.

    Transitions without a label (``None`` or empty string) and transitions
    whose label contains an underscore are left out.

    The labels are sorted because iterating a set of strings yields an order
    that can change between interpreter runs; any numbering derived from this
    list would otherwise not be reproducible after a kernel restart.
    """
    labels = {
        transition.label
        for transition in net.transitions
        if transition.label and '_' not in transition.label
    }
    return sorted(labels)

def get_integer_map_net(net):
    """Assign an integer code to every activity label of ``net``.

    Codes start at 1 and follow the order in which ``get_alphabet`` returns
    the labels. The result is a dict mapping label -> code.
    """
    return {label: code for code, label in enumerate(get_alphabet(net), start=1)}

def apply_integer_map(log, map):
    """Encode every trace of ``log`` as a list of codes taken from ``map``.

    Each event is looked up in ``map`` by its ``'concept:name'`` entry. The
    result is a list with one list of codes per trace; lookup errors for
    names missing from ``map`` are not caught here.
    """
    encoded_log = []
    for trace in log:
        encoded_trace = []
        for event in trace:
            encoded_trace.append(map[event['concept:name']])
        encoded_log.append(encoded_trace)
    return encoded_log

def get_variants(net, im, max_loops, maxlen):
    """Enumerate the variants of a Petri net with pm4py's EXTENSIVE playout.

    Parameters
    ----------
    net, im : the Petri net and the marking the playout starts from.
    max_loops : passed to the simulator as ``MAX_MARKING_OCC = max_loops + 1``.
    maxlen : passed to the simulator as ``MAX_TRACE_LENGTH``.

    Returns the object produced by ``simulator.apply``.
    """
    extensive = simulator.Variants.EXTENSIVE
    params = extensive.value.Parameters
    # MAX_TRACE_LENGTH used to appear twice in this dict (100, then maxlen);
    # the later entry always won, so only maxlen is kept.
    settings = {
        params.MAX_TRACE_LENGTH: maxlen,
        params.MAX_MARKING_OCC: max_loops + 1,
    }
    return simulator.apply(net, im, variant=extensive, parameters=settings)

def save_log(loglist, filename):
    """Write a list of lists to ``filename`` as CSV.

    Each inner list becomes one row; the pandas row index is not written.
    """
    pd.DataFrame.from_records(loglist).to_csv(filename, index=False)
    
def delete_variant(log, variant):
    """Return a new list holding every trace of ``log`` that differs from ``variant``.

    ``log`` itself is not modified; the kept traces are the same objects as
    in ``log``.
    """
    remaining = []
    for trace in log:
        if trace != variant:
            remaining.append(trace)
    return remaining

In [3]:
# Load the Petri net together with the two markings returned by the importer.
net, im, fm = import_net('5parallel.pnml')

# Integer encoding of the activity labels, used for every exported log.
mapping = get_integer_map_net(net)

# Persist the encoding as JSON so the integer codes can be decoded later.
mappingfilename = 'mapping.txt'
with open(mappingfilename, 'w') as mapping_file:
    json.dump(mapping, mapping_file)

In [4]:
# Enumerate the variants of the net (max_loops=3, maxlen=50) and
# translate them to their integer encoding.
raw_variants = get_variants(net, im, 3, 50)
variants = apply_integer_map(raw_variants, mapping)

# Store the encoded variants on disk.
varname = 'variants.csv'
save_log(variants, varname)

  variants = simulator.apply(net, im, variant=simulator.Variants.EXTENSIVE,


In [5]:
# Simulate 12000 traces from the net and translate them to integer codes.
simulated_log = gen_log_from_net(net, im, 12000)
traintestlog = apply_integer_map(simulated_log, mapping)

# Keep the complete log on disk; the per-variant splits are derived from it.
save_log(traintestlog, "Full_traintest_log.csv")

  log = simulator.apply(net, im, variant=simulator.Variants.BASIC_PLAYOUT,


In [6]:
# Leave-one-variant-out split: for every variant, the train set is the full
# log without that variant, and the test set holds only that variant,
# repeated as many times as it was removed from the full log.
for out_dir in ("Train_sets", "Test_sets"):
    # save_log writes with DataFrame.to_csv, which does not create missing
    # parent directories, so make sure both output folders exist first.
    os.makedirs(out_dir, exist_ok=True)

for i, variant in enumerate(variants):
    print(i)
    print(variant)
    # delete_variant builds a new list and leaves traintestlog untouched,
    # so no deep copy of the full log is needed on each iteration.
    train_log = delete_variant(traintestlog, variant)
    train_filename = "Train_sets/log_" + str(i) + ".csv"
    save_log(train_log, train_filename)
    # Number of removed traces, taken from the actual log size instead of a
    # hard-coded 12000 so it stays correct if the simulated log size changes.
    testsize = len(traintestlog) - len(train_log)
    test_log = [variant] * testsize
    test_filename = "Test_sets/log_" + str(i) + ".csv"
    save_log(test_log, test_filename)
    

0
[5, 12, 3, 6, 8, 1, 2, 11, 7, 10, 9, 13, 4]
1
[5, 12, 3, 6, 8, 1, 2, 7, 11, 10, 9, 13, 4]
2
[5, 12, 3, 6, 8, 1, 11, 2, 7, 10, 9, 13, 4]
3
[5, 12, 3, 6, 8, 1, 11, 7, 2, 10, 9, 13, 4]
4
[5, 12, 3, 6, 8, 1, 7, 2, 11, 10, 9, 13, 4]
5
[5, 12, 3, 6, 8, 1, 7, 11, 2, 10, 9, 13, 4]
6
[5, 12, 3, 6, 8, 2, 1, 11, 7, 10, 9, 13, 4]
7
[5, 12, 3, 6, 8, 2, 1, 7, 11, 10, 9, 13, 4]
8
[5, 12, 3, 6, 8, 2, 11, 1, 7, 10, 9, 13, 4]
9
[5, 12, 3, 6, 8, 2, 11, 7, 1, 10, 9, 13, 4]
10
[5, 12, 3, 6, 8, 2, 7, 1, 11, 10, 9, 13, 4]
11
[5, 12, 3, 6, 8, 2, 7, 11, 1, 10, 9, 13, 4]
12
[5, 12, 3, 6, 8, 11, 1, 2, 7, 10, 9, 13, 4]
13
[5, 12, 3, 6, 8, 11, 1, 7, 2, 10, 9, 13, 4]
14
[5, 12, 3, 6, 8, 11, 2, 1, 7, 10, 9, 13, 4]
15
[5, 12, 3, 6, 8, 11, 2, 7, 1, 10, 9, 13, 4]
16
[5, 12, 3, 6, 8, 11, 7, 1, 2, 10, 9, 13, 4]
17
[5, 12, 3, 6, 8, 11, 7, 2, 1, 10, 9, 13, 4]
18
[5, 12, 3, 6, 8, 7, 1, 2, 11, 10, 9, 13, 4]
19
[5, 12, 3, 6, 8, 7, 1, 11, 2, 10, 9, 13, 4]
20
[5, 12, 3, 6, 8, 7, 2, 1, 11, 10, 9, 13, 4]
21
[5, 12, 3, 6, 8, 7, 