In [46]:
import pm4py
import os
from pm4py.objects.log.importer.xes import importer as xes_importer
from pm4py.algo.discovery.inductive import algorithm as inductive_miner
from pm4py.algo.discovery.footprints import algorithm as footprints_discovery
from pm4py.algo.conformance.footprints import algorithm as footprints_conformance
from pm4py.algo.conformance.tokenreplay import algorithm as token_replay
from pm4py.objects.conversion.bpmn import converter as bpmn_converter
from pm4py.algo.evaluation.precision import algorithm as precision_evaluator
from pm4py.algo.evaluation.replay_fitness import algorithm as replay_fitness_evaluator
import spacy
import copy
from spacy import displacy

# Explore NLP

In [41]:
nlp = spacy.load('de_core_news_sm')
text = [
    "Als erstes backt der Bäcker ein Brot.",
    "Danach holt der Bäcker das Brot aus dem Ofen.",
    "Dann prüft der Bäcker ob das Brot ok ist.",
    "Wenn ja, dann verkauft der Bäcker das Brot.",
    "Wenn nein, dann schmeißt der Bäcker das Brot weg und backt ein neues Brot.",
]
for sentence in text:
    doc = nlp(sentence)
    displacy.render(doc, style="dep", jupyter=True, options={'distance': 140})

In [47]:
extracted_infos = []
trace = 0
# Junction traces
junction_counter = 0

for index, sentence in enumerate(text):
    doc = nlp(sentence)
    tokens = {token.pos_:token.text for token in doc}

    # Split CCONJ e.g. "and" to seperate items
    if 'CCONJ' in tokens.keys():
        sentence_parts = sentence.split(tokens.get('CCONJ'))
        doc = nlp(sentence_parts[0]) # Overwrite old doc for new parted sentence
        text = text[:index] + sentence_parts + text[index+1:]

    # Exclusive OR = new trace
    if 'SCONJ' in tokens.keys():
        junction_counter += 1 # Count trace up
        if junction_counter > 1:
            trace += 1
            update = {'TRACE':trace}
            extracted_infos_copy = [{**d,**update} for d in extracted_infos_copy]
            extracted_infos += extracted_infos_copy
        else:
            # After first junction, make copy with disjunct entries
            extracted_infos_copy = copy.deepcopy(extracted_infos)
    else:
        junction_counter = 0
    
    # Extract information for extracted_infos
    extraction = {'TRACE':trace, 'OBJECT':'%', 'VERB':'%', 'SUBJECT':'%', 'STEP':index}
    for token in doc:
        if (token.pos_ == 'VERB'):
            extraction['VERB'] = token.lemma_
        elif (token.dep_=='sb'):
            extraction['SUBJECT'] = token.text
        elif (token.dep_=='oa'):
            extraction['OBJECT'] = token.text
        elif (token.dep_ in ['pg','nk']):
            extraction['OBJECT'] += " " + token.text
        elif (token.dep_ in ['svp']):
            extraction['VERB'] = token.text + " " + extraction['VERB']
    extracted_infos.append(extraction)
    

extracted_infos

[{'TRACE': 0,
  'OBJECT': 'Brot',
  'VERB': 'backen',
  'SUBJECT': 'Bäcker',
  'STEP': 0},
 {'TRACE': 0,
  'OBJECT': 'Brot dem Ofen',
  'VERB': 'holen',
  'SUBJECT': 'Bäcker',
  'STEP': 1},
 {'TRACE': 0,
  'OBJECT': '% der das',
  'VERB': 'prüfen',
  'SUBJECT': 'Brot',
  'STEP': 2},
 {'TRACE': 1,
  'OBJECT': 'Brot',
  'VERB': 'backen',
  'SUBJECT': 'Bäcker',
  'STEP': 0},
 {'TRACE': 1,
  'OBJECT': 'Brot dem Ofen',
  'VERB': 'holen',
  'SUBJECT': 'Bäcker',
  'STEP': 1},
 {'TRACE': 1,
  'OBJECT': 'Brot',
  'VERB': 'verkaufen',
  'SUBJECT': 'Bäcker',
  'STEP': 3},
 {'TRACE': 2,
  'OBJECT': 'Brot',
  'VERB': 'backen',
  'SUBJECT': 'Bäcker',
  'STEP': 0},
 {'TRACE': 2,
  'OBJECT': 'Brot dem Ofen',
  'VERB': 'holen',
  'SUBJECT': 'Bäcker',
  'STEP': 1},
 {'TRACE': 2,
  'OBJECT': 'Brot',
  'VERB': 'weg schmeißen',
  'SUBJECT': 'Bäcker',
  'STEP': 4}]

# Evaluation

In [69]:
reference_bpmn = 'test_3.bpmn'
generated_bpmn = '../src/static/assets/bpmn/process.xes'

## Reference BPMN

In [70]:
bpmn_graph = pm4py.read_bpmn(os.path.join(reference_bpmn))
# Petri Nets
net, im, fm = bpmn_converter.apply(bpmn_graph)

## Generated Event Log

In [71]:
event_log = xes_importer.apply(generated_bpmn)

parsing log, completed traces :: 100%|██████████| 2/2 [00:00<00:00, 2804.62it/s]


## Conformance checking

In [67]:
prec = precision_evaluator.apply(event_log, net, im, fm, variant=precision_evaluator.Variants.ETCONFORMANCE_TOKEN)
prec

fitness = replay_fitness_evaluator.apply(event_log, net, im, fm, variant=replay_fitness_evaluator.Variants.TOKEN_BASED)
fitness

replaying log with TBR, completed variants :: 100%|██████████| 2/2 [00:00<00:00, 9521.69it/s]
replaying log with TBR, completed variants :: 100%|██████████| 2/2 [00:00<00:00, 5866.16it/s]


{'perc_fit_traces': 0.0,
 'average_trace_fitness': 0.0,
 'log_fitness': 0.0,
 'percentage_of_fitting_traces': 0.0}

In [65]:
replayed_traces = token_replay.apply(event_log, net, im, fm)
replayed_traces

replaying log with TBR, completed variants :: 100%|██████████| 2/2 [00:00<00:00, 6172.63it/s]


[{'trace_is_fit': False,
  'trace_fitness': 0.0,
  'activated_transitions': [],
  'reached_marking': ['source:1'],
  'enabled_transitions_in_marking': {(Activity_0p87uh7, 'Identifiziere Bezahlmethode')},
  'transitions_with_problems': [],
  'missing_tokens': 1,
  'consumed_tokens': 1,
  'remaining_tokens': 1,
  'produced_tokens': 1},
 {'trace_is_fit': False,
  'trace_fitness': 0.0,
  'activated_transitions': [],
  'reached_marking': ['source:1'],
  'enabled_transitions_in_marking': {(Activity_0p87uh7, 'Identifiziere Bezahlmethode')},
  'transitions_with_problems': [],
  'missing_tokens': 1,
  'consumed_tokens': 1,
  'remaining_tokens': 1,
  'produced_tokens': 1}]