In [1]:
# IPython magics: load the autoreload extension and switch it to mode 2, so
# that edits to imported modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
import os


def ensure_project_root(notebook_dir_name='notebooks'):
    """Make the project root the working directory and return it.

    The data and image paths used further down ('notebooks/data/...',
    'notebooks/img/...') are relative to the parent of the folder this
    notebook lives in. The step up is only taken while the working directory
    is still the notebook folder, so re-running this cell on a live kernel
    (or starting the kernel from the project root) never climbs a second
    level. The previous `to_run` flag could not prevent that because it was
    reset to True every time the cell ran.

    Args:
        notebook_dir_name: Name of the folder that contains the notebook.

    Returns:
        The working directory after the (possible) change.
    """
    if os.path.basename(os.getcwd()) == notebook_dir_name:
        os.chdir('..')
    return os.getcwd()


# Show the directory before and after, as a quick sanity check.
print(os.getcwd())
print(ensure_project_root())

/Users/aninauldum/Documents/Datalogi/REBS/opg1/notebooks
/Users/aninauldum/Documents/Datalogi/REBS/opg1


In [2]:
import pandas as pd
import pm4py
from copy import deepcopy

SyntaxError: invalid syntax (xml_dcr_portal.py, line 79)

Note: The pm4py library used in this notebook is an extension of the original pm4py library that adds support for DCR graphs. More information on using pm4py is available here:

https://pm4py.fit.fraunhofer.de/documentation.

# Event log data analysis
Note: sepsis_df is a pandas dataframe. Feel free to use your knowledge of data frames and data analysis as you see fit. Exploratory Data Analysis (EDA) as you know it from Data Science should work as expected.

In [None]:
# Path is relative to the project root (the first cell moves the working
# directory up from the 'notebooks' folder).
event_log_file = 'notebooks/data/Sepsis Cases - Event Log.xes'
# Read the XES event log; the result is used as a pandas DataFrame below.
sepsis_df = pm4py.read_xes(event_log_file, infer_datetime_format=True) #loading the log
# Display the log as the cell output.
sepsis_df # note there are many NaN values, this is expected. When you have a not NaN value look at the concept:name to understand the link between the data attribute and the event.

In [None]:
# Case identifiers of every trace that contains at least one 'Return ER' event.
is_return_er = sepsis_df['concept:name'] == 'Return ER'
returning = sepsis_df.loc[is_return_er, 'case:concept:name'].unique().tolist()

# Part 2.1 Process discovery
Note: The model mined with this version of DisCoveR is more complex than the one from the dcrgraphs.net portal. It finds more relations.

Note 2: 'conditionsFor' represents the inverse reading of a condition arrow (this was done for efficient checking of the enabledness definition). 
    For example, 'Release D' is a condition for 'Return ER' ('Release D' -->* 'Return ER'), but in the Python object the relation is stored under the target event: 'conditionsFor': { ... 'Return ER': {'Release D', ...

Note 3: The other relations 'includesTo' (-->+), 'excludesTo' (-->%), 'responseTo' (*-->) are represented in the python object in the same direction as the arrows indicate. 
    For example, 'excludesTo': { ... 'Return ER': {'Return ER', 'Release B' ... encodes the visual self exclusion on 'Return ER' and that 'Return ER' excludes 'Release B' ('Return ER' -->% 'Release B') 

In [None]:
# Mine a DCR graph from the complete log. discover_dcr returns a pair; the
# second element (log_abstraction) is not used in the cells shown here.
dcr_sepsis, log_abstraction = pm4py.discover_dcr(sepsis_df) # this discovers a model for the whole log
print('Inspect the mined DCR Graph from pm4py')
# Print a summary of the constraints, then two of the relation dictionaries.
print(f'[Found relations] {dcr_sepsis.get_constraints()}')
print(f'[conditionsFor] {dcr_sepsis.conditions}')
print(f'[excludesTo] {dcr_sepsis.excludes}')
# when you discover (mine) a DCR Graph the marking you get will have all events included, none executed and none pending
print(f'[marking executed] {dcr_sepsis.marking.executed}')
print(f'[marking included] {dcr_sepsis.marking.included}')
print(f'[marking pending] {dcr_sepsis.marking.pending}')
# Render the mined graph in the notebook.
pm4py.view_dcr(dcr_sepsis)

Example: play with the execution semantics in the notebook

In [None]:
def execute_and_check(event_to_execute,dcr_graph, dcr_semantics):
    """Attempt to execute one event on a DCR graph and report the outcome.

    The enabled events are printed first. If the requested event is among
    them it is executed through ``dcr_semantics.execute`` and the enabled
    events and acceptance status of the resulting graph are printed.
    Otherwise an error line is printed and nothing is executed.

    Args:
        event_to_execute: Name of the event to try.
        dcr_graph: Graph (with its current marking) to execute on.
        dcr_semantics: Object providing ``enabled``, ``execute`` and
            ``is_accepting``.

    Returns:
        Whatever ``dcr_semantics.execute`` returned when the event was
        enabled, else the ``dcr_graph`` that was passed in.
    """
    enabled_events = dcr_semantics.enabled(dcr_graph)
    print(f'[Before executing {event_to_execute}] Enabled events: {enabled_events}')

    # Guard clause: refuse events that the current marking does not enable.
    if event_to_execute not in enabled_events:
        print(f'[Error!] {event_to_execute} not in the set of enabled events')
        return dcr_graph

    # Execute the event, then report the state of the resulting graph.
    dcr_graph_new_marking = dcr_semantics.execute(dcr_graph,event_to_execute)
    print(f'[ After executing {event_to_execute}] Enabled events: {dcr_semantics.enabled(dcr_graph_new_marking)}')
    print(f'[Is the graph accepting?] {dcr_semantics.is_accepting(dcr_graph_new_marking)}')
    return dcr_graph_new_marking

from pm4py.objects.dcr.semantics import DcrSemantics

# Simulate on a copy so that dcr_sepsis itself is not the object being executed on.
dcr_semantics = DcrSemantics()
temp_dcr = deepcopy(dcr_sepsis)

# Change these two event names to simulate a different execution.
event_to_execute1 = 'CRP'
event_to_execute2 = 'ER Sepsis Triage'

# Execute them in order, carrying the returned graph from one step to the next.
for chosen_event in (event_to_execute1, event_to_execute2):
    temp_dcr = execute_and_check(chosen_event, temp_dcr, dcr_semantics)

2.3 (in the notebook): Now I can make 'Release D' pending. This affects the acceptance criterion: a pending, included event prevents the graph from being accepting. I also cannot execute 'Release D' yet, because I first have to satisfy the chain of conditions that eventually enables it.
What is the minimal sequence of events I need to execute before I can execute 'Release D' and return to an accepting state of my DCR Graph?

In [None]:
# Start again from a fresh copy of the mined graph and mark 'Release D' as pending.
dcr_semantics = DcrSemantics()
temp_dcr = deepcopy(dcr_sepsis)
temp_dcr.marking.pending.add('Release D')
print(f'[marking pending] {temp_dcr.marking.pending}')

# Exercise 2.3: sequence of events tried in order to enable and finally execute
# 'Release D'. Each step prints whether the event was enabled.
release_d_sequence = ['ER Registration','ER Triage', 'Admission NC', 'ER Sepsis Triage', 'CRP', 'Leucocytes', 'Release D']
for event in release_d_sequence:
    temp_dcr = execute_and_check(event,temp_dcr,dcr_semantics)

In [None]:
# Split the log by case: traces that contain a 'Return ER' event versus the rest.
in_returning_case = sepsis_df['case:concept:name'].isin(returning)
returning_df = sepsis_df[in_returning_case]
non_returning_df = sepsis_df[~in_returning_case]

# Exercise 1.5: mine one DCR graph per subset (the second return value is discarded).
non_returning_dcr, _ = pm4py.discover_dcr(non_returning_df)
returning_dcr, _ = pm4py.discover_dcr(returning_df)

In [None]:
# Image files for the two subset models (paths relative to the project root).
returning_img = 'notebooks/img/returning_conformance.png'
non_returning_img = 'notebooks/img/non_returning_conformance.png'

# Save and show the model mined from the returning cases ...
pm4py.save_vis_dcr(returning_dcr, file_path=returning_img)
pm4py.view_dcr(returning_dcr)

# ... and the model mined from the non-returning cases.
pm4py.save_vis_dcr(non_returning_dcr, file_path=non_returning_img)
pm4py.view_dcr(non_returning_dcr)

# Part 2.2 Conformance checking
### Rule based conformance checking
Note: Checking conformance on the same data you used to discover the model will always yield a perfect fit.

In [None]:
# Rule-based conformance of the full log against the model mined from it.
rule_results = pm4py.conformance_dcr(sepsis_df,dcr_sepsis)
conformance_sepsis_df = pd.DataFrame(rule_results)
print(f'[Deviation fitness] {conformance_sepsis_df["dev_fitness"].mean()}')

# Show the traces that are not fit (as the cell output).
not_fit = conformance_sepsis_df['is_fit'] == False
conformance_sepsis_df[not_fit]

### Choice conformance

In [None]:
# Hand-built model for the admission choice. It is derived from a copy of the
# mined graph, so every relation that should not be part of the rule has to be
# overwritten explicitly.
choice_graph = deepcopy(dcr_sepsis)
choice_graph.conditions = {}
# Fix: also drop the mined response relations. They were previously inherited
# from dcr_sepsis and refer to events that are not part of this two-event
# graph; the sequence graph overrides its responses in the same way.
choice_graph.responses = {}
# NOTE(review): any other relations copied from the mined graph (e.g. includes)
# are still inherited here - confirm whether they should be cleared as well.
# Each admission excludes itself and the other one, so executing either of them
# removes both from the included set.
choice_graph.excludes = {'Admission NC':{'Admission NC','Admission IC'},'Admission IC':{'Admission IC','Admission NC'}}
choice_graph.events = {'Admission NC','Admission IC'}
choice_graph.marking.included = {'Admission NC','Admission IC'}
pm4py.view_dcr(choice_graph)
pm4py.save_vis_dcr(choice_graph,file_path='notebooks/img/choice_conformance.png')

In [None]:
# Rule-based conformance of the full log against the admission-choice graph.
conformance_sepsis_df = pd.DataFrame(pm4py.conformance_dcr(sepsis_df, choice_graph))
print(f'[Deviation fitness] {conformance_sepsis_df["dev_fitness"].mean()}')

# Split the result once into fit and non-fit traces, then report both counts.
is_fit = conformance_sepsis_df['is_fit']
fit_traces = conformance_sepsis_df[is_fit == True]
unfit_traces = conformance_sepsis_df[is_fit == False]
print('Conforming traces:',len(fit_traces))
print('Non-conforming traces',len(unfit_traces))
# Frequency of each distinct deviation among the non-fit traces.
print(unfit_traces['deviations'].value_counts())

In [None]:
# Rule-based conformance of the returning cases against the admission-choice graph.
conformance_sepsis_df = pd.DataFrame(pm4py.conformance_dcr(returning_df, choice_graph))
print(f'[Deviation fitness] {conformance_sepsis_df["dev_fitness"].mean()}')

# Split the result once into fit and non-fit traces, then report both counts.
is_fit = conformance_sepsis_df['is_fit']
fit_traces = conformance_sepsis_df[is_fit == True]
unfit_traces = conformance_sepsis_df[is_fit == False]
print('Conforming traces:',len(fit_traces))
print('Non-conforming traces',len(unfit_traces))
# Frequency of each distinct deviation among the non-fit traces (cell output).
unfit_traces['deviations'].value_counts()

In [None]:
# Rule-based conformance of the non-returning cases against the admission-choice graph.
conformance_sepsis_df = pd.DataFrame(pm4py.conformance_dcr(non_returning_df, choice_graph))
print(f'[Deviation fitness] {conformance_sepsis_df["dev_fitness"].mean()}')

# Split the result once into fit and non-fit traces, then report both counts.
is_fit = conformance_sepsis_df['is_fit']
fit_traces = conformance_sepsis_df[is_fit == True]
unfit_traces = conformance_sepsis_df[is_fit == False]
print('Conforming traces:',len(fit_traces))
print('Non-conforming traces',len(unfit_traces))
# Frequency of each distinct deviation among the non-fit traces (cell output).
unfit_traces['deviations'].value_counts()

### Sequence conformance

In [None]:
# Hand-built model for the order ER Registration, then ER Triage, then
# ER Sepsis Triage, derived from a copy of the mined graph.
sequence_events = {'ER Registration','ER Sepsis Triage','ER Triage'}

sequence_graph = deepcopy(dcr_sepsis)
# Conditions are keyed by the target event: each step requires the step before it.
sequence_graph.conditions = {'ER Triage': {'ER Registration'}, 'ER Sepsis Triage':{'ER Triage'}}
sequence_graph.excludes = {}
# Responses are keyed by the source event: each step makes the next one pending.
sequence_graph.responses = {'ER Registration': {'ER Triage'}, 'ER Triage': {'ER Sepsis Triage'}}
# NOTE(review): relations not overwritten here are still the ones copied from
# the mined graph - confirm that this is intended.
sequence_graph.events = set(sequence_events)
sequence_graph.marking.included = set(sequence_events)

pm4py.view_dcr(sequence_graph)
pm4py.save_vis_dcr(sequence_graph,file_path='notebooks/img/sequence_conformance.png')

In [None]:
# Rule-based conformance of the full log against the triage-sequence graph.
conformance_sepsis_df = pd.DataFrame(pm4py.conformance_dcr(sepsis_df,sequence_graph))
print(f'[Deviation fitness] {conformance_sepsis_df["dev_fitness"].mean()}')

# Split the result once into fit and non-fit traces, then report both counts.
is_fit = conformance_sepsis_df['is_fit']
fit_traces = conformance_sepsis_df[is_fit == True]
unfit_traces = conformance_sepsis_df[is_fit == False]
print('Conforming traces:',len(fit_traces))
print('Non-conforming traces',len(unfit_traces))
# Frequency of each distinct deviation among the non-fit traces (cell output).
unfit_traces['deviations'].value_counts()

In [None]:
# Rule-based conformance of the returning cases against the triage-sequence graph.
conformance_sepsis_df = pd.DataFrame(pm4py.conformance_dcr(returning_df,sequence_graph))
print(f'[Deviation fitness] {conformance_sepsis_df["dev_fitness"].mean()}')

# Split the result once into fit and non-fit traces, then report both counts.
is_fit = conformance_sepsis_df['is_fit']
fit_traces = conformance_sepsis_df[is_fit == True]
unfit_traces = conformance_sepsis_df[is_fit == False]
print('Conforming traces:',len(fit_traces))
print('Non-conforming traces',len(unfit_traces))
# Frequency of each distinct deviation among the non-fit traces (cell output).
unfit_traces['deviations'].value_counts()

In [None]:
# Rule-based conformance of the non-returning cases against the triage-sequence graph.
conformance_sepsis_df = pd.DataFrame(pm4py.conformance_dcr(non_returning_df,sequence_graph))
print(f'[Deviation fitness] {conformance_sepsis_df["dev_fitness"].mean()}')

# Split the result once into fit and non-fit traces, then report both counts.
is_fit = conformance_sepsis_df['is_fit']
fit_traces = conformance_sepsis_df[is_fit == True]
unfit_traces = conformance_sepsis_df[is_fit == False]
print('Conforming traces:',len(fit_traces))
print('Non-conforming traces',len(unfit_traces))
# Frequency of each distinct deviation among the non-fit traces (cell output).
unfit_traces['deviations'].value_counts()

## Alignment based conformance
Note: Checking conformance on the same data you used to discover the model will always yield a perfect fit.

In [None]:
# Optimal alignments of the full log against the model mined from it.
alignment_results = pm4py.optimal_alignment_dcr(sepsis_df,dcr_sepsis)
alignment_sepsis_df = pd.DataFrame(alignment_results)
print(f'[Alignment fitness] {alignment_sepsis_df["fitness"].mean()}')

# Show the traces whose fitness is below 1 (as the cell output).
below_perfect = alignment_sepsis_df['fitness'] < 1
alignment_sepsis_df[below_perfect]

In [None]:
# Optimal alignments of the full log against the admission-choice graph.
alignment_sepsis_df = pd.DataFrame(pm4py.optimal_alignment_dcr(sepsis_df,choice_graph))
# Fix: this is the mean of the alignment 'fitness' column; it used to be
# printed under the rule-based '[Deviation fitness]' label.
print(f'[Alignment fitness] {alignment_sepsis_df["fitness"].mean()}')

# Count both groups from one mask and label them (they were printed as bare numbers).
perfect_fit = alignment_sepsis_df['fitness'] == 1
print('Traces with fitness == 1:', int(perfect_fit.sum()))
print('Traces with fitness != 1:', int((~perfect_fit).sum()))
# Frequency of each distinct alignment among the traces with fitness != 1.
alignment_sepsis_df[~perfect_fit]['alignment'].value_counts()

In [None]:
# Optimal alignments of the returning cases against the admission-choice graph.
alignment_sepsis_df = pd.DataFrame(pm4py.optimal_alignment_dcr(returning_df,choice_graph))
# Fix: this is the mean of the alignment 'fitness' column; it used to be
# printed under the rule-based '[Deviation fitness]' label.
print(f'[Alignment fitness] {alignment_sepsis_df["fitness"].mean()}')

# Count both groups from one mask and label them (they were printed as bare numbers).
perfect_fit = alignment_sepsis_df['fitness'] == 1
print('Traces with fitness == 1:', int(perfect_fit.sum()))
print('Traces with fitness != 1:', int((~perfect_fit).sum()))
# Frequency of each distinct alignment among the traces with fitness != 1.
alignment_sepsis_df[~perfect_fit]['alignment'].value_counts()

In [None]:
# Optimal alignments of the non-returning cases against the admission-choice graph.
alignment_sepsis_df = pd.DataFrame(pm4py.optimal_alignment_dcr(non_returning_df, choice_graph))
# Fix: this is the mean of the alignment 'fitness' column; it used to be
# printed under the rule-based '[Deviation fitness]' label.
print(f'[Alignment fitness] {alignment_sepsis_df["fitness"].mean()}')

# Count both groups from one mask and label them (they were printed as bare numbers).
perfect_fit = alignment_sepsis_df['fitness'] == 1
print('Traces with fitness == 1:', int(perfect_fit.sum()))
print('Traces with fitness != 1:', int((~perfect_fit).sum()))
# Frequency of each distinct alignment among the traces with fitness != 1.
alignment_sepsis_df[~perfect_fit]['alignment'].value_counts()

In [None]:
# Optimal alignments of the full log against the triage-sequence graph.
alignment_sepsis_df = pd.DataFrame(pm4py.optimal_alignment_dcr(sepsis_df,sequence_graph))
# Fix: this is the mean of the alignment 'fitness' column; it used to be
# printed under the rule-based '[Deviation fitness]' label.
print(f'[Alignment fitness] {alignment_sepsis_df["fitness"].mean()}')

# Count both groups from one mask and label them (they were printed as bare numbers).
perfect_fit = alignment_sepsis_df['fitness'] == 1
print('Traces with fitness == 1:', int(perfect_fit.sum()))
print('Traces with fitness != 1:', int((~perfect_fit).sum()))
# Frequency of each distinct alignment among the traces with fitness != 1.
alignment_sepsis_df[~perfect_fit]['alignment'].value_counts()

In [None]:
# Optimal alignments of the returning cases against the triage-sequence graph.
alignment_sepsis_df = pd.DataFrame(pm4py.optimal_alignment_dcr(returning_df,sequence_graph))
# Fix: this is the mean of the alignment 'fitness' column; it used to be
# printed under the rule-based '[Deviation fitness]' label.
print(f'[Alignment fitness] {alignment_sepsis_df["fitness"].mean()}')

# Count both groups from one mask and label them (they were printed as bare numbers).
perfect_fit = alignment_sepsis_df['fitness'] == 1
print('Traces with fitness == 1:', int(perfect_fit.sum()))
print('Traces with fitness != 1:', int((~perfect_fit).sum()))
# Frequency of each distinct alignment among the traces with fitness != 1.
alignment_sepsis_df[~perfect_fit]['alignment'].value_counts()

In [None]:
# Optimal alignments of the non-returning cases against the triage-sequence graph.
alignment_sepsis_df = pd.DataFrame(pm4py.optimal_alignment_dcr(non_returning_df,sequence_graph))
# Fix: this is the mean of the alignment 'fitness' column; it used to be
# printed under the rule-based '[Deviation fitness]' label.
print(f'[Alignment fitness] {alignment_sepsis_df["fitness"].mean()}')

# Count both groups from one mask and label them (they were printed as bare numbers).
perfect_fit = alignment_sepsis_df['fitness'] == 1
print('Traces with fitness == 1:', int(perfect_fit.sum()))
print('Traces with fitness != 1:', int((~perfect_fit).sum()))
# Frequency of each distinct alignment among the traces with fitness != 1.
alignment_sepsis_df[~perfect_fit]['alignment'].value_counts()