In [1]:
import pandas as pd
from pm4py.objects.conversion.log import converter as log_converter
from pm4py.objects.log.importer.xes import importer as xes_importer

# process mining 
from pm4py.algo.discovery.alpha import algorithm as alpha_miner
from pm4py.algo.discovery.inductive import algorithm as inductive_miner
from pm4py.algo.discovery.heuristics import algorithm as heuristics_miner
from pm4py.algo.discovery.dfg import algorithm as dfg_discovery

# viz
from pm4py.visualization.petrinet import visualizer as pn_visualizer
from pm4py.visualization.process_tree import visualizer as pt_visualizer
from pm4py.visualization.heuristics_net import visualizer as hn_visualizer
from pm4py.visualization.dfg import visualizer as dfg_visualization

# misc 
from pm4py.objects.conversion.process_tree import converter as pt_converter

Please pay extra attention to the naming convention presented in the table. There is some repetition in the columns, so you can clearly see how they were renamed to work well with the algorithms in pm4py. The default name indicating the case ID is case:concept:name, concept:name is the event, and lastly time:timestamp is the corresponding timestamp. In case the columns are not named this way, we can always rename them using the rename method of a pd.DataFrame. 

In [2]:

df = pd.read_csv("running-example.csv")
log = log_converter.apply(df)
log

[{'attributes': {'concept:name': 3, 'creator': 'Fluxicon Nitro'}, 'events': [{'Activity': 'register request', 'Costs': 50, 'Resource': 'Pete', 'concept:name': 'register request', 'org:resource': 'Pete', 'time:timestamp': '2010-12-30 14:32:00+01:00'}, '..', {'Activity': 'pay compensation', 'Costs': 200, 'Resource': 'Ellen', 'concept:name': 'pay compensation', 'org:resource': 'Ellen', 'time:timestamp': '2011-01-15 10:45:00+01:00'}]}, '....', {'attributes': {'concept:name': 4, 'creator': 'Fluxicon Nitro'}, 'events': [{'Activity': 'register request', 'Costs': 50, 'Resource': 'Pete', 'concept:name': 'register request', 'org:resource': 'Pete', 'time:timestamp': '2011-01-06 15:02:00+01:00'}, '..', {'Activity': 'reject request', 'Costs': 200, 'Resource': 'Ellen', 'concept:name': 'reject request', 'org:resource': 'Ellen', 'time:timestamp': '2011-01-12 15:44:00+01:00'}]}]

In [3]:
df.head()

Unnamed: 0,Activity,Costs,Resource,case:concept:name,case:creator,concept:name,org:resource,time:timestamp
0,register request,50,Pete,3,Fluxicon Nitro,register request,Pete,2010-12-30 14:32:00+01:00
1,examine casually,400,Mike,3,Fluxicon Nitro,examine casually,Mike,2010-12-30 15:06:00+01:00
2,check ticket,100,Ellen,3,Fluxicon Nitro,check ticket,Ellen,2010-12-30 16:34:00+01:00
3,decide,200,Sara,3,Fluxicon Nitro,decide,Sara,2011-01-06 09:18:00+01:00
4,reinitiate request,200,Sara,3,Fluxicon Nitro,reinitiate request,Sara,2011-01-06 12:18:00+01:00


## Alpha miner 

Alpha Miner is one of the best-known process discovery algorithms. In short, the algorithm scans the traces (sequences in the event log) for ordering relations and builds the footprint matrix. Then, it converts the matrix into a **Petri net (a type of graph)**. This video contains a more detailed description of the algorithm.


Running Alpha Miner results in the following:
* a Petri net model in which all the transitions are visible, unique, and correspond to the classified events.
* the initial marking — it describes the status of the Petri net model when the execution starts.
* the final marking — it describes the status of the Petri net model when the execution ends.

Process models expressed using Petri nets share a well-defined semantic: the execution of the process starts from the events included in the initial marking and finishes at the events included in the final marking.


Some of the characteristics of the algorithm:

* it cannot handle loops of length one or two,
* invisible and duplicated tasks cannot be discovered,
* the discovered model might not be sound (for a definition of model soundness in process mining, please refer to this video),
* it does not handle noise well.


In [4]:
#Initaite alpha miner

net, initial_marking, final_marking = alpha_miner.apply(log)

In [7]:
import os
os.environ["PATH"] += os.pathsep + 'D:/Program Files (x86)/Graphviz2.38/bin/'

In [8]:
# alpha miner
net, initial_marking, final_marking = alpha_miner.apply(log)

# viz
gviz = pn_visualizer.apply(net, initial_marking, final_marking)
pn_visualizer.view(gviz)

ExecutableNotFound: failed to execute ['dot', '-Tpng', '-O', 'tmp2ved_5if.gv'], make sure the Graphviz executables are on your systems' PATH