# Introduction to Process Mining with PM4PY
Code is partially copied from this tutorial: https://medium.com/@c3_62722/process-mining-with-python-tutorial-a-healthcare-application-part-1-ae02027a050

In [None]:
import pandas as pd
import numpy as np
from datetime import date
from IPython.display import Markdown, display
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
fn = 'ArtificialPatientTreatment.csv'

# Load the raw event table and relabel its columns with short, uniform names
# (set_axis fails, like a direct .columns assignment, if the count differs).
events = pd.read_csv(fn).set_axis(
    ['patient', 'action', 'resource', 'datetime'], axis=1
)

# Parse the timestamp column so that time arithmetic works in later cells.
events['datetime'] = pd.to_datetime(events['datetime'])
events.head()


In [None]:
# Report the size of the loaded table; events.shape is (rows, columns).
print('{} has {} rows and {} columns.'.format(fn, *events.shape))


In [None]:
## Per-case start and end times, used to express every event's timestamp
## relative to the start of its case (the 'age' of each activity).
# Named aggregation binds each output column to its aggregate explicitly,
# instead of relabelling pivot_table output by position (which silently relies
# on the aggregate columns coming back in alphabetical order: max, then min).
case_starts_ends = (
    events.groupby('patient')['datetime']
    .agg(caseend='max', casestart='min')
    .reset_index()
)

# Attach the case-level bounds to every event of that case.
events = events.merge(case_starts_ends, on='patient')

# Time elapsed since the case started, and total duration of the case.
events['relativetime'] = events['datetime'] - events['casestart']
events['caselength'] = events['caseend'] - events['casestart']
events.head()

In [None]:
# Remove leading/trailing whitespace from the action labels so that identical
# activities are not counted as different ones. The vectorised .str accessor
# passes missing values through as NaN, whereas calling x.strip() per element
# raises AttributeError on the first non-string entry.
events['action'] = events['action'].str.strip()

# Descriptive Statistics

### How many events per case?

In [None]:
# Distribution of case sizes: the first value_counts() gives the number of
# events per patient, the second counts how many patients share each size.
events_per_patient = events['patient'].value_counts()

# value_counts() orders by descending frequency; sort by the index (number of
# actions) so the x-axis reads in increasing order like a histogram.
events_per_patient.value_counts().sort_index().plot(kind='bar')
plt.xlabel('Number of actions per patient')
plt.ylabel('Number of patients')

### Action occurrences

In [None]:
# How often each action occurs across all cases, most frequent first.
action_counts = events['action'].value_counts()
action_counts.plot.bar()
plt.xlabel('action')
plt.ylabel('number of occurrences')

### Scatterplot of sequences
Relative time: the time elapsed since the start of the case.

In [None]:
# Express the time since case start as whole seconds: full days converted to
# seconds plus the seconds component of the remainder.
SECONDS_PER_DAY = 86400
elapsed = events['relativetime'].dt
events['relativetime_s'] = elapsed.seconds + SECONDS_PER_DAY * elapsed.days

# Order events by case duration, then by patient, then chronologically within
# the case.
sort_keys = ['caselength', 'patient', 'relativetime_s']
ordered = events.sort_values(by=sort_keys)

In [None]:
## Time in seconds
# One marker per event: x is the time since case start, y the patient and the
# colour encodes the action.
ax = sns.scatterplot(data=ordered, x='relativetime_s', y='patient', hue='action')

# Only label every fifth position on the patient axis (0, 5, ..., 95).
# NOTE(review): the upper bound 100 is hard-coded — presumably the number of
# patients in this dataset; confirm if another file is loaded.
tick_positions = np.arange(0, 100, 5)
plt.yticks(tick_positions)
plt.show()


# Process Mining

In [None]:
#!pip install pm4py --user
from pm4py.objects.conversion.log import converter as log_converter
from pm4py.objects.log.importer.xes import importer as xes_importer

# process mining 
from pm4py.algo.discovery.alpha import algorithm as alpha_miner
from pm4py.algo.discovery.inductive import algorithm as inductive_miner
from pm4py.algo.discovery.heuristics import algorithm as heuristics_miner
from pm4py.algo.discovery.dfg import algorithm as dfg_discovery
from pm4py.algo.discovery.footprints import algorithm as footprints_discovery


# viz
from pm4py.visualization.petri_net import visualizer as pn_visualizer
from pm4py.visualization.process_tree import visualizer as pt_visualizer
from pm4py.visualization.heuristics_net import visualizer as hn_visualizer
from pm4py.visualization.dfg import visualizer as dfg_visualization

# misc 
from pm4py.objects.conversion.process_tree import converter as pt_converter

In [None]:
### Map our columns onto the names pm4py works with: case id
### (case:concept:name), event/activity (concept:name), timestamp
### (time:timestamp) and resource (org:resource).
pm4py_column_names = {
    'patient': 'case:concept:name',
    'action': 'concept:name',
    'datetime': 'time:timestamp',
    'resource': 'org:resource',
}

# rename() without inplace returns a renamed copy, so `events` is left untouched.
eventlog = events.rename(columns=pm4py_column_names)

## Convert to log format
log = log_converter.apply(eventlog)


### Footprint Analysis

In [None]:
# footprint analysis
# Compute the footprints using the ENTIRE_EVENT_LOG variant; the result is
# indexed by key in the following cells ('dfg', 'sequence', 'parallel').
# NOTE(review): the renamed DataFrame `eventlog` is passed here, whereas the
# other discovery calls in this notebook take the converted `log` — confirm
# this is intended.
fp_log = footprints_discovery.apply(eventlog, variant=footprints_discovery.Variants.ENTIRE_EVENT_LOG)

### Occurrences of binary relations

In [None]:
# Show the 'dfg' entry of the footprints (occurrences of binary relations).
fp_log['dfg']

### Causalities

In [None]:
# Show the 'sequence' entry of the footprints (the causal relations).
fp_log['sequence']

### Parallel

In [None]:
# Show the 'parallel' entry of the footprints (the parallel relations).
fp_log['parallel']

## Footprint table

In [None]:
from pm4py.visualization.footprints import visualizer as fp_visualizer
# Build the footprint table visualisation from the footprints computed above
# and display it.
gviz = fp_visualizer.apply(fp_log)
fp_visualizer.view(gviz)

### Directly-follows graph

In [None]:
# Discover the directly-follows graph (DFG) from the event log
dfg = dfg_discovery.apply(log)

# Visualise the DFG using the FREQUENCY variant of the visualiser; the log is
# passed along with the graph.
gviz = dfg_visualization.apply(dfg, log=log, variant=dfg_visualization.Variants.FREQUENCY)
dfg_visualization.view(gviz)

### Alpha miner

In [None]:
# Alpha miner: discovers a net from the event log and returns it together
# with its initial and final markings.
net, initial_marking, final_marking = alpha_miner.apply(log)

# Visualise the discovered net with its markings
gviz = pn_visualizer.apply(net, initial_marking, final_marking)
pn_visualizer.view(gviz)

In [None]:
# Add information about frequency to the visualisation: same net and markings
# as above, rendered with the FREQUENCY variant, which is also given the log.
# The FORMAT parameter requests PNG output.
parameters = {pn_visualizer.Variants.FREQUENCY.value.Parameters.FORMAT: "png"}
gviz2 = pn_visualizer.apply(net, initial_marking, final_marking, 
                           parameters=parameters, 
                           variant=pn_visualizer.Variants.FREQUENCY, 
                           log=log)

pn_visualizer.view(gviz2)

### Reachability graph

In [None]:
from pm4py.objects.petri_net.utils import reachability_graph
# Construct the reachability graph of the alpha-miner net, starting from its
# initial marking; the result is visualised as a transition system below.
ts = reachability_graph.construct_reachability_graph(net, initial_marking)
            

In [None]:
from pm4py.visualization.transition_system import visualizer as ts_visualizer
# Render the reachability graph built above with the transition-system
# visualiser and display it.
gviz3 = ts_visualizer.apply(ts)
ts_visualizer.view(gviz3)
                                