# Case Study

### Loading the data set

In [None]:
import pm4py

# Read the BPI Challenge 2019 log from disk (this can take a while for the full XES file).
print("Loading event log...")
sales_df = pm4py.read_xes("./data/BPI_Challenge_2019.xes")

# Restrict the log to its 5 most frequent variants so that the discovered model stays readable.
filtered_sales_df = pm4py.filter_variants_top_k(sales_df, k=5)

### Discovering the process

In [None]:
from pm4py.objects.bpmn.layout import layouter

# Mine a BPMN model from the filtered log with the inductive miner.
bpmn = pm4py.discover_bpmn_inductive(
    filtered_sales_df,
    activity_key="concept:name",
    case_id_key="case:concept:name",
    timestamp_key="time:timestamp",
)

# Run the layouter over the model before rendering it.
bpmn_layouted = layouter.apply(bpmn)

# Render the laid-out model inline as a PNG.
pm4py.view_bpmn(bpmn_layouted, format="png")

### Loading the deviating event log

In [None]:
# The project documentation describes in detail how the deviating event log is simulated.
# For simplicity, a pre-generated copy is loaded here from data/deviating_event_log.csv.
import pandas as pd

# Read the CSV and normalise it in one step to the column names pm4py is told to use
# for case id, activity and timestamp.
dataframe = pm4py.format_dataframe(
    pd.read_csv("data/deviating_event_log.csv", sep=","),
    case_id="case:concept:name",
    activity_key="concept:name",
    timestamp_key="time:timestamp",
)

# Convert the formatted DataFrame into an event log object for the replay step.
altered_event_log = pm4py.convert_to_event_log(dataframe)


### Conformance checking

In [None]:
# Discover a Petri net, together with its initial and final markings, from the
# filtered original event log using the inductive miner.
petri_net, initial_markings, final_markings = pm4py.discovery.discover_petri_net_inductive(
    filtered_sales_df,
    activity_key='concept:name',
    case_id_key='case:concept:name',
    timestamp_key='time:timestamp',
)

# Replay the deviating event log against the Petri net discovered above.
# The second argument must be the Petri net itself (not the DataFrame it was mined from),
# followed by that net's initial and final markings.
replayed_traces = pm4py.conformance_diagnostics_token_based_replay(
    altered_event_log,
    petri_net,
    initial_markings,
    final_markings,
)

# Display the replay diagnostics as the cell output.
replayed_traces

### Statistics

In [None]:
# Plot how the events of the filtered log are distributed over the months, rendered as a PNG.
pm4py.view_events_distribution_graph(
    filtered_sales_df,
    distr_type="months",
    format="png",
)

In [None]:
# Case arrival and case dispersion statistics for the filtered log.
from pm4py.statistics.traces.generic.log import case_arrival

# Average case arrival, computed through the simplified top-level pm4py interface.
case_arrival_ratio = pm4py.get_case_arrival_average(filtered_sales_df)

# Average case dispersion has no top-level shortcut used here, so the statistics module
# is called directly with an explicit timestamp column.
# NOTE(review): this is the "log" variant of the module but receives a DataFrame - confirm
# that the installed pm4py version converts the input internally.
case_dispersion_ratio = case_arrival.get_case_dispersion_avg(
    filtered_sales_df,
    parameters={case_arrival.Parameters.TIMESTAMP_KEY: "time:timestamp"},
)

print("Case arrival ratio:", case_arrival_ratio)
print("Case dispersion ratio:", case_dispersion_ratio)