# Process Mining for Email Workflow

Use the `pm4py` library to build a process mining model from a demo log.
The goal is to identify issues in the SOC Toolkit logs using process mining.

## Process Mining Tools:
   - Alpha Miner
   - Petri Nets
   - Directly Follows Graph (time factor)
   - Replay
   - Diagnostics 

In [4]:
import os
import pm4py

import pandas as pd
from pm4py.objects.log.util import dataframe_utils
from pm4py.objects.conversion.log import converter as log_converter
from pm4py.objects.log.exporter.xes import exporter as xes_exporter
from pm4py.objects.log.importer.xes import importer as xes_importer

from pm4py.algo.discovery.heuristics import algorithm as heuristics_miner
from pm4py.visualization.petrinet import visualizer as pn_visualizer

In [7]:
class ProcessMining:
    """Load a CSV event table into a pm4py event log and export it as XES."""

    def __init__(self):
        # Starts as an empty list; convert_to_xes() replaces it with the
        # object returned by the pm4py log converter.
        self._event_log = []

    def print(self, show_slice_list=True) -> None:
        """Print the stored event log to stdout.

        Args:
            show_slice_list: When truthy, print only the slice ``[0:5]`` of
                the log; otherwise print the entire log.
        """
        shown = self._event_log[0:5] if show_slice_list else self._event_log
        print(shown)

    def convert_to_xes(self, _csv_file_name: str, _xes_file_name: str) -> None:
        """Read a CSV file, convert it to an event log, and write it as XES.

        The converted log is kept on the instance (``self._event_log``) so it
        can be inspected afterwards with ``print()``.

        Args:
            _csv_file_name: Path of the comma-separated input file. It is
                sorted by its ``timestamp`` column, and its ``case`` column
                is passed to the converter as the case-id key.
            _xes_file_name: Path the XES export is written to.
        """
        frame = pd.read_csv(_csv_file_name, sep=',')
        frame = dataframe_utils.convert_timestamp_columns_in_df(frame)
        frame = frame.sort_values('timestamp')

        # Name the converter variant once; its Parameters enum supplies the
        # key under which the case-id column name is passed.
        to_event_log = log_converter.Variants.TO_EVENT_LOG
        case_id_param = to_event_log.value.Parameters.CASE_ID_KEY

        self._event_log = log_converter.apply(
            frame,
            parameters={case_id_param: 'case'},
            variant=to_event_log,
        )
        xes_exporter.apply(self._event_log, _xes_file_name)


## 1. Long Correlator Case

In [8]:
# Scenario 1: convert the long-correlator CSV log to XES, then preview it.
mine = ProcessMining()
mine.convert_to_xes(
    _csv_file_name='./out/long_correlator_case.csv',
    _xes_file_name='./out/long_correlator_case.xes',
)
# Show only the leading slice of the converted log rather than the whole log.
mine.print(show_slice_list=True)

exporting log, completed traces ::   0%|          | 0/1000 [00:00<?, ?it/s]

[{'attributes': {'concept:name': 54}, 'events': [{'case': 54, 'timestamp': Timestamp('2021-01-12 17:18:00.431256+0000', tz='UTC'), 'activity': 'executed pre function of component auth_proxy'}, '..', {'case': 54, 'timestamp': Timestamp('2021-01-12 17:18:05.031256+0000', tz='UTC'), 'activity': 'executed post function of component guardia_api'}]}, {'attributes': {'concept:name': 76}, 'events': [{'case': 76, 'timestamp': Timestamp('2021-01-12 17:18:00.626463+0000', tz='UTC'), 'activity': 'executed pre function of component auth_proxy'}, '..', {'case': 76, 'timestamp': Timestamp('2021-01-12 17:18:10.126463+0000', tz='UTC'), 'activity': 'executed post function of component guardia_api'}]}, {'attributes': {'concept:name': 106}, 'events': [{'case': 106, 'timestamp': Timestamp('2021-01-12 17:18:00.692066+0000', tz='UTC'), 'activity': 'executed pre function of component auth_proxy'}, '..', {'case': 106, 'timestamp': Timestamp('2021-01-12 17:18:08.992066+0000', tz='UTC'), 'activity': 'executed po