# Log Parser for Email Workflow 

Define a parser that extracts event information from log files.
The resulting log must have a valid XES-compatible structure that can be transformed directly, using the following column mapping:
  * case id   --> 'case:concept:name'
  * timestamp --> 'time:timestamp'
  * activity  --> 'concept:name'
  * resource  --> 'org:resource'

In [100]:
import os
import csv
import pandas as pd
from datetime import datetime

In [158]:
# define parser for log files
class LogParser:
    """Parse a '::'-delimited log file and export it as an XES-style CSV.

    Every non-empty line of the log is split on '::' into its fields. When
    exporting, the fields are read positionally: field 1 holds the case id
    (second space-separated token), field 2 the timestamp, and field 5 the
    activity message, whose sixth space-separated token is used as resource.
    """

    def __init__(self, file_name):
        """Remember the path of the log file; nothing is read yet.

        Args:
            file_name: Path of the log file to parse.
        """
        self._file_name = file_name
        self._parsing_comp = list()

    def _source_exists(self) -> bool:
        """Return True when a source path is set and points to a file."""
        # After clean() the path is None; os.path.isfile(None) would raise.
        return self._file_name is not None and os.path.isfile(self._file_name)

    def parse(self) -> None:
        """Read the log file and append one field list per non-empty line.

        Does nothing when the source file does not exist.
        """
        if not self._source_exists():
            return
        # The context manager closes the handle even if reading fails.
        with open(self._file_name, "r") as log_file:
            for line in log_file:
                stripped = line.rstrip()
                # Blank lines carry no event and would break the export.
                if not stripped:
                    continue
                self._parsing_comp.append(stripped.split('::'))

    def print(self, show_slice_list = True) -> None:
        """Print the parsed records.

        Args:
            show_slice_list: When true, print only the first five records;
                otherwise print all of them.
        """
        if show_slice_list:
            print(self._parsing_comp[0:5])
        else:
            print(self._parsing_comp)

    def write_to_csv(self, out_file_name) -> None:
        """Write the parsed records, ordered by timestamp, to a CSV file.

        Does nothing when the source log file does not exist.

        Args:
            out_file_name: Path of the CSV file to create or overwrite.

        Raises:
            IndexError: If a record has fewer fields or tokens than expected.
        """
        if not self._source_exists():
            return
        # sorted() returns a new list, so its result must be kept. The sort
        # is stable and compares timestamp strings; this assumes fixed-width
        # 'YYYY-MM-DD HH:MM:SS.ffffff' values, which order chronologically.
        events = sorted(self._parsing_comp, key=lambda item: item[2])
        # newline='' keeps the writer's '\n' terminator from being translated.
        with open(out_file_name, 'w', newline='') as outcsv:
            # configure writer to write standard csv file
            writer = csv.writer(outcsv, delimiter=',', quoting=csv.QUOTE_MINIMAL, lineterminator='\n')
            # Case ID, Timestamp, Activity, Resource
            writer.writerow(['case:concept:name', 'time:timestamp', 'concept:name', 'org:resource'])
            for item in events:
                # write item to outcsv
                writer.writerow([item[1].split(' ')[1], item[2], item[5], item[5].split(' ')[5]])
        print('Successfully wrote log info to csv!')

    def clean(self) -> None:
        """Drop the parsed records and forget the source file path."""
        # An empty list (rather than None) keeps print() usable afterwards.
        self._parsing_comp = list()
        self._file_name = None

## 1. Long Correlator Case

In [159]:
# Create a parser bound to the long correlator case log; nothing is read yet.
parser = LogParser('./log/long_correlator_case.log')

In [160]:
# Read the log file, then preview the first five parsed records.
parser.parse()
parser.print(show_slice_list=True)

[['CASE', 'id 0', '2021-01-12 17:18:05.517598', 'count 1', 'DEBUG', 'executed pre function of component auth_proxy'], ['CASE', 'id 0', '2021-01-12 17:18:05.617598', 'count 2', 'DEBUG', 'executed pre function of component guardia_api'], ['CASE', 'id 0', '2021-01-12 17:18:05.717598', 'count 3', 'DEBUG', 'executed pre function of component email_service'], ['CASE', 'id 0', '2021-01-12 17:18:05.817598', 'count 4', 'DEBUG', 'executed pre function of component indicator_parser'], ['CASE', 'id 0', '2021-01-12 17:18:05.917598', 'count 5', 'DEBUG', 'executed pre function of component worker']]


In [161]:
# Export the parsed records as a CSV with XES-style column headers.
parser.write_to_csv('./out/long_correlator_case.csv')

Successfully wrote log info to csv!


In [162]:
# Release the parsed records and the file name held by the parser.
parser.clean()

## 2. Long Correlator-Worker Case

In [118]:
# Create a parser bound to the long correlator-worker case log; nothing is read yet.
parser2 = LogParser('./log/long_correlator_worker_case.log')

In [119]:
# Read the log file, then preview the first five parsed records.
parser2.parse()
parser2.print(show_slice_list=True)

[['CASE', 'id 0', '2021-01-18 18:23:45.012778', 'count 1', 'DEBUG', 'executed pre function of component auth_proxy'], ['CASE', 'id 0', '2021-01-18 18:23:45.112778', 'count 2', 'DEBUG', 'executed pre function of component guardia_api'], ['CASE', 'id 0', '2021-01-18 18:23:45.212778', 'count 3', 'DEBUG', 'executed pre function of component email_service'], ['CASE', 'id 0', '2021-01-18 18:23:45.312778', 'count 4', 'DEBUG', 'executed pre function of component indicator_parser'], ['CASE', 'id 0', '2021-01-18 18:23:45.412778', 'count 5', 'DEBUG', 'executed post function of component worker']]


In [120]:
# Export the parsed records as a CSV with XES-style column headers.
parser2.write_to_csv('./out/long_correlator_worker_case.csv')

Successfully wrote log info to csv!


In [121]:
# Release the parsed records and the file name held by the parser.
parser2.clean()