# Dotted Chart Analysis

In [3]:
# util
from pprint import pprint
import pandas as pd

# visualization
import plotly.express as px

In [4]:
import sys
sys.path.insert(1, '../')

from src.io import DOM_DEC, INT_DEC, PER, PRE, REQ, read_log, to_dataframe

## Import Event Log

In [5]:
# Load the event log selected by DOM_DEC with the project's read_log helper
# (both come from src.io). The result is iterated below as a sequence of
# traces, each of which is a sequence of events.
# NOTE(review): DOM_DEC presumably identifies the domestic declarations log — confirm in src/io.
log = read_log(DOM_DEC)

parsing log, completed traces :: 100%|██████████| 10357/10357 [00:04<00:00, 2279.24it/s]


## Dotted Chart Analysis

RQ: Can we observe batch processing behaviour?

In [9]:
# Flatten the log into two parallel lists with one entry per event:
# `case` holds the position of the trace the event belongs to and
# `time` holds the event's timestamp.
case = [trace_no for trace_no, tr in enumerate(log) for _ in tr]
time = [ev['time:timestamp'] for tr in log for ev in tr]

In [10]:
# Dotted chart: one marker per event, with the event timestamp on the x-axis
# and the index of the trace the event belongs to on the y-axis.
# The data is passed by keyword on purpose: the first positional parameter of
# px.scatter is `data_frame`, so a positional `px.scatter(case, time)` uses
# `case` only as the data set and puts the row number of each event (the
# frame's index) on the y-axis instead of the case index.
fig = px.scatter(x=time,
                 y=case,
                 labels={
                     "x": "Time",
                     "y": "Case"
                 },
                 title='Events over time')
fig.show()

### Plot single event classes

#### List all event classes

In [11]:
# Gather the distinct activity names ('concept:name') over all events of all
# traces; the set removes duplicates before converting back to a list.
unique_names = {ev['concept:name'] for tr in log for ev in tr}
classes = list(unique_names)

print('There are %s unique event classes in the log.' %len(classes))

There are 14 unique event classes in the log.


In [12]:
# Display the unique event class names so one can be picked for the plot below.
classes

['Declaration SAVED by EMPLOYEE',
 'Declaration REJECTED by MISSING',
 'Declaration REJECTED by PRE_APPROVER',
 'Declaration FINAL_APPROVED by SUPERVISOR',
 'Declaration SUBMITTED by EMPLOYEE',
 'Declaration APPROVED by BUDGET OWNER',
 'Payment Handled',
 'Declaration APPROVED by ADMINISTRATION',
 'Declaration REJECTED by ADMINISTRATION',
 'Declaration APPROVED by PRE_APPROVER',
 'Request Payment',
 'Declaration REJECTED by EMPLOYEE',
 'Declaration REJECTED by SUPERVISOR',
 'Declaration REJECTED by BUDGET OWNER']

#### Visualization

Visualize the temporal distribution of the events of a single event class to find potential batch processing. We use Plotly to enable interactive exploration.

In [13]:
# Event class whose occurrences are plotted; change this name to inspect
# another entry of `classes`.
event_class = 'Payment Handled'
if event_class not in classes:
    # Fail early (with ValueError) when the class does not occur in the log,
    # instead of silently drawing an empty chart.
    raise ValueError('%r is not in list' % event_class)

# Collect one (case index, timestamp) pair per event of the selected class.
time = []
case = []

for idx, trace in enumerate(log):
    for event in trace:
        if event['concept:name'] == event_class:
            case.append(idx)
            time.append(event['time:timestamp'])

title = 'Temporal distribution of "' + event_class + '" events'

# Pass the data by keyword: the first positional parameter of px.scatter is
# `data_frame`, so a positional `px.scatter(case, time)` puts the row number
# of each event on the y-axis instead of the case index stored in `case`.
fig = px.scatter(x=time,
                 y=case,
                 labels={
                     "x": "Time",
                     "y": "Case"
                 },
                 title=title)
fig.show()

### Findings:

The following activities show batching behaviour:
- Payment Handled
- Declaration REJECTED by MISSING
- Request Payment
- (Declaration REJECTED by BUDGET OWNER)
