In [2]:
import warnings
from collections import defaultdict

import numpy as np
import pandas as pd
import pycelonis
import yaml
from pycelonis.pql import PQL, PQLColumn, PQLFilter, OrderByColumn
from pycelonis_core.utils.errors import PyCelonisNotFoundError

from src.data_integration.celonis_data_integration import get_connection, get_celonis_info, create_pool_and_model, check_invalid_table_in_celonis, execute_PQL_query
from src.data_integration.get_data import get_execution_time_per_res_per_act, get_unique_activity, get_unique_resource, get_res_act_relation, get_target_activity_with_start_end_timestamp 
from src.data_integration.get_data import get_caseid_activity_lifecycle_resource
from src.resource_based.batch_identification import batch_identification
from src.resource_based.find_deviations_analysis import find_deviations
from src.resource_based.find_high_rework_resources_analysis import find_high_rework_resources
from src.resource_based.resource_performance import resource_performance

In [4]:
    # Open the Celonis connection, then unpack the nine values returned by
    # get_celonis_info: the data pool and data model, their names, and the
    # column names that the query cells below pass to the helpers.
    celonis = get_connection()
    (
        data_pool,
        data_model,
        pool_name,
        model_name,
        case_column_name,
        act_column_name,
        time_column_name,
        res_column_name,
        lifecycle,
    ) = get_celonis_info(celonis=celonis)

In [4]:

# Query execution times per resource and activity only when the "receipt"
# table exists; check_invalid_table_in_celonis is truthy for a table that is
# missing from the data pool/model.
if check_invalid_table_in_celonis(data_model, table="receipt"):
    # NOTE(review): `df` is not assigned on this path, so later cells that
    # read it will fail or pick up a stale value from an earlier run.
    print("No such table")
else:
    df = get_execution_time_per_res_per_act(
        data_model,
        "receipt",
        case_column_name,
        act_column_name,
        res_column_name,
        time_column_name,
    )

0it [00:00, ?it/s]

In [6]:
# Compute per-activity resource performance from the execution-time frame.
# `le` is the "least efficient resource" table displayed in the next cell;
# `me` is presumably the most-efficient counterpart — confirm against
# resource_performance.
le, me = resource_performance(df)

In [7]:
# Display the least-efficient resource and its average execution time per activity.
le

Unnamed: 0,activity,the least efficient resource,avg_execution_time(min)
0,Confirmation of receipt,admin3,26695.886368
1,T02 Check confirmation of receipt,admin2,17878.510908
2,T03 Adjust confirmation of receipt,admin2,14006.592538
3,T04 Determine confirmation of receipt,admin3,11558.185783
4,T05 Print and send confirmation of receipt,admin2,22677.156725
5,T06 Determine necessity of stop advice,test,19936.693308
6,T07-1 Draft intern advice aspect 1,admin2,15178.597175
7,T07-2 Draft intern advice aspect 2,test,54488.6856
8,T07-3 Draft intern advice hold for aspect 3,Resource35,7222.095283
9,T07-4 Draft internal advice to hold for type 4,Resource32,4.72295


In [10]:
# Fetch the "receipt" data that the batch-identification cells below work on,
# using the same column-name arguments as the execution-time query.
batch_data = get_target_activity_with_start_end_timestamp(
    data_model,
    "receipt",
    case_column_name,
    act_column_name,
    res_column_name,
    time_column_name,
)

0it [00:00, ?it/s]

In [11]:
# Collect the unique values of batch_data's "resource" and "activity" columns
# (per the helper names — confirm the return type against get_data).
resources = get_unique_resource(batch_data, "resource")
activities = get_unique_activity(batch_data, "activity")

In [12]:
import warnings

warnings.filterwarnings('ignore')

In [14]:
# Run batch identification over batch_data for the collected activities and
# unpack its three results.
# NOTE(review): the names suggest simultaneous / sequential / concurrent
# batches — confirm against batch_identification.
df_sim, df_seq, df_con = batch_identification(batch_data, activities)

In [5]:
# Fetch case id, activity, lifecycle transition and resource for the "receipt"
# table. This rebinds `df`, which the execution-time cell also assigns.
df = get_caseid_activity_lifecycle_resource(
    data_model,
    "receipt",
    case_column_name,
    act_column_name,
    res_column_name,
    lifecycle,
)

0it [00:00, ?it/s]

In [6]:
# Display the case_id / activity / transition / resource rows fetched above.
df

Unnamed: 0,case_id,activity,transition,resource
0,case-10011,Confirmation of receipt,complete,Resource21
1,case-10011,T02 Check confirmation of receipt,complete,Resource10
2,case-10011,T03 Adjust confirmation of receipt,complete,Resource21
3,case-10017,Confirmation of receipt,complete,Resource30
4,case-10017,T06 Determine necessity of stop advice,complete,Resource30
...,...,...,...,...
7138,case-9997,Confirmation of receipt,complete,Resource06
7139,case-9997,T02 Check confirmation of receipt,complete,Resource06
7140,case-9997,T04 Determine confirmation of receipt,complete,Resource06
7141,case-9997,T05 Print and send confirmation of receipt,complete,Resource06


In [9]:
# Identify the resources flagged as high-rework for the given thresholds and
# show the resulting list.
high_rework_resources = find_high_rework_resources(
    df,
    rework_threshold=1,
    count_threshold=2,
)
high_rework_resources

['Resource24', 'Resource15', 'Resource29', 'Resource09']

In [5]:
# Get the case id / activity / lifecycle / resource data using PQL and display it.
# NOTE(review): this repeats the query of the earlier lifecycle cell with the
# same arguments.
df = get_caseid_activity_lifecycle_resource(
    data_model,
    "receipt",
    case_column_name,
    act_column_name,
    res_column_name,
    lifecycle,
)
df

0it [00:00, ?it/s]

Unnamed: 0,case_id,activity,transition,resource
0,case-10011,Confirmation of receipt,complete,Resource21
1,case-10011,T02 Check confirmation of receipt,complete,Resource10
2,case-10011,T03 Adjust confirmation of receipt,complete,Resource21
3,case-10017,Confirmation of receipt,complete,Resource30
4,case-10017,T06 Determine necessity of stop advice,complete,Resource30
...,...,...,...,...
7138,case-9997,Confirmation of receipt,complete,Resource06
7139,case-9997,T02 Check confirmation of receipt,complete,Resource06
7140,case-9997,T04 Determine confirmation of receipt,complete,Resource06
7141,case-9997,T05 Print and send confirmation of receipt,complete,Resource06


In [6]:
# Find the resource / unusual-activity / case combinations reported by
# find_deviations for the frame above.
# NOTE(review): the positional 1 is presumably a threshold — confirm against
# find_deviations and consider passing it by keyword.
deviations = find_deviations(df, 1)

In [7]:
# Display the deviations table (resource, unusual_activity, case).
deviations

Unnamed: 0,resource,unusual_activity,case
0,Resource21,T03 Adjust confirmation of receipt,case-10011
1,Resource21,T11 Create document X request unlicensed,case-10071
2,Resource21,T12 Check document X request unlicensed,case-10071
3,Resource21,T14 Determine document X request unlicensed,case-10071
4,Resource22,T07-1 Draft intern advice aspect 1,case-10073
...,...,...,...
104,Resource01,T19 Determine report Y to stop indication,case-9631
105,Resource33,T10 Determine necessity to stop indication,case-9635
106,Resource38,T02 Check confirmation of receipt,case-9635
107,Resource38,T04 Determine confirmation of receipt,case-9635
