In [72]:
import pandas as pd
import pm4py 
 
import numpy as np 

In [73]:
# Read the translated financial event log (comma-separated, the read_csv default)
# and preview its first ten rows.
df_log = pd.read_csv('financial_log/financial_log_translated.csv')
df_log.head(10)

Unnamed: 0,Case ID,Activity,Resource,Complete Timestamp,Variant,Variant index,lifecycle:transition,concept:name,AMOUNT_REQ
0,173688,A_SUBMITTED-COMPLETE,112.0,2011-10-01 7:38:44.546,Variant 613,613,COMPLETE,A_SUBMITTED,20000
1,173688,A_PARTLYSUBMITTED-COMPLETE,112.0,2011-10-01 7:38:44.880,Variant 613,613,COMPLETE,A_PARTLYSUBMITTED,20000
2,173688,A_PREACCEPTED-COMPLETE,112.0,2011-10-01 7:39:37.906,Variant 613,613,COMPLETE,A_PREACCEPTED,20000
3,173688,W_COMPLETE_REQUEST-SCHEDULE,112.0,2011-10-01 7:39:38.875,Variant 613,613,SCHEDULE,W_COMPLETE_REQUEST,20000
4,173688,W_COMPLETE_REQUEST-START,,2011-10-01 18:36:46.437,Variant 613,613,START,W_COMPLETE_REQUEST,20000
5,173688,A_ACCEPTED-COMPLETE,10862.0,2011-10-01 18:42:43.308,Variant 613,613,COMPLETE,A_ACCEPTED,20000
6,173688,O_SELECTED-COMPLETE,10862.0,2011-10-01 18:45:09.243,Variant 613,613,COMPLETE,O_SELECTED,20000
7,173688,A_FINALIZED-COMPLETE,10862.0,2011-10-01 18:45:09.243,Variant 613,613,COMPLETE,A_FINALIZED,20000
8,173688,O_CREATED-COMPLETE,10862.0,2011-10-01 18:45:11.197,Variant 613,613,COMPLETE,O_CREATED,20000
9,173688,O_SENT-COMPLETE,10862.0,2011-10-01 18:45:11.380,Variant 613,613,COMPLETE,O_SENT,20000


In [74]:
# Hand the raw frame to pm4py, naming the columns that hold the case identifier,
# the activity label and the event timestamp.
# NOTE(review): the sample rows displayed for df_log show timestamps such as
# '2011-10-01 7:38:44.546' (fractional seconds, no UTC offset), which does not
# obviously match the '%Y-%m-%d %H:%M:%S%z' format passed here -- confirm how the
# installed pm4py version treats a non-matching timest_format.
event_log_pm4py = pm4py.format_dataframe(df_log, case_id='Case ID', activity_key='Activity',
                                         timestamp_key='Complete Timestamp', timest_format='%Y-%m-%d %H:%M:%S%z')

In [92]:
# num of events, num of cases, start and end activities, average of case durations
def basic_data_analysis(event_log):
    """Print a one-shot summary of an event log.

    The report lists, in order: the number of rows in ``event_log``, the number
    of distinct values in its ``'Case ID'`` column, the start and end activities
    reported by pm4py, the number of variants, and the mean case duration
    divided by 24*60*60 and rounded to three decimals (i.e. expressed in days,
    assuming pm4py reports durations in seconds).

    Nothing is returned; the summary is written to standard output only.
    """
    report_template = "Number of events: {}\nNumber of cases: {}\nStart activities: {}\nEnd activities: {}\nVariant Length: {}\nMean of case durations: {}"

    report_values = (
        len(event_log),
        len(event_log['Case ID'].unique()),
        pm4py.get_start_activities(event_log),
        pm4py.get_end_activities(event_log),
        # Only the number of distinct variants is reported, not the variants themselves.
        len(pm4py.get_variants_as_tuples(event_log)),
        np.round(
            np.mean(pm4py.stats.get_all_case_durations(event_log))/24/60/60, 3),
    )
    print(report_template.format(*report_values))


In [93]:
# Summarise the complete, unfiltered event log as a baseline.
basic_data_analysis(event_log_pm4py)

Number of events: 262200
Number of cases: 13087
Start activities: {'A_SUBMITTED-COMPLETE': 13087}
End activities: {'A_DECLINED-COMPLETE': 3429, 'W_VALIDATE_REQUEST-COMPLETE': 2745, 'W_HANDLING_LEADS-COMPLETE': 2234, 'W_COMPLETE_REQUEST-COMPLETE': 1939, 'W_CALLING_AFTER_SENDING_OFFERS-COMPLETE': 1289, 'A_CANCELLED-COMPLETE': 655, 'W_CALLING_BACK_IMCOMPLETE FILES-COMPLETE': 452, 'O_CANCELLED-COMPLETE': 279, 'W_ASSESS_FRAUD-COMPLETE': 57, 'W_CHANGE_CONTRACT_DETAILS-SCHEDULE': 4, 'W_VALIDATE_REQUEST-START': 2, 'A_REGISTERED-COMPLETE': 1, 'W_CALLING_AFTER_SENDING_OFFERS-START': 1}
Variant Length: 4366
Mean of case durations: 8.624


## **Order Variants by Frequency**

In [138]:
# REF : https://www.kaggle.com/code/samhomsi/process-mining

from pm4py.statistics.traces.log import case_statistics

# Rank the variant statistics by their 'count' entry, most frequent first,
# and show the ten leading variants.
variants_count = sorted(
    case_statistics.get_variant_statistics(event_log_pm4py),
    key=lambda variant_stat: variant_stat['count'],
    reverse=True,
)
variants_count[:10]

[{'variant': ('A_SUBMITTED-COMPLETE',
   'A_PARTLYSUBMITTED-COMPLETE',
   'A_DECLINED-COMPLETE'),
  'count': 3429},
 {'variant': ('A_SUBMITTED-COMPLETE',
   'A_PARTLYSUBMITTED-COMPLETE',
   'W_HANDLING_LEADS-SCHEDULE',
   'W_HANDLING_LEADS-START',
   'A_DECLINED-COMPLETE',
   'W_HANDLING_LEADS-COMPLETE'),
  'count': 1872},
 {'variant': ('A_SUBMITTED-COMPLETE',
   'A_PARTLYSUBMITTED-COMPLETE',
   'W_HANDLING_LEADS-SCHEDULE',
   'W_HANDLING_LEADS-START',
   'W_HANDLING_LEADS-COMPLETE',
   'W_HANDLING_LEADS-START',
   'A_DECLINED-COMPLETE',
   'W_HANDLING_LEADS-COMPLETE'),
  'count': 271},
 {'variant': ('A_SUBMITTED-COMPLETE',
   'A_PARTLYSUBMITTED-COMPLETE',
   'W_HANDLING_LEADS-SCHEDULE',
   'W_HANDLING_LEADS-START',
   'A_PREACCEPTED-COMPLETE',
   'W_COMPLETE_REQUEST-SCHEDULE',
   'W_HANDLING_LEADS-COMPLETE',
   'W_COMPLETE_REQUEST-START',
   'A_DECLINED-COMPLETE',
   'W_COMPLETE_REQUEST-COMPLETE'),
  'count': 209},
 {'variant': ('A_SUBMITTED-COMPLETE',
   'A_PARTLYSUBMITTED-COMPLETE',

## **Loan Approved & Declined & Canceled Cases**

In [146]:
# Build one filtered log per outcome activity, plus one covering all three.
# No `level` argument is passed, so pm4py's default filtering level applies; the
# recorded output still starts every case with A_SUBMITTED-COMPLETE, i.e. whole
# cases are kept rather than single events.
refined_event_log = pm4py.filter_event_attribute_values(
    event_log_pm4py, 'Activity',
    {'A_ACTIVATED-COMPLETE', 'A_DECLINED-COMPLETE', 'A_CANCELLED-COMPLETE'})
loan_approved_event_log = pm4py.filter_event_attribute_values(
    event_log_pm4py, 'Activity', {'A_ACTIVATED-COMPLETE'})
loan_declined_event_log = pm4py.filter_event_attribute_values(
    event_log_pm4py, 'Activity', {'A_DECLINED-COMPLETE'})
loan_canceled_event_log = pm4py.filter_event_attribute_values(
    event_log_pm4py, 'Activity', {'A_CANCELLED-COMPLETE'})

# Print the same summary for each subset under its own label.
for section_title, section_log in (
    ("[ALL]", refined_event_log),
    ("[APPROVED]", loan_approved_event_log),
    ("[DECLINED]", loan_declined_event_log),
    ("[CANCELED]", loan_canceled_event_log),
):
    print(section_title)
    basic_data_analysis(section_log)


[ALL]
Number of events: 249451
Number of cases: 12688
Start activities: {'A_SUBMITTED-COMPLETE': 12688}
End activities: {'A_DECLINED-COMPLETE': 3429, 'W_VALIDATE_REQUEST-COMPLETE': 2740, 'W_HANDLING_LEADS-COMPLETE': 2234, 'W_COMPLETE_REQUEST-COMPLETE': 1863, 'W_CALLING_AFTER_SENDING_OFFERS-COMPLETE': 1019, 'A_CANCELLED-COMPLETE': 655, 'W_CALLING_BACK_IMCOMPLETE FILES-COMPLETE': 407, 'O_CANCELLED-COMPLETE': 279, 'W_ASSESS_FRAUD-COMPLETE': 57, 'W_CHANGE_CONTRACT_DETAILS-SCHEDULE': 4, 'A_REGISTERED-COMPLETE': 1}
Variant Length: 4070
Mean of case durations: 8.31
[APPROVED]
Number of events: 99925
Number of cases: 2246
Start activities: {'A_SUBMITTED-COMPLETE': 2246}
End activities: {'W_VALIDATE_REQUEST-COMPLETE': 2046, 'W_CALLING_BACK_IMCOMPLETE FILES-COMPLETE': 194, 'W_CHANGE_CONTRACT_DETAILS-SCHEDULE': 4, 'A_REGISTERED-COMPLETE': 1, 'W_CALLING_AFTER_SENDING_OFFERS-COMPLETE': 1}
Variant Length: 2067
Mean of case durations: 16.735
[DECLINED]
Number of events: 70432
Number of cases: 7635
St

## **Task Efficiency of the employees (loan approval, productivity)**

In [113]:
# Event-level filters: each log keeps only the events of one outcome activity.
loan_approved_event_log_event_level = pm4py.filter_event_attribute_values(
    event_log_pm4py, 'Activity', {'A_ACTIVATED-COMPLETE'}, level='event')
loan_declined_event_log_event_level = pm4py.filter_event_attribute_values(
    event_log_pm4py, 'Activity', {'A_DECLINED-COMPLETE'}, level='event')
loan_cancelled_event_log_event_level = pm4py.filter_event_attribute_values(
    event_log_pm4py, 'Activity', {'A_CANCELLED-COMPLETE'}, level='event')

# For every outcome, count events per "Resource" and keep the first ten entries
# in the order pm4py returns them (the recorded output is ordered by
# descending count).
resources_approval = pm4py.get_event_attribute_values(
    loan_approved_event_log_event_level, "Resource")
resources_approval = {
    resource: n_events
    for resource, n_events in list(resources_approval.items())[:10]
}
print(resources_approval)

resources_declined = pm4py.get_event_attribute_values(
    loan_declined_event_log_event_level, "Resource")
resources_declined = {
    resource: n_events
    for resource, n_events in list(resources_declined.items())[:10]
}
print(resources_declined)

resources_canceled = pm4py.get_event_attribute_values(
    loan_cancelled_event_log_event_level, "Resource")
resources_canceled = {
    resource: n_events
    for resource, n_events in list(resources_canceled.items())[:10]
}
print(resources_canceled)

{10138.0: 681, 10972.0: 518, 10629.0: 359, 10609.0: 335, 10809.0: 271, 11289.0: 68, 11339.0: 9, 112.0: 3, 10779.0: 2}
{112.0: 3429, 10910.0: 244, 11169.0: 238, 10609.0: 206, 11189.0: 172, 10138.0: 156, 10913.0: 155, 10861.0: 137, 10982.0: 133, 10629.0: 119}
{112.0: 1004, 11203.0: 108, 11119.0: 97, 11180.0: 95, 11181.0: 95, 10861.0: 85, 10913.0: 82, 10909.0: 76, 11201.0: 72, 11202.0: 68}


In [128]:
# Keep the cases matching any of the three outcome activities and summarise them.
# The recorded output of this cell is identical to the "[ALL]" summary produced
# with pm4py.filter_event_attribute_values.
filtered_event_log = pm4py.filter_trace_attribute_values(
    event_log_pm4py,
    'Activity',
    {'A_DECLINED-COMPLETE', 'A_CANCELLED-COMPLETE', 'A_ACTIVATED-COMPLETE'},
)
basic_data_analysis(filtered_event_log)

Number of events: 249451
Number of cases: 12688
Start activities: {'A_SUBMITTED-COMPLETE': 12688}
End activities: {'A_DECLINED-COMPLETE': 3429, 'W_VALIDATE_REQUEST-COMPLETE': 2740, 'W_HANDLING_LEADS-COMPLETE': 2234, 'W_COMPLETE_REQUEST-COMPLETE': 1863, 'W_CALLING_AFTER_SENDING_OFFERS-COMPLETE': 1019, 'A_CANCELLED-COMPLETE': 655, 'W_CALLING_BACK_IMCOMPLETE FILES-COMPLETE': 407, 'O_CANCELLED-COMPLETE': 279, 'W_ASSESS_FRAUD-COMPLETE': 57, 'W_CHANGE_CONTRACT_DETAILS-SCHEDULE': 4, 'A_REGISTERED-COMPLETE': 1}
Variant Length: 4070
Mean of case durations: 8.31


## **Main drivers of loan application outcomes**

In [173]:
def _summarize_requested_amounts(amount_counts, n_events):
    """Summarise a ``{requested amount: number of events}`` mapping.

    Parameters
    ----------
    amount_counts : mapping
        Requested amount -> how many events carry that amount.
    n_events : int
        Number of events in the log the mapping was computed from; used as
        the denominator of the mean.

    Returns
    -------
    tuple
        ``(total, largest, mean)`` where ``total`` is the sum of
        ``amount * count``, ``largest`` is the biggest amount present
        (0 for an empty mapping) and ``mean`` is ``total / n_events``
        (NaN when ``n_events`` is 0, instead of raising).
    """
    total = sum(amount * count for amount, count in amount_counts.items())
    largest = max(amount_counts, default=0)
    mean = total / n_events if n_events else float('nan')
    return total, largest, mean


print("[APPROVED]")
money_approval = pm4py.get_event_attribute_values(
    loan_approved_event_log_event_level, "AMOUNT_REQ")
print(money_approval)

approved_money_total, approved_money_max, approved_money_mean = _summarize_requested_amounts(
    money_approval, len(loan_approved_event_log_event_level))
print(approved_money_mean)
print(approved_money_max)

print("[DECLINED]")
money_declined = pm4py.get_event_attribute_values(
    loan_declined_event_log_event_level, "AMOUNT_REQ")
print(money_declined)

declined_money_total, declined_money_max, declined_money_mean = _summarize_requested_amounts(
    money_declined, len(loan_declined_event_log_event_level))
print(declined_money_mean)
print(declined_money_max)

print("[CANCELLED]")
money_canceled = pm4py.get_event_attribute_values(
    loan_cancelled_event_log_event_level, "AMOUNT_REQ")
print(money_canceled)

# The cancelled figures must come from money_canceled itself (not from the
# declined mapping or the declined maximum).
cancelled_money_total, cancelled_money_max, cancelled_money_mean = _summarize_requested_amounts(
    money_canceled, len(loan_cancelled_event_log_event_level))
print(cancelled_money_mean)
print(cancelled_money_max)


[APPROVED]
{15000: 281, 5000: 269, 10000: 226, 25000: 160, 20000: 117, 8000: 80, 6000: 77, 7500: 77, 30000: 73, 7000: 59, 12000: 52, 50000: 48, 35000: 35, 40000: 29, 12500: 29, 9000: 28, 16000: 27, 14000: 25, 17000: 23, 8500: 23, 11000: 21, 6500: 21, 13000: 20, 18000: 20, 5500: 18, 4000: 18, 21000: 16, 3000: 14, 22000: 13, 3500: 13, 32000: 12, 45000: 12, 13500: 10, 27000: 10, 17500: 9, 37500: 8, 27500: 8, 19000: 8, 26000: 7, 2500: 7, 28000: 7, 15500: 7, 29000: 7, 16500: 6, 33000: 6, 23000: 6, 1000: 5, 24000: 4, 11500: 4, 9500: 4, 10500: 4, 7300: 4, 38000: 4, 25500: 4, 7800: 4, 18500: 4, 22500: 3, 28500: 3, 42500: 3, 26500: 3, 34000: 3, 14500: 3, 47000: 3, 37000: 3, 4500: 3, 36000: 2, 21500: 2, 31000: 2, 30500: 2, 7700: 2, 60000: 2, 29500: 2, 23500: 2, 32500: 2, 19500: 2, 41000: 2, 6300: 2, 6800: 2, 7750: 2, 36500: 2, 9400: 2, 48000: 2, 10300: 1, 44000: 1, 9763: 1, 41382: 1, 65000: 1, 8200: 1, 17812: 1, 17050: 1, 42942: 1, 11300: 1, 15147: 1, 7843: 1, 7995: 1, 46627: 1, 1200: 1, 27082: 