In [1]:
import os
import warnings
warnings.filterwarnings(action='ignore')
import pandas as pd
from datetime import datetime
from utils.filtering import generate_system

# Resolve the 'input' folder relative to the directory the notebook runs from.
org_path = os.getcwd()
# os.path.join (rather than os.sep.join) avoids a doubled separator when the
# working directory is a filesystem root.
input_path = os.path.join(org_path, "input")

In [2]:
# Load the event log from the 'input' folder and normalise it to the
# Case / Activity / Timestamp (/ Resource) column layout used by the cells below.
file_list = os.listdir(input_path)
csv_list = [s for s in file_list if 'credit-card-new.csv' in s]
print(csv_list)
ratio = 0.01  # not referenced by the loading loop below

if not csv_list:
    # Fail here with a clear message instead of a NameError on `EL` in a later cell.
    raise FileNotFoundError("No 'credit-card-new.csv' file found in " + input_path)

# If several files match, `EL` ends up holding the last one processed.
for dat in csv_list:
    # [Page for data import]
    # os.path.join keeps the path portable; a hard-coded '\\' only works on Windows.
    event_log = pd.read_csv(os.path.join(input_path, dat))

    # Pick the relevant columns per dataset family. `.copy()` yields an
    # independent frame, so the assignments below never write into a slice of
    # `event_log`.
    if any(x in dat for x in ['Small', 'Medium', 'Large', 'Huge', 'Wide']):
        extracted_data = event_log[['Case', 'Activity', 'Timestamp']].copy()
        form = "%Y/%m/%d %H:%M:%S.%f"
    elif any(x in dat for x in ['credit-card-new']):
        extracted_data = event_log[['Case ID', 'Activity', 'Start Timestamp', 'Resource']].copy()
        extracted_data.columns = ["Case", "Activity", "Timestamp", 'Resource']
        form = "%Y-%m-%d %H:%M:%S.%f"
    elif any(x in dat for x in ['mccloud', 'credit-card']):
        extracted_data = event_log[['Case', 'Activity', 'Timestamp']].copy()
        form = "%Y-%m-%d %H:%M:%S.%f"
    else:
        extracted_data = event_log[['Case.ID', 'Activity', 'Complete.Timestamp']].copy()
        # Rename so the parse / sort steps below find "Case" and "Timestamp".
        extracted_data.columns = ["Case", "Activity", "Timestamp"]
        form = "%Y-%m-%d %H:%M:%S.%f"

    # Parse the timestamps in one vectorised call, then order the events
    # within each case chronologically.
    extracted_data["Timestamp"] = pd.to_datetime(extracted_data["Timestamp"], format=form)
    extracted_data = extracted_data.sort_values(["Case", "Timestamp", "Activity"],
                                                ascending=[True, True, True])  # Reorder rows

    # Remember which rows lack a case id BEFORE the string cast: afterwards a
    # missing value is the literal string "nan" and dropna() cannot detect it.
    missing_case = extracted_data["Case"].isna()
    extracted_data["Case"] = extracted_data["Case"].astype(str)

    # Working event log: only rows that belong to a case.
    EL = extracted_data.loc[~missing_case].copy()

['credit-card-new.csv']


In [3]:
# Preview the first five rows of the loaded event log.
EL.head(n=5)

Unnamed: 0,Case,Activity,Timestamp,Resource
62310,0,Check for completeness,2023-09-29 09:00:00.000,Clerk-000001
62309,0,New online application received,2023-09-29 09:00:00.000,
62311,0,Perform checks,2023-09-29 09:08:36.418,Clerk-000003
62312,0,Make decision,2023-09-29 09:09:43.339,Manager-000001
62313,0,Notify accept,2023-09-29 09:16:37.243,Manager-000003


In [4]:
# List the distinct activity names that occur in the event log.
EL["Activity"].unique()

array(['Check for completeness', 'New online application received',
       'Perform checks', 'Make decision', 'Notify accept', 'Deliver card',
       'EVENT 13 END', 'Request info', 'info received', 'notify reject',
       'time out', 'review request received'], dtype=object)

In [5]:
from patterns.FormBased import FormBased

In [12]:
# Form-based events: pollute the activities listed in `which`.
# Per the cell's printed log, cases are narrowed down by the time window, by
# the declare rule and by containing the `which` subsequence; `ratio` then
# picks a random portion of the remaining cases.
EL_polluted = FormBased(
    EL,
    which=['Make decision', 'Notify accept', 'Deliver card'],
    ratio=0.7,
    tstart="2023-09-26 09:00:00.000",
    tend="2023-12-26 09:00:00.000",
    DecConstraint="Chain Response[Make decision, Notify accept] |A.Resource is Manager-000001 |T.Resource is Manager-000003 |",
    case_id_key="Case",
    timestamp_key="Timestamp",
)

Filtering step 1 . The number of cases in the time interval ( 2023-09-26 09:00:00.000 , 2023-12-26 09:00:00.000 ):  1505
Filtering step 2 . The number of cases by declare rule:  34
Filtering step 3 . The number of cases containing the defined subseq  ['Make decision', 'Notify accept', 'Deliver card'] :  34
Filtering step 4 . The number of cases to be filtered by defined random portion:  24


In [None]:
# Preview the first six rows that carry a non-empty pollution label.
EL_polluted.loc[EL_polluted['label'].ne("")].head(6)

Unnamed: 0,Case,Activity,Timestamp,Resource,label
3,0,Make decision,2023-09-29 09:09:43.339,Manager-000001,form-based events(2023-09-29 09:09:43.339000)
4,0,Notify accept,2023-09-29 09:09:43.339,Manager-000003,form-based events(2023-09-29 09:16:37.243000)
5,0,Deliver card,2023-09-29 09:09:43.339,Manager-000002,form-based events(2023-09-29 09:39:19.386000)
26,1113,Make decision,2023-12-04 07:15:07.899,Manager-000001,form-based events(2023-12-04 07:15:07.899000)
27,1113,Notify accept,2023-12-04 07:15:07.899,Manager-000003,form-based events(2023-12-04 08:02:31.947000)
28,1113,Deliver card,2023-12-04 07:15:07.899,Manager-000002,form-based events(2023-12-04 08:08:22.361000)


In [None]:
from patterns.CollateralEvent import CollateralEvent

In [None]:
# Collateral events: for the target activity, add the activities named after '>>'.
# `timep` / `unit` presumably bound the time offset of the added events — TODO confirm.
EL_polluted = CollateralEvent(
    EL,
    target_collats="[Activity:'Make decision'>>('Make decision_signed1', 'Make decision_signed2')]",
    ratio=0.7,
    timep=1,
    unit='sec',
    tstart="2023-09-26 09:00:00.000",
    tend="2023-12-26 09:00:00.000",
    DecConstraint="Chain Response[Make decision, Notify accept] |A.Resource is Manager-000001 |T.Resource is Manager-000003 |",
    case_id_key="Case",
    timestamp_key="Timestamp",
    activity_key="Activity",
)

Filtering step 1 . The number of cases in the time interval ( 2023-09-26 09:00:00.000 , 2023-12-26 09:00:00.000 ):  1505
Filtering step 2 . The number of cases by declare rule:  34
Filtering step 3 . The number of cases to be filtered by defined random portion:  24


In [None]:
# Preview the first six rows that carry a non-empty pollution label.
EL_polluted.loc[EL_polluted['label'].ne("")].head(6)

Unnamed: 0,Case,Activity,Timestamp,Resource,label
4,0,Make decision_signed1,2023-09-29 09:09:43.439,Manager-000001,collateral events(Make decision)
5,0,Make decision_signed2,2023-09-29 09:09:44.039,Manager-000001,collateral events(Make decision)
13,1085,Make decision_signed1,2023-12-01 13:15:18.496,Manager-000001,collateral events(Make decision)
14,1085,Make decision_signed2,2023-12-01 13:15:19.196,Manager-000001,collateral events(Make decision)
38,1163,Make decision_signed1,2023-12-06 03:27:38.606,Manager-000001,collateral events(Make decision)
39,1163,Make decision_signed2,2023-12-06 03:27:39.606,Manager-000001,collateral events(Make decision)


In [None]:
from patterns.ScatteredCase import ScatteredCase

In [None]:
# Scattered cases: first attach system information to the log, then pollute
# the systems named in `syslist`.
# NOTE(review): presumably generate_system adds a 'System' column with `nsys`
# distinct systems — confirm against utils.filtering.
ELsys = generate_system(EL, nsys=10)

EL_polluted = ScatteredCase(
    ELsys,
    log_name='EL_scattered2',
    syslist="[System:('System2', 'System3')]",
    tstart="2023-09-26 09:00:00.000",
    tend="2023-12-26 09:00:00.000",
    case_id_key="Case",
    timestamp_key="Timestamp",
)

In [None]:
# Show the full label text of the first polluted row.
# Selected by position (.iloc[0]) instead of a hard-coded index label, which
# raises KeyError as soon as the data or the polluted rows change.
EL_polluted.loc[EL_polluted['label'] != "", 'label'].iloc[0]

"Scattered cases(['Check for completeness', 'Make decision', 'Notify accept', 'EVENT 13 END'])"

In [7]:
from patterns.PollutedLabel import PollutedLabel

In [8]:
# Polluted Label: rewrite the activity names selected by `target` following the
# `action` template. In the saved output the new name is the original activity,
# two digits plus five letters, and the event timestamp, joined by '_'.
EL_polluted = PollutedLabel(
    EL,
    target="[Activity:('Perform checks', 'Make decision')]",  # TBD: also numeric range
    action="[Activity]_[0-9:{2}][a-zA-Z:{5}]_[Timestamp*(%Y%m%d %H%M%S%f)]",
    ratio=0.7,
    tstart="2023-09-26 09:00:00.000",
    tend="2023-12-26 09:00:00.000",
    DecConstraint="Chain Response[Make decision, Notify accept] |A.Resource is Manager-000001 |T.Resource is Manager-000003 |",
    case_id_key="Case",
    timestamp_key="Timestamp",
)

Filtering step 1 . The number of cases in the time interval ( 2023-09-26 09:00:00.000 , 2023-12-26 09:00:00.000 ):  1505
Filtering step 2 . The number of cases by declare rule:  34
Filtering step 3 . The number of cases to be filtered by defined random portion:  24


In [9]:
# Preview the first six rows that carry a non-empty pollution label.
EL_polluted.loc[EL_polluted['label'].ne("")].head(6)

Unnamed: 0,Case,Activity,Timestamp,Resource,label
2,0,Perform checks_11WSVrt_20230929 090836418000,2023-09-29 09:08:36.418,Clerk-000003,polluted Label(Activity:'Perform checks')
3,0,Make decision_44coYig_20230929 090943339000,2023-09-29 09:09:43.339,Manager-000001,polluted Label(Activity:'Make decision')
39,1203,Perform checks_20LJtij_20231208 041050406000,2023-12-08 04:10:50.406,Clerk-000006,polluted Label(Activity:'Perform checks')
40,1203,Make decision_79amUPL_20231208 042426460000,2023-12-08 04:24:26.460,Manager-000002,polluted Label(Activity:'Make decision')
53,1203,Perform checks_27FUDto_20231208 061151533000,2023-12-08 06:11:51.533,Clerk-000002,polluted Label(Activity:'Perform checks')
54,1203,Make decision_78NMNYV_20231208 061902545000,2023-12-08 06:19:02.545,Manager-000001,polluted Label(Activity:'Make decision')


In [10]:
from patterns.ScatteredEvent import ScatteredEvent

In [11]:

# Scattered Events
# NOTE(review): the saved output of this cell ends in
# "TypeError: bad operand type for unary +: 'str'" — verify the arguments
# (e.g. `loc`) against the current ScatteredEvent implementation.
# Alternative target kept from the original cell:
#   "[Activity:('Make decision', 'Notify accept', 'Deliver card')]"
EL_polluted, temp = ScatteredEvent(
    EL,
    target="[Activity:'Make decision'>>('Make revision1', 'Make revision2')]",
    action="[Resource]_[0-9:{2}][a-zA-Z:{5}]_[Timestamp*(%Y%m%d %H%M%S%f)]",
    loc="[Description:idx(-1)]",
    Del=True,
    ratio=0.7,
    tstart="2023-09-26 09:00:00.000",
    tend="2023-12-26 09:00:00.000",
    DecConstraint="Chain Response[Make decision, Notify accept] |A.Resource is Manager-000001 |T.Resource is Manager-000003 |",
    case_id_key="Case",
    activity_key="Activity",
    timestamp_key="Timestamp",
)

Filtering step 1 . The number of cases in the time interval ( 2023-09-26 09:00:00.000 , 2023-12-26 09:00:00.000 ):  1505
Filtering step 2 . The number of cases by declare rule:  34
Filtering step 3 . The number of cases to be filtered by defined random portion:  24


TypeError: bad operand type for unary +: 'str'

In [None]:
# Preview the first six rows that carry a non-empty pollution label.
EL_polluted.loc[EL_polluted['label'].ne("")].head(6)

Unnamed: 0,Case,Activity,Timestamp,Resource,Description,label
30,1113,Make decision,2023-12-04 07:15:07.899,Manager-000001,"[Clerk-000003_79wkqEd_20231204 073055915000, M...",Scattered Events( Removed activities:['Make re...
37,1163,Make decision,2023-12-06 03:27:38.206,Manager-000001,"[Manager-000001_48PVuWS_20231206 032815018666,...",Scattered Events( Removed activities:['Make re...
44,1203,Make decision,2023-12-08 04:24:26.460,Manager-000002,"[Clerk-000003_01GBvEg_20231208 043454335000, n...",Scattered Events( Removed activities:['Make re...
107,1283,Make decision,2023-12-13 13:08:36.949,Manager-000001,"[Manager-000001_87hkvKx_20231213 131242134333,...",Scattered Events( Removed activities:['Make re...
114,1388,Make decision,2023-12-19 10:32:46.574,Manager-000005,"[nan_19MLcNs_20231219 103309015000, nan_15JhiU...",Scattered Events( Removed activities:['Make re...
126,1395,Make decision,2023-12-19 14:46:39.642,Manager-000001,"[Manager-000003_39RLxbo_20231219 144834820333,...",Scattered Events( Removed activities:['Make re...


In [24]:
# Show the full (untruncated) label text of the first polluted row.
# Selected by position instead of the hard-coded index label 30, which only
# points at a polluted row for one particular random selection.
EL_polluted.loc[EL_polluted['label'] != "", 'label'].iloc[0]

"Scattered Events( Removed activities:['Make revision1', 'Make revision2'], Concatenated attr = ['Resource', 'Timestamp'])"

In [14]:
from patterns.SynonymousLabel import SynonymousLabel

In [15]:
# Synonymous Label: replace the target activity with one of the names in `syns`.
# `prob` presumably holds the sampling weight of each synonym, in the same
# order as `syns` — TODO confirm.
EL_polluted = SynonymousLabel(
    EL,
    target="[Activity:('Perform checks')]",
    syns=["Perform checks - Dep1", "Perform checks - Dep2", "Perform checks - Dep3"],
    prob=[0.1, 0.6, 0.3],
    ratio=0.7,
    tstart="2023-09-26 09:00:00.000",
    tend="2023-12-26 09:00:00.000",
    DecConstraint="Chain Response[Make decision, Notify accept] |A.Resource is Manager-000001 |T.Resource is Manager-000003 |",
    case_id_key="Case",
    timestamp_key="Timestamp",
)


Filtering step 1 . The number of cases in the time interval ( 2023-09-26 09:00:00.000 , 2023-12-26 09:00:00.000 ):  1505
Filtering step 2 . The number of cases by declare rule:  34
Filtering step 3 . The number of cases to be filtered by defined random portion:  24


In [16]:
# Preview the first ten rows that carry a non-empty pollution label.
EL_polluted.loc[EL_polluted['label'].ne("")].head(10)

Unnamed: 0,Case,Activity,Timestamp,Resource,label
62311,0,Perform checks - Dep2,2023-09-29 09:08:36.418,Clerk-000003,synonymous label('Perform checks')
58807,97,Perform checks - Dep2,2023-10-04 14:33:28.710,Clerk-000005,synonymous label('Perform checks')
61807,154,Perform checks - Dep2,2023-10-09 03:56:24.704,Clerk-000002,synonymous label('Perform checks')
31073,283,Perform checks - Dep2,2023-10-13 14:46:37.679,Clerk-000001,synonymous label('Perform checks')
31084,283,Perform checks - Dep1,2023-10-13 16:24:12.376,Clerk-000005,synonymous label('Perform checks')
23902,510,Perform checks - Dep3,2023-10-30 01:22:06.896,Clerk-000002,synonymous label('Perform checks')
10406,556,Perform checks - Dep2,2023-11-01 01:51:12.379,Clerk-000005,synonymous label('Perform checks')
37400,572,Perform checks - Dep1,2023-11-01 19:54:51.647,Clerk-000003,synonymous label('Perform checks')
31715,602,Perform checks - Dep2,2023-11-02 17:51:56.297,Clerk-000004,synonymous label('Perform checks')
61640,701,Perform checks - Dep2,2023-11-09 11:24:17.882,Clerk-000001,synonymous label('Perform checks')


In [17]:
from patterns.HomonymousLabel import HomonymousLabel

In [18]:
# Homonymous Label: give the activities selected by `target` the shared name
# `hlabel`. In the saved output both target activities appear as
# 'Check:homonymous'.
EL_polluted = HomonymousLabel(
    EL,
    target="[Activity:('Perform checks', 'Check for completeness')]",
    hlabel="Check:homonymous",
    ratio=0.7,
    tstart="2023-09-26 09:00:00.000",
    tend="2023-12-26 09:00:00.000",
    DecConstraint="Chain Response[Make decision, Notify accept] |A.Resource is Manager-000001 |T.Resource is Manager-000003 |",
    case_id_key="Case",
    timestamp_key="Timestamp",
)

Filtering step 1 . The number of cases in the time interval ( 2023-09-26 09:00:00.000 , 2023-12-26 09:00:00.000 ):  1505
Filtering step 2 . The number of cases by declare rule:  34
Filtering step 3 . The number of cases to be filtered by defined random portion:  24


In [19]:
# Preview the first five rows that carry a non-empty pollution label.
EL_polluted.loc[EL_polluted['label'].ne("")].head(5)

Unnamed: 0,Case,Activity,Timestamp,Resource,label
62310,0,Check:homonymous,2023-09-29 09:00:00.000,Clerk-000001,homonymous Label(Activity:'Check for completen...
62311,0,Check:homonymous,2023-09-29 09:08:36.418,Clerk-000003,homonymous Label(Activity:'Perform checks')
58806,97,Check:homonymous,2023-10-04 14:18:44.672,Clerk-000003,homonymous Label(Activity:'Check for completen...
58807,97,Check:homonymous,2023-10-04 14:33:28.710,Clerk-000005,homonymous Label(Activity:'Perform checks')
61800,154,Check:homonymous,2023-10-09 02:50:57.264,Clerk-000005,homonymous Label(Activity:'Check for completen...


In [20]:
from patterns.ElusiveCase import ElusiveCase

In [21]:
# Elusive Case
# Options noted in the original cell: method is 'Variant' or 'KMeans';
# gnum = 4 or n_clusters = 100.
EL_polluted = ElusiveCase(
    EL,
    method='Variant',
    gnum=4,
    ratio=0.7,
    tstart="2023-09-26 09:00:00.000",
    tend="2023-12-26 09:00:00.000",
    DecConstraint="Chain Response[Make decision, Notify accept] |A.Resource is Manager-000001 |T.Resource is Manager-000003 |",
    case_id_key="Case",
    activity_key="Activity",
    timestamp_key="Timestamp",
)

Filtering step 1 . The number of cases in the time interval ( 2023-09-26 09:00:00.000 , 2023-12-26 09:00:00.000 ):  1505
Filtering step 2 . The number of cases by declare rule:  34
Filtering step 3 . The number of cases to be filtered by defined random portion:  24
Generating draft IDs... (the below count is based on variant-level)


100%|██████████| 11/11 [00:00<00:00, 688.59it/s]


In [22]:
# Preview the first five rows whose label differs from the empty string.
# NOTE(review): the saved output of this cell lists rows whose label is 0,
# which also pass this filter — confirm which label values ElusiveCase writes.
EL_polluted.loc[EL_polluted['label'].ne("")].head(5)

Unnamed: 0,Activity,Timestamp,Resource,draft_ID,0,1,label
0,Check for completeness,2023-09-29 09:00:00.000,Clerk-000001,,,,0
1,New online application received,2023-09-29 09:00:00.000,,,,,0
2,Perform checks,2023-09-29 09:08:36.418,Clerk-000003,,,,0
3,Make decision,2023-09-29 09:09:43.339,Manager-000001,,,,0
4,Notify accept,2023-09-29 09:16:37.243,Manager-000003,,,,0


In [23]:
from patterns.UnanchoredEvent import UnanchoredEvent

In [24]:

# Unanchored Event: attach system information to the log, then pollute the
# systems named in `syslist`. In the saved output the labelled rows show their
# timestamps in the `TimeFormat` layout (YYYY/MM/DD ...).
ELsys = generate_system(EL, nsys=10)
EL_polluted = UnanchoredEvent(
    ELsys,
    syslist="[System:('System1', 'System3')]",
    TimeFormat="%Y/%m/%d %H:%M:%S.%f",
    tstart="2023-09-26 09:00:00.000",
    tend="2023-12-26 09:00:00.000",
    DecConstraint="Chain Response[Make decision, Notify accept] |A.Resource is Manager-000001 |T.Resource is Manager-000003 |",
    case_id_key="Case",
    timestamp_key="Timestamp",
)

Filtering step 1 . The number of cases in the time interval ( 2023-09-26 09:00:00.000 , 2023-12-26 09:00:00.000 ):  1505
Filtering step 2 . The number of cases by declare rule:  34


In [25]:
# Preview the first five rows that carry a non-empty pollution label.
EL_polluted.loc[EL_polluted['label'].ne("")].head(5)

Unnamed: 0,Case,Activity,Timestamp,Resource,System,label
0,0,Check for completeness,2023/09/29 09:00:00.000000,Clerk-000001,System3,unanchored event
248,97,Check for completeness,2023/10/04 14:18:44.672000,Clerk-000003,System3,unanchored event
425,154,Check for completeness,2023/10/09 02:50:57.264000,Clerk-000005,System3,unanchored event
426,154,Check for completeness,2023/10/09 02:58:29.207000,Clerk-000001,System3,unanchored event
427,154,Check for completeness,2023/10/09 03:32:32.364000,Clerk-000001,System3,unanchored event


In [27]:
from patterns.DistortedLabel import DistortedLabel

ImportError: cannot import name 'DistortedLabel' from 'patterns.DistortedLabel' (c:\Users\ADMIN\Desktop\FLAWD\patterns\DistortedLabel.py)

In [None]:
# Distorted Label
# NOTE(review): the import of DistortedLabel fails in the saved output, so this
# cell cannot run as saved. The constraint is also passed as `declare=` here,
# while every other pattern call uses `DecConstraint=` — confirm the name.
# Alternatives kept from the original cell:
#   who:        [Resource:('Manager-000001)]
#   distortion: "[Activity:({'Check for completeness':'check for completeness'})]"
EL_polluted = DistortedLabel(
    EL,
    who="[Resource:(random{m=0.05, s=0.01})]",
    distortion="[Activity:random(Skip, Insert, Interchange, UpLow, Proximity)]",
    ratio=0.7,
    tstart="2023-09-26 09:00:00.000",
    tend="2023-12-26 09:00:00.000",
    declare="Chain Response[Make decision, Notify accept] |A.Resource is Manager-000001 |T.Resource is Manager-000003 |",
    case_id_key="Case",
    timestamp_key="Timestamp",
)

In [None]:
# Preview the first five rows that carry a non-empty pollution label.
EL_polluted.loc[EL_polluted['label'].ne("")].head(5)

In [None]:
from patterns.DistortedLabel import DistortedLabel