In [1]:
import dice_ml
from dice_ml import Dice
from dice_ml.utils.exception import UserConfigValidationException

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris, fetch_california_housing

from src.transition_system import transition_system, indexs_for_window, list_to_str
from src.function_store import StoreTestRun, extract_algo_name, generate_cfe, get_case_id, prepare_df_for_ml, \
    activity_n_resources, get_test_cases, get_prefix_of_activities, validate_transition

import pandas as pd
import os
import pickle
import random
import subprocess
from IPython.display import display
from math import ceil
import warnings
from wrapt_timeout_decorator import timeout

# from multiprocessing import Process
# from timeout_decorator import timeout, TimeoutError
# import signal
from typing import Tuple, Any
from collections import Counter
import utils
from time import sleep
# Move one directory up when the current working directory contains "src",
# so the relative paths used later (./data, ./preprocessed_datasets,
# experiment_results/) are resolved from the parent directory.
# NOTE(review): this is a substring test on the whole path, so any directory
# whose path merely contains "src" also triggers the chdir — confirm intended.
if "src" in os.getcwd():
    os.chdir("../")

# Suppress all warnings
# NOTE(review): this hides every warning category for the whole session,
# including deprecation warnings from pandas / sklearn / dice_ml.
warnings.filterwarnings("ignore")
%load_ext autoreload
%autoreload 2

# Dataset: VINST (Volvo Belgium)
link: https://www.win.tue.nl/bpi/doku.php?id=2013:challenge

## Setup Variables

In [2]:
# Notebook-wide configuration. Everything a reader might tune lives here.
pd.options.display.max_columns= None  # show every column when a DataFrame is displayed
KPI = "total_time"  # activity_occurrence, total_time, ...
SECONDS_TO_HOURS = 60 * 60
SECONDS_TO_DAYS = 60 * 60 * 24
WINDOW_SIZE = 3                        # window_size passed to transition_system()
REDUCED_KPI_TIME = 90                  # Target lead time as a percentage of the trace's original lead time
TOTAL_CFS = 15                         # Number of CFs DiCE algorithm should produce
TRAIN_DATA_SIZE = 10_066               # Number of leading rows of the training set to keep (full set: 31_066)
DICE_METHOD = "random"                 # DiCE method: genetic, kdtree, random
# NOTE(review): the file name says "genetic" while DICE_METHOD is "random" — confirm
# the results file matches the method actually being run.
RESULTS_FILE_PATH_N_NAME = "experiment_results/genetic-04-total_time.csv"
# Weights forwarded to generate_cfe() in the multi-query experiment.
proximity_weight = 0.2
sparsity_weight = 0.2
diversity_weight = 5.0

case_id_name = 'SR_Number'  # The case identifier column name.
start_date_name = 'Change_Date+Time'  # Maybe change to start_et (start event time)
activity_column_name = "ACTIVITY"  # The activity column name.

## Load Data

In [3]:
# Locations of the preprocessed train/test splits and of the pickled test cases.
data_dir = "./preprocessed_datasets/"
train_dataset_file = "train-set-cfe.csv"
test_dataset_file = "test-set-cfe.csv"
test_pickle_dataset_file = "test-set-cfe.pkl"
df = pd.read_csv("./data/VINST cases incidents.csv")  # Use full dataset for transition systems
df_train = pd.read_csv(os.path.join(data_dir, train_dataset_file))
df_test = pd.read_csv(os.path.join(data_dir, test_dataset_file))

### Basic Preprocessing of the dataset

In [4]:
# Replace missing values in the full event log with the literal "missing" so
# they are treated as an ordinary category value downstream.
df = df.fillna("missing")

# Keep only the first TRAIN_DATA_SIZE rows of the training set.
df_train = df_train[:TRAIN_DATA_SIZE]  # 31_066
# df_test = df_test[: 19_041]
print(f"Rows in df_train: {len(df_train):,}")
# Report the test-set size under its own name (was mislabelled as df_train).
print(f"Rows in df_test: {len(df_test):,}")
# df_train.info()

Rows in df_train: 10,066
Rows in df_train: 19,041


### Helper functions

In [5]:

# valid_resources = activity_n_resources(df_train, ["ACTIVITY", "Involved_ST_Function_Div", "Involved_Org_line_3", "Involved_ST"])
# Columns handed to activity_n_resources(); the result is later passed to
# validate_transition() as `valid_resources`. It is built from the full event
# log `df`, not from the truncated training set.
# NOTE(review): presumably the result is the set of (activity, resource...) value
# combinations observed in `df` — confirm against src.function_store.
resource_columns_to_validate = [activity_column_name, 'Involved_ST_Function_Div', 'Involved_Org_line_3',
                                'Involved_ST', 'Country', 'Owner_Country']
valid_resources = activity_n_resources(df, resource_columns_to_validate)
# len(valid_resources)

### Prepare the Test Dataset

In [5]:
# === Unpickle the Standard test-set. To standardize the test across different parameters.
# The first two positional arguments are None because the cases are loaded
# from the file given in `path_and_filename` (load_dataset=True).
# NOTE(review): the file is a pickle — only load files produced by this project,
# since unpickling untrusted data can execute arbitrary code.
test_cases = get_test_cases(None, None, load_dataset=True, path_and_filename=os.path.join(data_dir, test_pickle_dataset_file))
print(f"Number of cases in the test set: {len(test_cases):,}")

# # === Pickle dataset for comparison of different methods
# with open(os.path.join(data_dir, test_pickle_dataset_file), 'wb') as file:
#     pickle.dump(test_cases, file)

Number of cases in the test set: 2,493


In [9]:
# Train the lead-time regressor and wrap it in a DiCE explainer.
# Defines: cols_to_vary, outcome_name, X_train, y_train, model, explainer.

# ### Features that can vary
# option 1:
# cols_to_vary = [col for col in df_train.columns if col[0] == '#']
# cols_to_vary.extend(["ACTIVITY"])

# option 2:
# Only the activity and the three resource columns may be changed by DiCE.
cols_to_vary = ["ACTIVITY", "Involved_ST_Function_Div", "Involved_Org_line_3", "Involved_ST"]

# Regression target column.
outcome_name = "lead_time"

X_train, y_train = prepare_df_for_ml(df_train, case_id_name, outcome_name, columns_to_remove=["Change_Date+Time", "time_remaining"])
# X_test, y_test = prepare_df_for_ml(df_test, outcome_name)

# Columns scaled with StandardScaler (time features and per-activity counters).
continuous_features = ["time_from_first", "time_from_previous_et", "time_from_midnight", "# ACTIVITY=In Progress", "# ACTIVITY=Awaiting Assignment",
                       "# ACTIVITY=Resolved", "# ACTIVITY=Assigned", "# ACTIVITY=Closed", "# ACTIVITY=Wait - User", "# ACTIVITY=Wait - Implementation", "# ACTIVITY=Wait",
                       "# ACTIVITY=Wait - Vendor", "# ACTIVITY=In Call", "# ACTIVITY=Wait - Customer", "# ACTIVITY=Unmatched", "# ACTIVITY=Cancelled"]
# Columns one-hot encoded; categories unseen during fit are ignored at predict time.
categorical_features = ["Status", "ACTIVITY", "Involved_ST_Function_Div", "Involved_Org_line_3", "Involved_ST", "SR_Latest_Impact", "Product", "Country", "Owner_Country",
                        "weekday"]

# We create the preprocessing pipelines for both numeric and categorical data.
numeric_transformer = Pipeline(steps=[
    ('scaler', StandardScaler())])

categorical_transformer = Pipeline(steps=[
    ('onehot', OneHotEncoder(handle_unknown='ignore'))])

transformations = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, continuous_features),
        ('cat', categorical_transformer, categorical_features)])

# Append the estimator to the preprocessing pipeline.
# Now we have a full prediction pipeline.
# NOTE: the step is named 'classifier' but the estimator is a RandomForestRegressor.
# NOTE(review): no random_state is set, so the fitted forest (and therefore the
# counterfactual outcomes) can differ between runs; n_jobs=7 is hard-coded.
clf = Pipeline(steps=[('preprocessor', transformations),
                           ('classifier', RandomForestRegressor(n_jobs=7))])
model = clf.fit(X_train, y_train)

# ## Create DiCE model
# DiCE receives features and outcome in a single frame.
data_model = dice_ml.Data(dataframe=pd.concat([X_train, y_train], axis="columns"),
                      continuous_features=continuous_features,
                      outcome_name=outcome_name)

# We provide the type of model as a parameter (model_type)
ml_backend = dice_ml.Model(model=model, backend="sklearn", model_type='regressor')
method = DICE_METHOD  # genetic, kdtree, random
explainer = Dice(data_model, ml_backend, method=method)  # Method random does not support features_weights argument in generate_counterfactuals()
# exp_genetic_iris = Dice(d_iris, m_iris, method="kdtree")

## Experiment with Single Queries

In [10]:
# Positional row range of df_train to inspect; the slice is displayed as the
# cell output. The following cell reassigns sidx / eidx for its query.
sidx = 87
eidx = 97

df_train[sidx: eidx]
# df_train[sidx: eidx]

Unnamed: 0,SR_Number,Change_Date+Time,Status,ACTIVITY,Involved_ST_Function_Div,Involved_Org_line_3,Involved_ST,SR_Latest_Impact,Product,Country,Owner_Country,time_from_first,time_from_previous_et,time_from_midnight,weekday,time_remaining,# ACTIVITY=In Progress,# ACTIVITY=Awaiting Assignment,# ACTIVITY=Resolved,# ACTIVITY=Assigned,# ACTIVITY=Closed,# ACTIVITY=Wait - User,# ACTIVITY=Wait - Implementation,# ACTIVITY=Wait,# ACTIVITY=Wait - Vendor,# ACTIVITY=In Call,# ACTIVITY=Wait - Customer,# ACTIVITY=Unmatched,# ACTIVITY=Cancelled,lead_time
87,1-506071646,1299572184,Accepted,In Progress,missing,Org line V7n,V37 2nd,Medium,PROD542,nl,Netherlands,81256.0,8430.0,29784.0,Tuesday,37206306.0,8,5,0,0,0,1,0,0,0,0,0,0,0,37287562.0
88,1-506071646,1299572298,Queued,Awaiting Assignment,A2_1,Org line A2,D8,Medium,PROD542,nl,Netherlands,81370.0,114.0,29898.0,Tuesday,37206192.0,9,5,0,0,0,1,0,0,0,0,0,0,0,37287562.0
89,1-506071646,1299572432,Accepted,In Progress,A2_1,Org line A2,D8,Medium,PROD542,nl,POLAND,81504.0,134.0,30032.0,Tuesday,37206058.0,9,6,0,0,0,1,0,0,0,0,0,0,0,37287562.0
90,1-506071646,1299572566,Accepted,Wait - User,A2_1,Org line A2,D8,Medium,PROD542,nl,POLAND,81638.0,134.0,30166.0,Tuesday,37205924.0,10,6,0,0,0,1,0,0,0,0,0,0,0,37287562.0
91,1-506071646,1299651374,Accepted,In Progress,A2_1,Org line A2,D8,Medium,PROD542,nl,POLAND,160446.0,78808.0,22574.0,Wednesday,37127116.0,10,6,0,0,0,2,0,0,0,0,0,0,0,37287562.0
92,1-506071646,1299651436,Queued,Awaiting Assignment,missing,Org line V7n,V37 2nd,Medium,PROD542,nl,POLAND,160508.0,62.0,22636.0,Wednesday,37127054.0,11,6,0,0,0,2,0,0,0,0,0,0,0,37287562.0
93,1-506071646,1299686621,Accepted,In Progress,missing,Org line V7n,V37 2nd,Medium,PROD542,nl,Netherlands,195693.0,35185.0,57821.0,Wednesday,37091869.0,11,7,0,0,0,2,0,0,0,0,0,0,0,37287562.0
94,1-506071646,1299686712,Queued,Awaiting Assignment,A2_1,Org line A2,D8,Medium,PROD542,nl,Netherlands,195784.0,91.0,57912.0,Wednesday,37091778.0,12,7,0,0,0,2,0,0,0,0,0,0,0,37287562.0
95,1-506071646,1299756545,Accepted,In Progress,A2_1,Org line A2,D8,Medium,PROD542,nl,POLAND,265617.0,69833.0,41345.0,Thursday,37021945.0,12,8,0,0,0,2,0,0,0,0,0,0,0,37287562.0
96,1-506071646,1299757653,Accepted,Wait - User,A2_1,Org line A2,D8,Medium,PROD542,nl,POLAND,266725.0,1108.0,42453.0,Thursday,37020837.0,13,8,0,0,0,2,0,0,0,0,0,0,0,37287562.0


In [11]:
# Generate counterfactuals for one training row (positional row `sidx`).
sidx = 94
eidx = 95
# Upper bound for the counterfactual outcome: REDUCED_KPI_TIME percent of the
# row's actual lead time. `.iloc` makes this lookup positional, so it always
# refers to the same row as the positional slice `X_train[sidx: eidx]` below
# (a plain `y_train[sidx]` is a label lookup and can pick a different row, or
# fail, when the index is not 0..n-1). The multi-query loop also uses `.iloc`.
total_time_upper_bound = int(y_train.iloc[sidx] * (REDUCED_KPI_TIME / 100))  # A percentage of the original total time of the trace
query_instances = X_train[sidx: eidx]

# Only the columns in cols_to_vary may change; the outcome must land in
# [0, total_time_upper_bound].
cfe = explainer.generate_counterfactuals(query_instances, total_CFs=20, desired_range=[0, total_time_upper_bound], features_to_vary=cols_to_vary)

cfe.visualize_as_dataframe(show_only_changes=True)

100%|██████████| 1/1 [00:01<00:00,  1.32s/it]

Query instance (original outcome : 34397332)





Unnamed: 0,Status,ACTIVITY,Involved_ST_Function_Div,Involved_Org_line_3,Involved_ST,SR_Latest_Impact,Product,Country,Owner_Country,time_from_first,time_from_previous_et,time_from_midnight,weekday,# ACTIVITY=In Progress,# ACTIVITY=Awaiting Assignment,# ACTIVITY=Resolved,# ACTIVITY=Assigned,# ACTIVITY=Closed,# ACTIVITY=Wait - User,# ACTIVITY=Wait - Implementation,# ACTIVITY=Wait,# ACTIVITY=Wait - Vendor,# ACTIVITY=In Call,# ACTIVITY=Wait - Customer,# ACTIVITY=Unmatched,# ACTIVITY=Cancelled,lead_time
0,Queued,Awaiting Assignment,A2_1,Org line A2,D8,Medium,PROD542,nl,Netherlands,195784.0,91.0,57912.0,Wednesday,12,7,0,0,0,2,0,0,0,0,0,0,0,34397332.0



Diverse Counterfactual set (new outcome: [0, 33558805])


Unnamed: 0,Status,ACTIVITY,Involved_ST_Function_Div,Involved_Org_line_3,Involved_ST,SR_Latest_Impact,Product,Country,Owner_Country,time_from_first,time_from_previous_et,time_from_midnight,weekday,# ACTIVITY=In Progress,# ACTIVITY=Awaiting Assignment,# ACTIVITY=Resolved,# ACTIVITY=Assigned,# ACTIVITY=Closed,# ACTIVITY=Wait - User,# ACTIVITY=Wait - Implementation,# ACTIVITY=Wait,# ACTIVITY=Wait - Vendor,# ACTIVITY=In Call,# ACTIVITY=Wait - Customer,# ACTIVITY=Unmatched,# ACTIVITY=Cancelled,lead_time
0,-,-,D_2,Org line G1,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,32348424.0
1,-,-,-,-,N3 2nd,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,27343876.0
2,-,Wait - User,-,Org line F,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,32573954.0
3,-,Wait,-,-,V4 2nd,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,26627440.0
4,-,-,E_10,-,M6,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,27531300.0
5,-,-,-,-,U6 2nd,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,27343876.0
6,-,Wait - Implementation,-,-,G28 3rd,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,27768832.0
7,-,-,-,-,L50 3rd,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,27343876.0
8,-,Wait,-,Org line I,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,31999992.0
9,-,-,-,-,N47,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,27343876.0


In [12]:
## Apply Transition System Validation to the above Single Query Results
# === Load the Transition Graph
# Built from the full event log `df` with WINDOW_SIZE; the first returned value
# is discarded. `transition_graph` is later passed to validate_transition().
_, transition_graph = transition_system(df, case_id_name=case_id_name, activity_column_name=activity_column_name, window_size=WINDOW_SIZE)

In [15]:
# # Case 1: For experimenting with single query
# prefix_of_activities = get_prefix_of_activities(expected_activity_index=sidx, event_log=df_train, window_size=window_size, activity_column_name=activity_column_name)

# # Case 2: When running in test mode
# prefix_of_activities = get_prefix_of_activities(df_single_trace=df_test_trace, window_size=window_size, activity_column_name=activity_column_name)

# # Test Validate Transitions Function
# validate_transition(cfe, prefix_of_activities, transition_graph, valid_resources)
# cfe.visualize_as_dataframe(show_only_changes=True)
# print(f"Valid Counterfactual Set")

### Check Feature Importance score

In [16]:
# imp = exp_genetic_iris.local_feature_importance(query_instances, total_CFs=10, desired_range=[0, total_time_upper_bound], features_to_vary=cols_to_vary)
# imp.local_importance

## Experiment with Multiple Queries

In [17]:
# Create the run-state store for this experiment and resume from a previous
# run when a saved state already exists at the store's save/load path.
state_obj = StoreTestRun(save_load_path=RESULTS_FILE_PATH_N_NAME)
save_load_path = state_obj.get_save_load_path()

if os.path.exists(save_load_path):
    state_obj.load_state()
    # Number of test cases already processed; the experiment loop starts here.
    cases_done = state_obj.run_state["cases_done"]
else:
    cases_done = 0

# Displayed as the cell output.
state_obj.run_state["cases_done"]

0

In [28]:
# Run the counterfactual experiment over the remaining test cases.
# For every trace: generate counterfactuals for its last row, record them,
# filter them with validate_transition(), record the survivors, and persist the
# run state. Each outcome is filed under one key of the run state
# ("cfe_before_validation", "cfe_after_validation", "cfe_not_found",
# "cases_includes_new_data", "cases_too_small", "exceptions").
# %%capture
for df_test_trace in test_cases[cases_done:]:
    # print("Case number:", cases_done)
    # print(f"Case number is state_obj: {state_obj.run_state['cases_done']}")
    query_case_id = get_case_id(df_test_trace, case_id_name)

    # Traces with one or two rows are recorded as too small and skipped.
    # NOTE(review): an empty trace (len == 0) is NOT caught here and falls through
    # to the `.iloc[-1]` lookups below — confirm empty traces cannot occur.
    # NOTE(review): `continue` also skips the sanity assert and the case cap at
    # the bottom of the loop.
    if 0 < len(df_test_trace) <= 2:
        print("too small", cases_done, query_case_id)
        result_value = query_case_id
        state_obj.add_cfe_to_results( ("cases_too_small", result_value) )
        cases_stored = state_obj.save_state()
        cases_done += 1
        continue

    X_test, y_test = prepare_df_for_ml(df_test_trace, case_id_name, outcome_name, columns_to_remove=["Change_Date+Time", "time_remaining"])
    # Access the last row of the truncated trace to replicate the behavior of a running trace
    query_instances = X_test.iloc[-1:]
    total_time_upper_bound = int( y_test.iloc[-1] * (REDUCED_KPI_TIME / 100) )  # A percentage of the original total time of the trace

    try:
        cfe = generate_cfe(explainer, query_instances, total_time_upper_bound, features_to_vary=cols_to_vary,
                           total_cfs=TOTAL_CFS, kpi=KPI, proximity_weight=proximity_weight, sparsity_weight=sparsity_weight,
                           diversity_weight=diversity_weight)

        result_value = (query_case_id, cfe)
        # Recorded before validation; the state is written to disk only once, by
        # save_state() at the end of this try block.
        state_obj.add_cfe_to_results(("cfe_before_validation", result_value))

        # NOTE(review): window_size=0 here, while the transition graph was built
        # with WINDOW_SIZE — confirm the two are meant to differ.
        prefix_of_activities = get_prefix_of_activities(df_single_trace=df_test_trace, window_size=0, activity_column_name=activity_column_name)
        cfe_df = validate_transition(cfe, prefix_of_activities=prefix_of_activities, transition_graph=transition_graph, valid_resources=valid_resources)

        # Only non-empty validated sets are recorded.
        if len(cfe_df) > 0:
            result_value = (query_case_id, cfe_df)
            state_obj.add_cfe_to_results(("cfe_after_validation", result_value))

        cases_stored = state_obj.save_state()

    # Every handler records the case id under its own key and saves the state,
    # so `cases_stored` is always assigned before the assert below.
    except UserConfigValidationException:
        result_value = query_case_id
        state_obj.add_cfe_to_results(("cfe_not_found", result_value))
        cases_stored = state_obj.save_state()
    except TimeoutError as err:  # When function takes too long
        result_value = query_case_id
        print("TimeoutError caught:", err)
        state_obj.add_cfe_to_results(("cfe_not_found", result_value))
        cases_stored = state_obj.save_state()
    # NOTE(review): any ValueError raised inside the try block is filed as
    # "new data", including one raised after "cfe_before_validation" was added.
    except ValueError:
        # print(f"Includes feature not found in training data: {get_case_id(df_test_trace)}")
        result_value = query_case_id
        state_obj.add_cfe_to_results(("cases_includes_new_data", result_value))
        cases_stored = state_obj.save_state()
    # This error is seen occurring on when running lots of loops on the server
    except AttributeError as e:
        print("AttributeError caught:", e)
        state_obj.add_cfe_to_results(("exceptions", query_case_id))
        cases_stored = state_obj.save_state()
    # except Exception as err:
    #     print(f"Broadest Exception handler invoked", err)
    #     state_obj.add_cfe_to_results(("exceptions", query_case_id))
    #     cases_stored = state_obj.save_state()

    cases_done += 1
    # Just for a sanity check
    # (save_state() presumably returns the number of cases stored so far — verify.)
    assert cases_done == cases_stored
    # NOTE(review): hard-coded cap — the loop stops once 20 cases are done.
    if cases_done >= 20:
        break

Case number: 22
Case number is state_obj: 22


100%|██████████| 1/1 [00:00<00:00,  2.85it/s]

No Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec





In [18]:
state_obj.get_run_state_df()

Unnamed: 0,cfe_before_validation,cfe_after_validation,cfe_not_found,cases_includes_new_data,cases_too_small,cases_zero_in_y,exceptions,cases_done
0,0,0,0,0,0,0,0,0


# Results:

### View Valid cases and their Counterfactual Examples (CFEs)
You can load saved configs and run them independently of the above code

In [21]:
# Question: If we train the models using dataset with good traces?

In [10]:
# Fetch a results pickle from the remote host with scp unless a local copy
# already exists under ./experiment_results/.
pickle_file = "random-t16.pkl"
if not os.path.exists( f"./experiment_results/{pickle_file}"):
    # Arguments are passed as a list (no shell), so pickle_file is not
    # interpreted by a shell.
    result = subprocess.run(['scp', f'labnum08:git_repos/explainable-prescriptive-analytics/experiment_results/{pickle_file}', 'experiment_results/'], capture_output=True, text=True)

    # return code of 0 means the command executed successfully
    if result.returncode != 0:
        print("There is an Error in the command")
        # stderr is captured above; show it so the failure can be diagnosed
        # instead of being silently discarded.
        print(result.stderr)
    else:
        print("successful")
else:
    print("File already exists")

File already exists


In [15]:
# Point the run-state store at the results that belong to `pickle_file`
# (same base name, ".csv" extension) and load them if they exist.
# NOTE: this rebinds RESULTS_FILE_PATH_N_NAME, state_obj and cases_done that
# were set earlier in the notebook.
RESULTS_FILE_PATH_N_NAME = f"experiment_results/{pickle_file.split('.')[0]}.csv"

state_obj = StoreTestRun(save_load_path=RESULTS_FILE_PATH_N_NAME)
save_load_path = state_obj.get_save_load_path()

if os.path.exists(save_load_path):
    state_obj.load_state()
    cases_done = state_obj.run_state["cases_done"]
else:
    cases_done = 0

print("Cases tested: ", state_obj.run_state["cases_done"])

def print_results():
    """Yield (case_id, cfe_df) pairs from the stored "cfe_after_validation" results.

    Despite its name this generator prints nothing; it reads the module-level
    `state_obj` at iteration time.
    """
    for case_id, cfe_df in state_obj.run_state["cfe_after_validation"]:
        yield case_id, cfe_df

# Advanced one pair at a time with next(generator).
generator = print_results()

Cases tested:  2493


In [19]:
# Show the next validated case: the original trace followed by its
# counterfactuals. Re-run the cell to step through the results;
# next() raises StopIteration once the generator is exhausted.
case_id, cfe_df = next(generator)

print("Original Test Case:")
# === Find test_case with case_id
for df_test_trace in test_cases:
    # Pass the case-id column explicitly, as the experiment loop does, rather
    # than relying on get_case_id()'s default.
    if get_case_id(df_test_trace, case_id_name) == case_id:
        display(df_test_trace)

print("Counterfactuals for the last row:")
cfe_df

Original Test Case:


Unnamed: 0,SR_Number,Change_Date+Time,Status,ACTIVITY,Involved_ST_Function_Div,Involved_Org_line_3,Involved_ST,SR_Latest_Impact,Product,Country,Owner_Country,time_from_first,time_from_previous_et,time_from_midnight,weekday,time_remaining,# ACTIVITY=In Progress,# ACTIVITY=Awaiting Assignment,# ACTIVITY=Resolved,# ACTIVITY=Assigned,# ACTIVITY=Closed,# ACTIVITY=Wait - User,# ACTIVITY=Wait - Implementation,# ACTIVITY=Wait,# ACTIVITY=Wait - Vendor,# ACTIVITY=In Call,# ACTIVITY=Wait - Customer,# ACTIVITY=Unmatched,# ACTIVITY=Cancelled,lead_time
0,1-540390551,1310368850,Accepted,In Progress,A2_1,Org line C,D2,Medium,PROD542,nl,Belgium,0.0,0.0,26450.0,Monday,26409637.0,0,0,0,0,0,0,0,0,0,0,0,0,0,26409637.0
1,1-540390551,1310368855,Accepted,In Progress,A2_1,Org line C,D2,Medium,PROD542,nl,Belgium,5.0,5.0,26455.0,Monday,26409632.0,1,0,0,0,0,0,0,0,0,0,0,0,0,26409637.0
2,1-540390551,1310369323,Queued,Awaiting Assignment,missing,Org line V7,V29 2nd,Medium,PROD542,nl,Belgium,473.0,468.0,26923.0,Monday,26409164.0,2,0,0,0,0,0,0,0,0,0,0,0,0,26409637.0
3,1-540390551,1310369325,Accepted,In Progress,missing,Org line V7,V29 2nd,Medium,PROD542,nl,Belgium,475.0,2.0,26925.0,Monday,26409162.0,2,1,0,0,0,0,0,0,0,0,0,0,0,26409637.0
4,1-540390551,1310369327,Accepted,Wait,missing,Org line V7,V29 2nd,Medium,PROD542,nl,Belgium,477.0,2.0,26927.0,Monday,26409160.0,3,1,0,0,0,0,0,0,0,0,0,0,0,26409637.0
5,1-540390551,1310384058,Queued,Awaiting Assignment,missing,Org line V7n,V37 2nd,Medium,PROD542,nl,Belgium,15208.0,14731.0,41658.0,Monday,26394429.0,3,1,0,0,0,0,0,1,0,0,0,0,0,26409637.0
6,1-540390551,1310390309,Accepted,In Progress,missing,Org line V7n,V37 2nd,Medium,PROD542,nl,Netherlands,21459.0,6251.0,47909.0,Monday,26388178.0,3,2,0,0,0,0,0,1,0,0,0,0,0,26409637.0
7,1-540390551,1310390882,Queued,Awaiting Assignment,A2_1,Org line C,D2,Medium,PROD542,nl,Netherlands,22032.0,573.0,48482.0,Monday,26387605.0,4,2,0,0,0,0,0,1,0,0,0,0,0,26409637.0
8,1-540390551,1310391323,Accepted,In Progress,A2_1,Org line C,D2,Medium,PROD542,nl,Belgium,22473.0,441.0,48923.0,Monday,26387164.0,4,3,0,0,0,0,0,1,0,0,0,0,0,26409637.0


Counterfactuals for the last row:


Unnamed: 0,Status,ACTIVITY,Involved_ST_Function_Div,Involved_Org_line_3,Involved_ST,SR_Latest_Impact,Product,Country,Owner_Country,time_from_first,time_from_previous_et,time_from_midnight,weekday,# ACTIVITY=In Progress,# ACTIVITY=Awaiting Assignment,# ACTIVITY=Resolved,# ACTIVITY=Assigned,# ACTIVITY=Closed,# ACTIVITY=Wait - User,# ACTIVITY=Wait - Implementation,# ACTIVITY=Wait,# ACTIVITY=Wait - Vendor,# ACTIVITY=In Call,# ACTIVITY=Wait - Customer,# ACTIVITY=Unmatched,# ACTIVITY=Cancelled,lead_time
0,Accepted,Wait,A2_1,Org line C,D2,Medium,PROD542,nl,Belgium,22473.0,441.0,48923.0,Monday,4,3,0,0,0,0,0,1,0,0,0,0,0,13820730.0
1,Accepted,Assigned,A2_1,Org line C,D2,Medium,PROD542,nl,Belgium,22473.0,441.0,48923.0,Monday,4,3,0,0,0,0,0,1,0,0,0,0,0,13876632.0
2,Accepted,Wait - Vendor,A2_1,Org line C,D2,Medium,PROD542,nl,Belgium,22473.0,441.0,48923.0,Monday,4,3,0,0,0,0,0,1,0,0,0,0,0,14067300.0
3,Accepted,In Progress,A2_1,Org line C,D2,Medium,PROD542,nl,Belgium,22473.0,441.0,48923.0,Monday,4,3,0,0,0,0,0,1,0,0,0,0,0,13934988.0
4,Accepted,Awaiting Assignment,A2_1,Org line C,D2,Medium,PROD542,nl,Belgium,22473.0,441.0,48923.0,Monday,4,3,0,0,0,0,0,1,0,0,0,0,0,14015351.0
5,Accepted,Resolved,A2_1,Org line C,D2,Medium,PROD542,nl,Belgium,22473.0,441.0,48923.0,Monday,4,3,0,0,0,0,0,1,0,0,0,0,0,14067300.0


In [None]:
# for _, cfe in cfes_list:
#     cfe.visualize_as_dataframe(show_only_changes=True)
# cfes_list[9][1]

## Observation
The output usually comes within 2 minutes

#### TODOS
- Check how many queries (rows) have valid CFEs
- How to prevent the CFE's ACTIVITY column from taking the value "Pending Liquidation request"
- Discover how DiCE handle constraints ( For us do it in post-processing step )
- CE_OU - resource column, you can modify it
- Modify just the resource (col: CE_UO) & Activity (col: ACTIVITY)
- build a transition system to validate activity and then resource
- Do the above for total time case
- I
- Implement activity validation and resource validation to the above
-   To do this effectively maybe figure out how the DiCE post process the constraints and implement this in that layer

In [None]:
# Leftover experiment that asks for "opposite"-class counterfactuals for one
# slice of X_train and then for every row of X_train.
# NOTE(review): `exp_genetic_iris` is not defined in any visible cell (it only
# appears in commented-out code), so this cell raises NameError on a fresh kernel.
# NOTE(review): desired_class="opposite" is a classification argument, while the
# model built in this notebook is a regressor — confirm before reviving this cell.
query_instances = X_train[sidx: eidx]
cfe = exp_genetic_iris.generate_counterfactuals(query_instances, total_CFs=5, desired_class="opposite", features_to_vary=cols_to_vary)
cfe.visualize_as_dataframe(show_only_changes=True)

# Collected as (row index, counterfactual object) pairs.
genetic_cfes = []

for idx, query_instances in X_train.iterrows():
    # iterrows() yields a Series; turn it back into a one-row DataFrame.
    query_instances = query_instances.to_frame().transpose()
    # query_instances = X_train[0:1]  # an interesting query`

    cfe = exp_genetic_iris.generate_counterfactuals(query_instances, total_CFs=5, desired_class="opposite", features_to_vary=cols_to_vary)

    genetic_cfes.append( (idx, cfe) )

- Implement post-processing steps
    - Transition transition_system with full traces
    - 4 tuple comparison with country and owner country



## Deprecated Code

In [29]:
from multiprocessing import Process

def long_process_function(xval):
    """Print a start message, block for `xval` seconds, then print a done message.

    Args:
        xval: number of seconds handed to `sleep`.
    """
    start_message = "Now sleeping for {}".format(xval)
    print(start_message)
    sleep(xval)
    print("Done sleeping!!!")

def increment_50_seconds():
    """Print a banner, then sleep one second and print the counter, 50 times (0..49)."""
    print("Increment 50 seconds")
    tick = 0
    while tick < 50:
        sleep(1)
        print("Counter at: {}".format(tick))
        tick += 1


# Demo: run the counter in a child process, wait up to 5 seconds for it, do
# some other work, then stop the child if it is still running.
if __name__ == '__main__':
    try:
        p1 = Process(target=increment_50_seconds, name="Process_increment_50_seconds")

        p1.start()

        # Wait at most 5 seconds; join() returns even if the child is still running.
        p1.join(timeout=5)

        print("-------------- Before long func ------------------")
        long_process_function(10)
        print("-------------- After long func ------------------")

        # Decide whether the child overran BEFORE terminating it: right after
        # terminate() the exit code may or may not be set yet, so checking
        # `exitcode is None` afterwards is racy.
        timed_out = p1.is_alive()
        p1.terminate()
        # Reap the child so it does not linger as a zombie process.
        p1.join()

        if timed_out:
            print("Time is out")

    # Catch Exception rather than using a bare `except:` so that interrupting
    # the kernel (KeyboardInterrupt) is not swallowed, and report the cause.
    except Exception as err:
        print("Exception caught:", err)



Increment 50 seconds
Counter at: 0
Counter at: 1
Counter at: 2
Counter at: 3
Counter at: 4
-------------- Before long func ------------------
Now sleeping for 10
Counter at: 5
Counter at: 6
Counter at: 7
Counter at: 8
Counter at: 9
Counter at: 10
Counter at: 11
Counter at: 12
Counter at: 13
Done sleeping!!!
-------------- After long func ------------------
Time is out


#### Practice `wrapt_timeout_decorator` module

In [45]:
@timeout(5)
def mytest(message):
    """Print `message`, then count seconds until the 5-second timeout fires.

    The loop would run for 9 seconds; the `timeout(5)` decorator interrupts it
    with a TimeoutError, which is caught and reported here.

    Args:
        message: text printed before the loop starts.
    """
    # this example does NOT work on windows, please check the section
    # "use with Windows" in the README.rst
    print(message)
    try:
        for i in range(1,10):
            # `sleep` is the name imported at the top of the notebook
            # (`from time import sleep`); the `time` module itself is never
            # imported, so `time.sleep` fails on a fresh kernel.
            sleep(1)
            print('{} seconds have passed'.format(i))

    except TimeoutError as e:
        print("TimeoutError caught:", e)
    # This error is seen occurring on when running lots of loops on the server
    except AttributeError as e:
        print("AttributeError caught:", e)

if __name__ == '__main__':
    mytest('starting')

starting
1 seconds have passed
2 seconds have passed
3 seconds have passed
4 seconds have passed
TimeoutError caught: Function mytest timed out after 5.0 seconds


================================================================

#### Practice `timeout_decorator` module

In [18]:

class MyTimeOutError(AssertionError):
    """Raised when a function wrapped by `timeout_decorator` exceeds its time limit."""

def timeout_decorator(timeout_seconds):
    """Build a decorator that aborts the wrapped call after `timeout_seconds`.

    Uses SIGALRM: a handler that raises MyTimeOutError is installed and an
    alarm is armed before the wrapped function is called.

    Args:
        timeout_seconds: whole number of seconds passed to `signal.alarm`.

    Returns:
        A decorator; the decorated function returns whatever the original
        returns.

    Raises:
        MyTimeOutError: from inside the wrapped call when the alarm fires.
    """
    # Imported here so the decorator does not depend on another cell having
    # imported these modules first.
    import functools
    import signal

    def timeout_wrapper(original_function):

        def _timeout_handler(signum, frame):
            raise MyTimeOutError("Function execution timed out.")

        @functools.wraps(original_function)  # keep the wrapped function's name/docstring
        def wrapper(*args, **kwargs):
            # Set the signal handler, remembering the one it replaces
            previous_handler = signal.signal(signal.SIGALRM, _timeout_handler)
            # Set the alarm for the specified timeout duration
            signal.alarm(timeout_seconds)

            try:
                return original_function(*args, **kwargs)
            finally:
                # Always disarm the alarm: otherwise a call that finishes in time
                # leaves it pending and it fires later, in unrelated code.
                signal.alarm(0)
                # Put the previous handler back. signal.signal() returns None when
                # the old handler was not installed from Python; that value cannot
                # be re-installed, so it is skipped.
                if previous_handler is not None:
                    signal.signal(signal.SIGALRM, previous_handler)
        return wrapper
    return timeout_wrapper

In [19]:
import signal
from time import sleep

@timeout_decorator(4)
def long_running_function():
    """Sleep for ten one-second steps; decorated with a 4-second timeout."""
    for i in range(10):
        sleep(1)

try:
    # Call the long-running function
    long_running_function()

except MyTimeOutError as e:
    # Handle the timeout error
    print(str(e))
finally:
    # Cancel the alarm
    # (ensures no SIGALRM is still pending once this cell has finished)
    signal.alarm(0)


Function execution timed out.


==============================

### Practice Creating decorators

In [35]:
import signal
def deco_bar2(my_arg1):
    """Decorator factory that prints messages around the wrapped call.

    Args:
        my_arg1: value printed before each wrapped call.

    Returns:
        A decorator; the decorated function prints a "before" line, the
        decorator argument, calls the original, prints an "after" line and
        returns the original's result.
    """
    import functools  # local import keeps this practice cell self-contained

    def deco_bar(original_function):
        # functools.wraps copies __name__, __doc__, etc. from the original, so
        # the decorated function does not show up as "wrapper".
        @functools.wraps(original_function)
        def wrapper(*args, **kwargs):
            print("I'm deco bar() before")
            print(f"argument passed to decorator:{my_arg1}")
            result = original_function(*args, **kwargs)

            print("I'm deco bar() after")

            return result
        return wrapper
    return deco_bar

@deco_bar2("arg_to_deco")
def foo(valx):
    """Print a line containing `valx` and return 1 (decorated with deco_bar2)."""
    print(f"I'm foo({valx})")
    return 1

def baz(*args, **kwargs):
    """Print the positional-argument tuple and the keyword-argument dict."""
    print(args, kwargs)


In [36]:
foo(valx="foo_arg2")

I'm deco bar() before
argument passed to decorator:arg_to_deco
I'm foo(foo_arg2)
I'm deco bar() after


1

In [None]:
def validate_transition(cfe):
    """ Deprecated function.
    Works with transition graph which has single activity as key.

    Filters the counterfactuals of the first query in two passes: rows whose
    activity is not listed in `transition_graph[current_activity]` are dropped,
    then rows whose ("ACTIVITY", "Involved_ST_Function_Div") pair is not in
    `valid_resources` are dropped. The index is reset after each pass.

    Reads the notebook-level names `current_step`, `activity_column_name`,
    `transition_graph` and `valid_resources`.
    NOTE(review): `current_step` is not assigned in any visible cell.
    NOTE(review): running this cell rebinds the name `validate_transition`
    imported from src.function_store, which the experiment loop calls with
    keyword arguments this version does not accept.

    Args:
        cfe: Dice counterfactual object.

    Returns:
        A filtered copy of `cfe.cf_examples_list[0].final_cfs_df`.
    """
    candidates_df = cfe.cf_examples_list[0].final_cfs_df.copy()

    # expected_next_action = cfe.cf_examples_list[0].test_instance_df[activity_column_name].item()
    # print(f"Expected next action: {expected_next_action}")

    current_activity = current_step[activity_column_name].item()

    # === Verify the next activity
    invalid_activity_rows = [
        idx
        for idx, suggested_next_activity in candidates_df[activity_column_name].items()
        if suggested_next_activity not in transition_graph[current_activity]
    ]
    candidates_df = candidates_df.drop(invalid_activity_rows, axis='index').reset_index(drop=True)

    # === Verify the associated resources
    invalid_resource_rows = [
        idx
        for idx, row in candidates_df[ ["ACTIVITY", "Involved_ST_Function_Div"] ].iterrows()
        if tuple(row) not in valid_resources
    ]
    return candidates_df.drop(invalid_resource_rows, axis='index').reset_index(drop=True)