In [27]:
import numpy as np
import pandas as pd
import sys
sys.path.append("../")

from cfmining.algorithms import MAPOFCEM
from cfmining.predictors import GeneralClassifier_Shap
from cfmining.action_set import ActionSet
from cfmining.utils import get_data_model, DeepPipeExplainer

from experiments_helper import run_experiments, summarize_results, format_df_table

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
# Random state passed to the `.sample(...)` calls in the experiment cells.
SEED = 0

In [3]:
def get_action_set(dataset, X_train):
    """Build the `ActionSet` for one of the supported datasets.

    Every column of `X_train` that is not listed as immutable for the dataset
    is declared mutable.

    Parameters
    ----------
    dataset : str
        Dataset key; `"german"` or `"taiwan"`.
    X_train : object with a `columns` attribute
        Training features. Forwarded to `ActionSet` as `X`.

    Returns
    -------
    ActionSet
        Built with the dataset's default step size and mutable features.

    Raises
    ------
    ValueError
        If `dataset` is not a supported key. (This used to fall through both
        branches and fail with an `UnboundLocalError` on `return`.)
    """
    # dataset key -> (features that must never change, default step size)
    settings = {
        "german": (
            ['Age', 'OwnsHouse', 'isMale', 'JobClassIsSkilled', 'Single', 'ForeignWorker', 'RentsHouse'],
            0.05,
        ),
        "taiwan": (
            ['Single', 'Age_in_25_to_40', 'Married', 'Age_lt_25', 'Age_in_40_to_59', 'Age_geq_60', 'EducationLevel'],
            0.1,
        ),
    }
    if dataset not in settings:
        raise ValueError(
            f"Unknown dataset {dataset!r}; expected one of {sorted(settings)}"
        )

    not_mutable_features, step_size = settings[dataset]
    mutable_features = [feat for feat in X_train.columns if feat not in not_mutable_features]
    return ActionSet(X = X_train, default_step_size = step_size, mutable_features = mutable_features)

## Comparison estimate prob max v1 and v2

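Comparison between the methodology that uses the standard maximum-probability estimate and the variant that avoids computing new explanations by reusing the explanation of the original individual. In the tables below, `method` 1 is run with `estimate_prob_max=False` and `method` 2 with `estimate_prob_max=True`.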

In [13]:
# "german" / LGBMClassifier / compare="percentile".
# method 1: estimate_prob_max=False, method 2: estimate_prob_max=True.
dataset = "german"
model_name = "LGBMClassifier"
compare = "percentile"
X_train, Y_train, model, outlier_detection, individuals = get_data_model(dataset, model_name)
individuals = individuals.sample(n = 20, random_state=SEED)
action_set = get_action_set(dataset, X_train)

class FakeOutlierDetection():
    """Stand-in outlier detector whose `predict` ignores its input."""
    def predict(self, X):
        # NOTE(review): always a one-element list, whatever the number of rows
        # in X — confirm the classifier only calls this one row at a time.
        return [1]

model_shap = GeneralClassifier_Shap(model, FakeOutlierDetection(), X_train, shap_explainer="permutation", threshold = 0.5)

summaries = []
# Same run for both settings; only `estimate_prob_max` differs.
for method_id, estimate_prob_max in enumerate([False, True], start=1):
    method = MAPOFCEM(
        action_set = action_set,
        classifier = model_shap,
        compare = compare,
        max_changes = 3,
        estimate_prob_max = estimate_prob_max,
    )
    raw_metrics = run_experiments(method, individuals, model_shap)
    metrics = summarize_results(raw_metrics, dataset)
    metrics["method"] = method_id
    summaries.append(metrics)

results = pd.concat(summaries)
# "method" was added last, so [:-1] selects every metric column.
format_df_table(results, "method", results.columns.tolist()[:-1])

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:24<00:00,  1.22s/it]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:17<00:00,  1.15it/s]


Unnamed: 0,method,costs,n_changes,outlier,outliers_score,diversity,n_solutions,time
0,1,0.086 (+-0.066),2.1 (+-0.788),0.25 (+-0.444),0.522 (+-0.04),0.0 (+-0.0),1.0 (+-0.0),1.222 (+-1.415)
1,2,0.086 (+-0.066),2.1 (+-0.788),0.25 (+-0.444),0.522 (+-0.04),0.0 (+-0.0),1.0 (+-0.0),0.872 (+-0.988)


In [14]:
# "german" / LGBMClassifier / compare="non_dom".
# method 1: estimate_prob_max=False, method 2: estimate_prob_max=True.
dataset = "german"
model_name = "LGBMClassifier"
compare = "non_dom"
X_train, Y_train, model, outlier_detection, individuals = get_data_model(dataset, model_name)
individuals = individuals.sample(n = 20, random_state=SEED)
action_set = get_action_set(dataset, X_train)

class FakeOutlierDetection():
    """Stand-in outlier detector whose `predict` ignores its input."""
    def predict(self, X):
        # NOTE(review): always a one-element list, whatever the number of rows
        # in X — confirm the classifier only calls this one row at a time.
        return [1]

model_shap = GeneralClassifier_Shap(model, FakeOutlierDetection(), X_train, shap_explainer="permutation", threshold = 0.5)

summaries = []
# Same run for both settings; only `estimate_prob_max` differs.
for method_id, estimate_prob_max in enumerate([False, True], start=1):
    method = MAPOFCEM(
        action_set = action_set,
        classifier = model_shap,
        compare = compare,
        max_changes = 3,
        estimate_prob_max = estimate_prob_max,
    )
    raw_metrics = run_experiments(method, individuals, model_shap)
    metrics = summarize_results(raw_metrics, dataset)
    metrics["method"] = method_id
    summaries.append(metrics)

results = pd.concat(summaries)
# "method" was added last, so [:-1] selects every metric column.
format_df_table(results, "method", results.columns.tolist()[:-1])

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [02:23<00:00,  7.18s/it]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [02:22<00:00,  7.14s/it]


Unnamed: 0,method,costs,n_changes,outlier,outliers_score,diversity,n_solutions,time
0,1,0.414 (+-0.155),1.973 (+-0.655),6.8 (+-10.521),0.516 (+-0.041),0.05 (+-0.064),28.85 (+-18.793),7.174 (+-5.708)
1,2,0.414 (+-0.155),1.973 (+-0.655),6.85 (+-10.51),0.516 (+-0.041),0.05 (+-0.064),28.8 (+-18.741),7.139 (+-5.678)


In [15]:
# "taiwan" / LGBMClassifier / compare="percentile".
# method 1: estimate_prob_max=False, method 2: estimate_prob_max=True.
dataset = "taiwan"
model_name = "LGBMClassifier"
compare = "percentile"
X_train, Y_train, model, outlier_detection, individuals = get_data_model(dataset, model_name)
individuals = individuals.sample(n = 20, random_state=SEED)
action_set = get_action_set(dataset, X_train)

class FakeOutlierDetection():
    """Stand-in outlier detector whose `predict` ignores its input."""
    def predict(self, X):
        # NOTE(review): always a one-element list, whatever the number of rows
        # in X — confirm the classifier only calls this one row at a time.
        return [1]

model_shap = GeneralClassifier_Shap(model, FakeOutlierDetection(), X_train, shap_explainer="permutation", threshold = 0.5)

summaries = []
# Same run for both settings; only `estimate_prob_max` differs.
for method_id, estimate_prob_max in enumerate([False, True], start=1):
    method = MAPOFCEM(
        action_set = action_set,
        classifier = model_shap,
        compare = compare,
        max_changes = 3,
        estimate_prob_max = estimate_prob_max,
    )
    raw_metrics = run_experiments(method, individuals, model_shap)
    metrics = summarize_results(raw_metrics, dataset)
    metrics["method"] = method_id
    summaries.append(metrics)

results = pd.concat(summaries)
# "method" was added last, so [:-1] selects every metric column.
format_df_table(results, "method", results.columns.tolist()[:-1])

PermutationExplainer explainer: 1001it [00:57, 14.76it/s]                                                                                                                                                                                           
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:38<00:00,  1.95s/it]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:33<00:00,  1.66s/it]


Unnamed: 0,method,costs,n_changes,outlier,outliers_score,diversity,n_solutions,time
0,1,0.08 (+-0.072),2.1 (+-0.912),0.05 (+-0.224),0.458 (+-0.053),0.0 (+-0.0),1.0 (+-0.0),1.947 (+-2.218)
1,2,0.084 (+-0.083),2.05 (+-0.887),0.05 (+-0.224),0.458 (+-0.053),0.0 (+-0.0),1.0 (+-0.0),1.658 (+-1.766)


In [16]:
# "taiwan" / LGBMClassifier / compare="non_dom".
# method 1: estimate_prob_max=False, method 2: estimate_prob_max=True.
# NOTE(review): this cell uses shap_explainer="tree" while the other cells in
# this section use "permutation" — confirm that is intentional.
dataset = "taiwan"
model_name = "LGBMClassifier"
compare = "non_dom"
X_train, Y_train, model, outlier_detection, individuals = get_data_model(dataset, model_name)
individuals = individuals.sample(n = 20, random_state=SEED)
action_set = get_action_set(dataset, X_train)

class FakeOutlierDetection():
    """Stand-in outlier detector whose `predict` ignores its input."""
    def predict(self, X):
        # NOTE(review): always a one-element list, whatever the number of rows
        # in X — confirm the classifier only calls this one row at a time.
        return [1]

model_shap = GeneralClassifier_Shap(model, FakeOutlierDetection(), X_train, shap_explainer="tree", threshold = 0.5)

summaries = []
# Same run for both settings; only `estimate_prob_max` differs.
for method_id, estimate_prob_max in enumerate([False, True], start=1):
    method = MAPOFCEM(
        action_set = action_set,
        classifier = model_shap,
        compare = compare,
        max_changes = 3,
        estimate_prob_max = estimate_prob_max,
    )
    raw_metrics = run_experiments(method, individuals, model_shap)
    metrics = summarize_results(raw_metrics, dataset)
    metrics["method"] = method_id
    summaries.append(metrics)

results = pd.concat(summaries)
# "method" was added last, so [:-1] selects every metric column.
format_df_table(results, "method", results.columns.tolist()[:-1])

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [1:07:19<00:00, 201.99s/it]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [1:12:25<00:00, 217.29s/it]


Unnamed: 0,method,costs,n_changes,outlier,outliers_score,diversity,n_solutions,time
0,1,0.478 (+-0.092),2.527 (+-0.4),15.5 (+-46.377),0.467 (+-0.048),0.341 (+-0.279),197.4 (+-141.669),201.964 (+-156.836)
1,2,0.479 (+-0.092),2.53 (+-0.402),15.7 (+-46.659),0.467 (+-0.048),0.341 (+-0.279),202.35 (+-149.388),217.257 (+-173.008)


## Estimate outlier or not

In [33]:
# "german" / LGBMClassifier / compare="percentile", using the outlier detector
# returned by get_data_model.
# method 1: estimate_outlier=True, method 2: estimate_outlier=False.
dataset = "german"
model_name = "LGBMClassifier"
compare = "percentile"
X_train, Y_train, model, outlier_detection, individuals = get_data_model(dataset, model_name)
individuals = individuals.sample(n = 20, random_state=SEED)
action_set = get_action_set(dataset, X_train)

model_shap = GeneralClassifier_Shap(model, outlier_detection, X_train, shap_explainer="permutation", threshold = 0.5)

summaries = []
# Same run for both settings; only `estimate_outlier` differs.
for method_id, estimate_outlier in enumerate([True, False], start=1):
    method = MAPOFCEM(
        action_set = action_set,
        classifier = model_shap,
        compare = compare,
        max_changes = 3,
        estimate_prob_max = False,
        estimate_outlier = estimate_outlier,
    )
    raw_metrics = run_experiments(method, individuals, model_shap)
    metrics = summarize_results(raw_metrics, dataset)
    metrics["method"] = method_id
    summaries.append(metrics)

results = pd.concat(summaries)
# "method" was added last, so [:-1] selects every metric column.
format_df_table(results, "method", results.columns.tolist()[:-1])

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:39<00:00,  1.99s/it]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:38<00:00,  1.94s/it]


Unnamed: 0,method,costs,n_changes,outlier,outliers_score,diversity,n_solutions,time
0,1,0.108 (+-0.084),2.2 (+-0.834),0.05 (+-0.224),0.507 (+-0.037),0.0 (+-0.0),1.0 (+-0.0),1.986 (+-1.991)
1,2,0.108 (+-0.084),2.2 (+-0.834),0.05 (+-0.224),0.507 (+-0.037),0.0 (+-0.0),1.0 (+-0.0),1.934 (+-1.97)


In [34]:
# "german" / MLPClassifier / compare="percentile", using the outlier detector
# returned by get_data_model.
# method 1: estimate_outlier=True, method 2: estimate_outlier=False.
dataset = "german"
model_name = "MLPClassifier"
compare = "percentile"
X_train, Y_train, model, outlier_detection, individuals = get_data_model(dataset, model_name)
individuals = individuals.sample(n = 20, random_state=SEED)
action_set = get_action_set(dataset, X_train)

model_shap = GeneralClassifier_Shap(model, outlier_detection, X_train, shap_explainer="permutation", threshold = 0.5)

summaries = []
# Same run for both settings; only `estimate_outlier` differs.
for method_id, estimate_outlier in enumerate([True, False], start=1):
    method = MAPOFCEM(
        action_set = action_set,
        classifier = model_shap,
        compare = compare,
        max_changes = 3,
        estimate_prob_max = False,
        estimate_outlier = estimate_outlier,
    )
    raw_metrics = run_experiments(method, individuals, model_shap)
    metrics = summarize_results(raw_metrics, dataset)
    metrics["method"] = method_id
    summaries.append(metrics)

results = pd.concat(summaries)
# "method" was added last, so [:-1] selects every metric column.
format_df_table(results, "method", results.columns.tolist()[:-1])

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:16<00:00,  1.22it/s]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:17<00:00,  1.15it/s]


Unnamed: 0,method,costs,n_changes,outlier,outliers_score,diversity,n_solutions,time
0,1,0.102 (+-0.105),1.944 (+-0.873),0.167 (+-0.383),0.515 (+-0.039),0.0 (+-0.0),0.9 (+-0.308),0.82 (+-0.934)
1,2,0.102 (+-0.105),1.944 (+-0.873),0.167 (+-0.383),0.515 (+-0.039),0.0 (+-0.0),0.9 (+-0.308),0.865 (+-1.072)


In [35]:
# "german" / LGBMClassifier / compare="non_dom", using the outlier detector
# returned by get_data_model and the "tree" SHAP explainer.
# method 1: estimate_outlier=True, method 2: estimate_outlier=False.
# NOTE(review): samples 50 individuals where the neighbouring cells use 20 —
# confirm that is intentional.
dataset = "german"
model_name = "LGBMClassifier"
compare = "non_dom"
X_train, Y_train, model, outlier_detection, individuals = get_data_model(dataset, model_name)
individuals = individuals.sample(n = 50, random_state=SEED)
action_set = get_action_set(dataset, X_train)

model_shap = GeneralClassifier_Shap(model, outlier_detection, X_train, shap_explainer="tree", threshold = 0.5)

summaries = []
# Same run for both settings; only `estimate_outlier` differs.
for method_id, estimate_outlier in enumerate([True, False], start=1):
    method = MAPOFCEM(
        action_set = action_set,
        classifier = model_shap,
        compare = compare,
        max_changes = 3,
        estimate_prob_max = False,
        estimate_outlier = estimate_outlier,
    )
    raw_metrics = run_experiments(method, individuals, model_shap)
    metrics = summarize_results(raw_metrics, dataset)
    metrics["method"] = method_id
    summaries.append(metrics)

results = pd.concat(summaries)
# "method" was added last, so [:-1] selects every metric column.
format_df_table(results, "method", results.columns.tolist()[:-1])

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [01:55<00:00,  2.31s/it]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [01:50<00:00,  2.22s/it]


Unnamed: 0,method,costs,n_changes,outlier,outliers_score,diversity,n_solutions,time
0,1,0.406 (+-0.143),1.942 (+-0.663),1.14 (+-2.195),0.502 (+-0.033),0.193 (+-0.261),21.46 (+-18.599),2.31 (+-2.042)
1,2,0.407 (+-0.144),1.943 (+-0.663),1.24 (+-2.308),0.502 (+-0.033),0.193 (+-0.261),21.58 (+-18.744),2.217 (+-1.967)


In [36]:
# "taiwan" / LGBMClassifier / compare="non_dom", using the outlier detector
# returned by get_data_model and the "tree" SHAP explainer.
# method 1: estimate_outlier=True, method 2: estimate_outlier=False;
# both runs pass outlier_percentile=0.01.
dataset = "taiwan"
model_name = "LGBMClassifier"
compare = "non_dom"
X_train, Y_train, model, outlier_detection, individuals = get_data_model(dataset, model_name)
individuals = individuals.sample(n = 20, random_state=SEED)
action_set = get_action_set(dataset, X_train)

model_shap = GeneralClassifier_Shap(model, outlier_detection, X_train, shap_explainer="tree", threshold = 0.5)

summaries = []
# Same run for both settings; only `estimate_outlier` differs.
for method_id, estimate_outlier in enumerate([True, False], start=1):
    method = MAPOFCEM(
        action_set = action_set,
        classifier = model_shap,
        compare = compare,
        max_changes = 3,
        estimate_prob_max = False,
        estimate_outlier = estimate_outlier,
        outlier_percentile = 0.01,
    )
    raw_metrics = run_experiments(method, individuals, model_shap)
    metrics = summarize_results(raw_metrics, dataset)
    metrics["method"] = method_id
    summaries.append(metrics)

results = pd.concat(summaries)
# "method" was added last, so [:-1] selects every metric column.
format_df_table(results, "method", results.columns.tolist()[:-1])

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [25:17<00:00, 75.89s/it]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [25:01<00:00, 75.05s/it]


Unnamed: 0,method,costs,n_changes,outlier,outliers_score,diversity,n_solutions,time
0,1,0.452 (+-0.098),2.407 (+-0.472),14.15 (+-45.548),0.466 (+-0.049),0.492 (+-0.3),128.2 (+-81.645),75.869 (+-67.13)
1,2,0.452 (+-0.098),2.407 (+-0.472),14.15 (+-45.548),0.466 (+-0.049),0.492 (+-0.3),128.2 (+-81.645),75.034 (+-66.463)


## Different explainers

In [24]:
# "german" / LGBMClassifier / compare="percentile".
# method 1: shap_explainer="tree", method 2: shap_explainer="permutation".
dataset = "german"
model_name = "LGBMClassifier"
compare = "percentile"
X_train, Y_train, model, outlier_detection, individuals = get_data_model(dataset, model_name)
individuals = individuals.sample(n = 20, random_state=SEED)
action_set = get_action_set(dataset, X_train)

class FakeOutlierDetection():
    """Stand-in outlier detector whose `predict` ignores its input."""
    def predict(self, X):
        # NOTE(review): always a one-element list, whatever the number of rows
        # in X — confirm the classifier only calls this one row at a time.
        return [1]

explainer_options = [
    {"shap_explainer": "tree"},
    {"shap_explainer": "permutation"},
]

summaries = []
for method_id, options in enumerate(explainer_options, start=1):
    # Built inside the loop so each classifier is created right before its run.
    model_shap = GeneralClassifier_Shap(model, FakeOutlierDetection(), X_train, threshold = 0.5, **options)
    method = MAPOFCEM(
        action_set = action_set,
        classifier = model_shap,
        compare = compare,
        max_changes = 3,
        estimate_prob_max = False,
        estimate_outlier = True,
    )
    raw_metrics = run_experiments(method, individuals, model_shap)
    metrics = summarize_results(raw_metrics, dataset)
    metrics["method"] = method_id
    summaries.append(metrics)

results = pd.concat(summaries)
# "method" was added last, so [:-1] selects every metric column.
format_df_table(results, "method", results.columns.tolist()[:-1])

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:05<00:00,  3.33it/s]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:26<00:00,  1.32s/it]


Unnamed: 0,method,costs,n_changes,outlier,outliers_score,diversity,n_solutions,time
0,1,0.076 (+-0.059),2.3 (+-0.801),0.2 (+-0.41),0.522 (+-0.043),0.0 (+-0.0),1.0 (+-0.0),0.299 (+-0.311)
1,2,0.074 (+-0.057),2.3 (+-0.801),0.25 (+-0.444),0.527 (+-0.047),0.0 (+-0.0),1.0 (+-0.0),1.323 (+-1.525)


In [25]:
# "taiwan" / LGBMClassifier / compare="percentile".
# method 1: shap_explainer="tree", method 2: shap_explainer="permutation".
dataset = "taiwan"
model_name = "LGBMClassifier"
compare = "percentile"
X_train, Y_train, model, outlier_detection, individuals = get_data_model(dataset, model_name)
individuals = individuals.sample(n = 20, random_state=SEED)
action_set = get_action_set(dataset, X_train)

class FakeOutlierDetection():
    """Stand-in outlier detector whose `predict` ignores its input."""
    def predict(self, X):
        # NOTE(review): always a one-element list, whatever the number of rows
        # in X — confirm the classifier only calls this one row at a time.
        return [1]

explainer_options = [
    {"shap_explainer": "tree"},
    {"shap_explainer": "permutation"},
]

summaries = []
for method_id, options in enumerate(explainer_options, start=1):
    # Built inside the loop so each classifier is created right before its run.
    model_shap = GeneralClassifier_Shap(model, FakeOutlierDetection(), X_train, threshold = 0.5, **options)
    method = MAPOFCEM(
        action_set = action_set,
        classifier = model_shap,
        compare = compare,
        max_changes = 3,
        estimate_prob_max = False,
        estimate_outlier = True,
    )
    raw_metrics = run_experiments(method, individuals, model_shap)
    metrics = summarize_results(raw_metrics, dataset)
    metrics["method"] = method_id
    summaries.append(metrics)

results = pd.concat(summaries)
# "method" was added last, so [:-1] selects every metric column.
format_df_table(results, "method", results.columns.tolist()[:-1])

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:08<00:00,  2.45it/s]
PermutationExplainer explainer: 1001it [00:58, 14.24it/s]                                                                                                                                                                                           
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:33<00:00,  1.66s/it]


Unnamed: 0,method,costs,n_changes,outlier,outliers_score,diversity,n_solutions,time
0,1,0.082 (+-0.076),2.0 (+-0.973),0.05 (+-0.224),0.458 (+-0.052),0.0 (+-0.0),1.0 (+-0.0),0.407 (+-0.423)
1,2,0.091 (+-0.098),2.0 (+-0.858),0.05 (+-0.224),0.458 (+-0.053),0.0 (+-0.0),1.0 (+-0.0),1.663 (+-1.913)


In [26]:
# "german" / LGBMClassifier / compare="non_dom".
# method 1: shap_explainer="tree", method 2: shap_explainer="permutation".
dataset = "german"
model_name = "LGBMClassifier"
compare = "non_dom"
X_train, Y_train, model, outlier_detection, individuals = get_data_model(dataset, model_name)
individuals = individuals.sample(n = 20, random_state=SEED)
action_set = get_action_set(dataset, X_train)

class FakeOutlierDetection():
    """Stand-in outlier detector whose `predict` ignores its input."""
    def predict(self, X):
        # NOTE(review): always a one-element list, whatever the number of rows
        # in X — confirm the classifier only calls this one row at a time.
        return [1]

explainer_options = [
    {"shap_explainer": "tree"},
    {"shap_explainer": "permutation"},
]

summaries = []
for method_id, options in enumerate(explainer_options, start=1):
    # Built inside the loop so each classifier is created right before its run.
    model_shap = GeneralClassifier_Shap(model, FakeOutlierDetection(), X_train, threshold = 0.5, **options)
    method = MAPOFCEM(
        action_set = action_set,
        classifier = model_shap,
        compare = compare,
        max_changes = 3,
        estimate_prob_max = False,
        estimate_outlier = True,
    )
    raw_metrics = run_experiments(method, individuals, model_shap)
    metrics = summarize_results(raw_metrics, dataset)
    metrics["method"] = method_id
    summaries.append(metrics)

results = pd.concat(summaries)
# "method" was added last, so [:-1] selects every metric column.
format_df_table(results, "method", results.columns.tolist()[:-1])

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:54<00:00,  2.75s/it]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [02:20<00:00,  7.01s/it]


Unnamed: 0,method,costs,n_changes,outlier,outliers_score,diversity,n_solutions,time
0,1,0.417 (+-0.153),1.956 (+-0.621),5.8 (+-9.887),0.515 (+-0.039),0.057 (+-0.064),26.9 (+-15.94),2.743 (+-2.386)
1,2,0.42 (+-0.151),1.974 (+-0.648),6.5 (+-9.865),0.516 (+-0.039),0.055 (+-0.072),29.0 (+-17.036),7.002 (+-5.605)


In [30]:
# "german" / MLPClassifier / compare="non_dom".
# method 1: custom DeepPipeExplainer, method 2: shap_explainer="permutation".
dataset = "german"
model_name = "MLPClassifier"
compare = "non_dom"
X_train, Y_train, model, outlier_detection, individuals = get_data_model(dataset, model_name)
individuals = individuals.sample(n = 20, random_state=SEED)
action_set = get_action_set(dataset, X_train)

class FakeOutlierDetection():
    """Stand-in outlier detector whose `predict` ignores its input."""
    def predict(self, X):
        # NOTE(review): always a one-element list, whatever the number of rows
        # in X — confirm the classifier only calls this one row at a time.
        return [1]

# The custom explainer is given a 25-row sample of the training data.
explainer = DeepPipeExplainer(model, X_train.sample(n = 25, random_state=SEED))
explainer_options = [
    {"shap_explainer": "custom", "explainer": explainer},
    {"shap_explainer": "permutation"},
]

summaries = []
for method_id, options in enumerate(explainer_options, start=1):
    # Built inside the loop so each classifier is created right before its run.
    model_shap = GeneralClassifier_Shap(model, FakeOutlierDetection(), X_train, threshold = 0.5, **options)
    method = MAPOFCEM(
        action_set = action_set,
        classifier = model_shap,
        compare = compare,
        max_changes = 3,
        estimate_prob_max = False,
        estimate_outlier = True,
    )
    raw_metrics = run_experiments(method, individuals, model_shap)
    metrics = summarize_results(raw_metrics, dataset)
    metrics["method"] = method_id
    summaries.append(metrics)

results = pd.concat(summaries)
# "method" was added last, so [:-1] selects every metric column.
format_df_table(results, "method", results.columns.tolist()[:-1])

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [04:16<00:00, 12.83s/it]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [02:32<00:00,  7.63s/it]


Unnamed: 0,method,costs,n_changes,outlier,outliers_score,diversity,n_solutions,time
0,1,0.515 (+-0.137),2.26 (+-0.671),8.632 (+-11.969),0.515 (+-0.043),0.055 (+-0.079),46.35 (+-41.276),12.822 (+-10.68)
1,2,0.522 (+-0.149),2.231 (+-0.655),7.842 (+-10.611),0.521 (+-0.046),0.078 (+-0.116),31.95 (+-24.025),7.622 (+-4.363)


In [32]:
# "taiwan" / MLPClassifier / compare="percentile".
# method 1: custom DeepPipeExplainer, method 2: shap_explainer="permutation".
dataset = "taiwan"
model_name = "MLPClassifier"
compare = "percentile"
X_train, Y_train, model, outlier_detection, individuals = get_data_model(dataset, model_name)
individuals = individuals.sample(n = 20, random_state=SEED)
action_set = get_action_set(dataset, X_train)

class FakeOutlierDetection():
    """Stand-in outlier detector whose `predict` ignores its input."""
    def predict(self, X):
        # NOTE(review): always a one-element list, whatever the number of rows
        # in X — confirm the classifier only calls this one row at a time.
        return [1]

# The custom explainer is given a 25-row sample of the training data.
explainer = DeepPipeExplainer(model, X_train.sample(n = 25, random_state=SEED))
explainer_options = [
    {"shap_explainer": "custom", "explainer": explainer},
    {"shap_explainer": "permutation"},
]

summaries = []
for method_id, options in enumerate(explainer_options, start=1):
    # Built inside the loop so each classifier is created right before its run.
    model_shap = GeneralClassifier_Shap(model, FakeOutlierDetection(), X_train, threshold = 0.5, **options)
    method = MAPOFCEM(
        action_set = action_set,
        classifier = model_shap,
        compare = compare,
        max_changes = 3,
        estimate_prob_max = False,
        estimate_outlier = True,
    )
    raw_metrics = run_experiments(method, individuals, model_shap)
    metrics = summarize_results(raw_metrics, dataset)
    metrics["method"] = method_id
    summaries.append(metrics)

results = pd.concat(summaries)
# "method" was added last, so [:-1] selects every metric column.
format_df_table(results, "method", results.columns.tolist()[:-1])

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [07:48<00:00, 23.40s/it]
PermutationExplainer explainer: 1001it [00:27, 22.85it/s]                                                                                                                                                                                           
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [06:41<00:00, 20.07s/it]


Unnamed: 0,method,costs,n_changes,outlier,outliers_score,diversity,n_solutions,time
0,1,0.315 (+-0.254),2.9 (+-0.308),0.0 (+-0.0),0.438 (+-0.031),0.0 (+-0.0),1.0 (+-0.0),23.385 (+-21.922)
1,2,0.315 (+-0.254),2.9 (+-0.308),0.0 (+-0.0),0.441 (+-0.029),0.0 (+-0.0),1.0 (+-0.0),20.065 (+-12.122)
