In [107]:
import copy
import pandas as pd
import numpy as np
import sys
sys.path.append("../")

from cfmining.algorithms import MAPOFCEM
from cfmining.predictors import GeneralClassifier, MonotoneClassifier
from cfmining.utils import get_data_model
from cfmining.baselines import MAPOCAM

from experiments_helper import run_experiments, format_df_table, summarize_results, get_action_set


%load_ext autoreload
%autoreload 2

SEED = 0

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [108]:
# Run MAPOFCEM and then MAPOCAM on the same 50 sampled individuals of each
# dataset, using a logistic-regression model, and write one CSV per method.
for dataset in ["german", "taiwan"]:
    # Single source of truth for the outlier percentile: the same value must be
    # given to the outlier detector and to MAPOFCEM.
    outlier_percentile = 0.01 if dataset == "taiwan" else 0.05

    X_train, Y_train, model, outlier_detection, individuals = get_data_model(dataset, "LogisticRegression")
    # Fixed seed so both methods (and re-runs) see the same individuals.
    individuals = individuals.sample(n=50, random_state=SEED)
    outlier_detection.percentile = outlier_percentile
    action_set = get_action_set(dataset, X_train)

    # Wrap the fitted model; from here on `model` is the MonotoneClassifier.
    model = MonotoneClassifier(model, outlier_detection, X=X_train)

    # --- MAPOFCEM ---
    method = MAPOFCEM(
        action_set=action_set,
        classifier=model,
        compare="non_dom",
        max_changes=2,
        outlier_percentile=outlier_percentile,
        time_limit=np.inf,  # no time limit
    )

    run_experiments(
        method,
        individuals=individuals,
        model=model,
        output_file=f"../results/lr/{dataset}/mapofcem_no_shap.csv",
    )

    # Set every feature's flip direction to 1 and rebuild its grid before the
    # MAPOCAM run. This mutates `action_set` in place, after the MAPOFCEM run
    # above has finished.
    # NOTE(review): presumably MAPOCAM needs this direction setting - confirm.
    for feat in action_set:
        feat.flip_direction = 1
        feat.update_grid()

    # --- MAPOCAM baseline ---
    method = MAPOCAM(
        action_set,
        model,
        criteria="non_dom",
        max_changes=2,
    )

    run_experiments(
        method,
        individuals=individuals,
        model=model,
        output_file=f"../results/lr/{dataset}/mapocam.csv",
    )

100%|█████████████████████████████████████████████████████████████████████████████████| 50/50 [04:15<00:00,  5.11s/it]
100%|█████████████████████████████████████████████████████████████████████████████████| 50/50 [04:13<00:00,  5.07s/it]
PermutationExplainer explainer: 1001it [01:02, 13.42it/s]                                                             
100%|██████████████████████████████████████████████████████████████████████████████| 50/50 [1:50:12<00:00, 132.24s/it]
 56%|███████████████████████████████████████████▋                                  | 28/50 [1:00:52<40:53, 111.51s/it]

In [112]:
# Summarise the stored results for the "german" dataset, one summary per method.
dataset = "german"
summaries = []
# `method_name` (not `method`) so the solver object bound to `method` in the
# experiment cell is not overwritten by a string.
for method_name in ["mapofcem_no_shap", "mapocam"]:
    raw_results = pd.read_csv(f"../results/lr/{dataset}/{method_name}.csv")
    print(raw_results.shape)
    # 0.05 is the outlier percentile the experiments used for this dataset.
    summary = summarize_results(raw_results, dataset, 0.05)
    summary["method"] = method_name
    summaries.append(summary)
results = pd.concat(summaries)
# "method" was added last, so [:-1] selects every column except it.
format_df_table(results, "method", results.columns.tolist()[:-1])

(50, 5)
(50, 5)


Unnamed: 0,method,costs,n_changes,diversity,outlier,outliers_score,n_solutions,time
0,mapocam,0.389 (+-0.12) | 0.548,1.74 (+-0.314) | 2.0,0.142 (+-0.166) | 0.413,0.109 (+-0.205) | 0.486,0.498 (+-0.04) | 0.565,19.68 (+-13.736) | 45.65,5.062 (+-1.485) | 6.737
1,mapofcem_no_shap,0.399 (+-0.127) | 0.558,1.74 (+-0.308) | 2.0,0.207 (+-0.213) | 0.628,0.03 (+-0.083) | 0.159,0.492 (+-0.033) | 0.539,17.88 (+-12.458) | 42.1,5.107 (+-1.504) | 6.725


In [114]:
# Summarise the stored results for the "taiwan" dataset, one summary per method.
dataset = "taiwan"
summaries = []
# `method_name` (not `method`) so the solver object bound to `method` in the
# experiment cell is not overwritten by a string.
for method_name in ["mapofcem_no_shap", "mapocam"]:
    raw_results = pd.read_csv(f"../results/lr/{dataset}/{method_name}.csv")
    print(raw_results.shape)
    # 0.01 is the outlier percentile the experiments used for this dataset.
    summary = summarize_results(raw_results, dataset, 0.01)
    summary["method"] = method_name
    summaries.append(summary)
results = pd.concat(summaries)
# "method" was added last, so [:-1] selects every column except it.
format_df_table(results, "method", results.columns.tolist()[:-1])

(50, 5)
(50, 5)


Unnamed: 0,method,costs,n_changes,outlier,outliers_score,diversity,n_solutions,time
0,mapocam,0.629 (+-0.152) | 0.866,1.973 (+-0.018) | 1.991,0.0 (+-0.003) | 0.0,0.477 (+-0.035) | 0.527,0.3 (+-0.246) | 0.747,242.34 (+-167.414) | 493.5,132.648 (+-26.083) | 162.431
1,mapofcem_no_shap,0.63 (+-0.151) | 0.866,1.973 (+-0.018) | 1.991,0.0 (+-0.002) | 0.0,0.477 (+-0.035) | 0.527,0.301 (+-0.245) | 0.747,240.84 (+-166.379) | 493.5,132.215 (+-25.909) | 161.787
