In [86]:
import pandas as pd
import numpy as np
import sys
sys.path.append("../")

from cfmining.algorithms import MAPOFCEM
from cfmining.predictors import MonotoneClassifier
from cfmining.baselines import MAPOCAM

from experiments_helper import get_data_model, run_experiments, format_df_table, summarize_results, get_action_set


%load_ext autoreload
%autoreload 2

SEED = 0

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Experiment to evaluate the occurrence of outliers in counterfactual explanations.

We compare MAPOFCEM and MAPOCAM using a MonotoneClassifier, which does not require SHAP to estimate the maximum probability.

In [None]:
# Run MAPOFCEM and MAPOCAM on the same sampled individuals for every dataset and
# write one CSV per (dataset, method) under ../results/lr/<dataset_name>/.
max_changes = 3  # upper bound on changes per counterfactual, shared by both methods
for dataset_name in ["german", "taiwan", "adult"]:
    dataset, X_train, Y_train, base_model, outlier_detection, individuals = get_data_model(
        dataset_name, "LogisticRegression"
    )
    # Fixed random_state so both methods (and any re-run) see the same 50 individuals.
    individuals = individuals.sample(n=50, random_state=SEED)
    outlier_detection.contamination = dataset.outlier_contamination
    action_set = get_action_set(dataset, X_train, default_step_size=0.05)

    # Wrap the fitted model; both methods below receive this wrapper.
    model = MonotoneClassifier(base_model, outlier_detection, X=X_train)

    method = MAPOFCEM(
        action_set=action_set,
        classifier=model,
        compare="non_dom",
        max_changes=max_changes,
        outlier_contamination=dataset.outlier_contamination,
        estimate_outlier=True,
        time_limit=np.inf,
    )

    # The path is built from `dataset_name` (the string key), not the `dataset`
    # object, so it matches the ../results/lr/{dataset_name}/... files that the
    # summary cells read back.
    run_experiments(
        method,
        individuals=individuals,
        model=model,
        output_file=f"../results/lr/{dataset_name}/mapofcem_no_shap.csv",
    )

    # Reset every feature's flip direction and rebuild its grid before the
    # baseline run.
    # NOTE(review): presumably undoes direction changes made during the MAPOFCEM
    # run so MAPOCAM starts from a clean action set — confirm against cfmining.
    for feat in action_set:
        feat.flip_direction = 1
        feat.update_grid()

    method = MAPOCAM(
        action_set,
        model,
        criteria="non_dom",
        max_changes=max_changes,
    )

    run_experiments(
        method,
        individuals=individuals,
        model=model,
        output_file=f"../results/lr/{dataset_name}/mapocam.csv",
    )

 12%|█████████████████▉                                                                                                                                   | 6/50 [05:31<39:58, 54.51s/it]

In [33]:
# Load the saved per-method results for the German dataset, summarise each one,
# and render the summaries in a single table keyed by method.
dataset_name = "german"
results = []
for method in ("mapofcem_no_shap", "mapocam"):
    csv_path = f"../results/lr/{dataset_name}/{method}.csv"
    raw_results = pd.read_csv(csv_path)
    # Quick sanity check on how many rows/columns were stored.
    print(raw_results.shape)
    results_cur = summarize_results(raw_results, dataset_name)
    results_cur["method"] = method
    results.append(results_cur)
results = pd.concat(results)
# "method" was added last, so dropping the final column leaves the metric columns.
metric_columns = results.columns.tolist()[:-1]
format_df_table(results, "method", metric_columns)

(50, 5)
(50, 5)


Unnamed: 0,method,costs,n_changes,diversity,outlier,n_solutions,time
0,mapocam,0.492 (+-0.123) | 0.633,2.282 (+-0.632) | 2.906,0.058 (+-0.085) | 0.227,0.084 (+-0.142) | 0.375,67.36 (+-67.571) | 193.35,38.489 (+-21.449) | 72.263
1,mapofcem_no_shap,0.487 (+-0.132) | 0.633,2.291 (+-0.628) | 2.889,0.076 (+-0.106) | 0.283,0.043 (+-0.083) | 0.263,62.68 (+-65.862) | 193.35,40.887 (+-23.253) | 70.634


In [34]:
# Load the saved per-method results for the Taiwan dataset, summarise each one,
# and render the summaries in a single table keyed by method.
dataset_name = "taiwan"
results = []
for method in ("mapofcem_no_shap", "mapocam"):
    csv_path = f"../results/lr/{dataset_name}/{method}.csv"
    raw_results = pd.read_csv(csv_path)
    # Quick sanity check on how many rows/columns were stored.
    print(raw_results.shape)
    results_cur = summarize_results(raw_results, dataset_name)
    results_cur["method"] = method
    results.append(results_cur)
results = pd.concat(results)
# "method" was added last, so dropping the final column leaves the metric columns.
metric_columns = results.columns.tolist()[:-1]
format_df_table(results, "method", metric_columns)

(50, 5)
(50, 5)


Unnamed: 0,method,costs,n_changes,outlier,diversity,n_solutions,time
0,mapocam,0.711 (+-0.14) | 0.894,2.941 (+-0.061) | 2.99,0.169 (+-0.153) | 0.473,0.288 (+-0.319) | 0.849,414.78 (+-189.785) | 658.4,206.358 (+-51.951) | 264.383
1,mapofcem_no_shap,0.695 (+-0.141) | 0.89,2.93 (+-0.079) | 2.988,0.019 (+-0.051) | 0.067,0.307 (+-0.313) | 0.859,320.94 (+-165.554) | 570.65,185.058 (+-47.722) | 233.4


In [35]:
# Load the saved per-method results for the Adult dataset, summarise each one,
# and render the summaries in a single table keyed by method.
dataset_name = "adult"
results = []
for method in ("mapofcem_no_shap", "mapocam"):
    csv_path = f"../results/lr/{dataset_name}/{method}.csv"
    raw_results = pd.read_csv(csv_path)
    # Quick sanity check on how many rows/columns were stored.
    print(raw_results.shape)
    results_cur = summarize_results(raw_results, dataset_name)
    results_cur["method"] = method
    results.append(results_cur)
results = pd.concat(results)
# "method" was added last, so dropping the final column leaves the metric columns.
metric_columns = results.columns.tolist()[:-1]
format_df_table(results, "method", metric_columns)

(50, 5)
(50, 5)


Unnamed: 0,method,costs,n_changes,outlier,diversity,n_solutions,time
0,mapocam,0.655 (+-0.121) | 0.82,1.976 (+-0.506) | 2.653,0.025 (+-0.081) | 0.175,0.997 (+-0.004) | 1.0,26.9 (+-27.245) | 93.65,0.85 (+-1.295) | 4.05
1,mapofcem_no_shap,0.655 (+-0.12) | 0.814,1.974 (+-0.503) | 2.646,0.004 (+-0.018) | 0.016,0.997 (+-0.002) | 1.0,23.24 (+-17.843) | 62.0,0.882 (+-1.309) | 4.389
