In [32]:
import copy
import pandas as pd
import numpy as np
import sys
sys.path.append("../")

from cfmining.algorithms import MAPOFCEM
from cfmining.predictors import GeneralClassifier_Shap, GeneralClassifier
from cfmining.action_set import ActionSet
from cfmining.baselines import Bruteforce, MAPOCAM, Nice, Dice
from cfmining.criteria import *

from experiments_helper import get_data_model, run_experiments, format_df_table, summarize_results, get_action_set


%load_ext autoreload
%autoreload 2

SEED = 0

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## 1 objective - MPC

In [2]:
# Single-objective MAPOFCEM ("percentile" comparison) on the SVC model of each dataset,
# with a GeneralClassifier_Shap wrapper configured to use the "permutation" SHAP explainer.
max_changes = 3
for dataset_name in [
    "german",
    "taiwan",
    "adult",
    ]:
    dataset, X_train, Y_train, model, outlier_detection, individuals = get_data_model(dataset_name, "SVC")
    # Fixed-seed subsample of 50 individuals (same n and seed as the other experiment cells).
    individuals = individuals.sample(n = 50, random_state=SEED)
    outlier_detection.contamination = dataset.outlier_contamination
    action_set = get_action_set(dataset, X_train, default_step_size=0.05)

    model_wrap = GeneralClassifier_Shap(
        model,
        outlier_detection,
        X_train,
        shap_explainer="permutation",
    )

    method = MAPOFCEM(
        action_set = action_set,
        classifier = model_wrap,
        compare = "percentile",
        max_changes = max_changes,
        outlier_contamination = dataset.outlier_contamination,
        estimate_outlier=True,
        time_limit=6 * 60,  # presumably seconds (6 minutes) -- TODO confirm against MAPOFCEM
    )

    # The path is built from the dataset *name*; the original interpolated the dataset
    # object itself, which is inconsistent with the other cells' output paths.
    run_experiments(
        method,
        individuals=individuals,
        model=model_wrap,
        output_file=f"../results/svc/{dataset_name}/mapofcem_percentile.csv"
    );


PermutationExplainer explainer: 101it [00:57,  1.50it/s]                                                                                                                                                 
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:54<00:00,  1.10s/it]
PermutationExplainer explainer: 101it [24:27, 14.68s/it]                                                                                                                                                 
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [2:30:37<00:00, 180.74s/it]
PermutationExplainer explainer: 101it [23:36, 14.16s/it]                                                                                                                                        

In [63]:
# Single-objective MAPOFCEM ("percentile" comparison) on the SVC model of each dataset,
# with a GeneralClassifier_Shap wrapper configured to use the "kernel" SHAP explainer.
max_changes = 3
for dataset_name in [
    "german",
    "taiwan",
    "adult",
    ]:
    dataset, X_train, Y_train, model, outlier_detection, individuals = get_data_model(dataset_name, "SVC")
    # Fixed-seed subsample of 50 individuals (same n and seed as the other experiment cells).
    individuals = individuals.sample(n = 50, random_state=SEED)
    outlier_detection.contamination = dataset.outlier_contamination
    action_set = get_action_set(dataset, X_train, default_step_size=0.05)

    model_wrap = GeneralClassifier_Shap(
        model,
        outlier_detection,
        X_train,
        shap_explainer="kernel",
    )

    method = MAPOFCEM(
        action_set = action_set,
        classifier = model_wrap,
        compare = "percentile",
        max_changes = max_changes,
        outlier_contamination = dataset.outlier_contamination,
        estimate_outlier=True,
        time_limit=6 * 60,  # presumably seconds (6 minutes) -- TODO confirm against MAPOFCEM
    )

    # The path is built from the dataset *name* so it matches the location the
    # result-summary cells read ("../results/svc/<name>/mapofcem_kern_percentile.csv");
    # the original interpolated the dataset object itself.
    run_experiments(
        method,
        individuals=individuals,
        model=model_wrap,
        output_file=f"../results/svc/{dataset_name}/mapofcem_kern_percentile.csv"
    );


In [3]:
# Baseline run: single-objective MAPOCAM with the "percentile" criterion on the SVC
# model of each dataset, using the plain GeneralClassifier wrapper.
max_changes = 3
for dataset_name in ("german", "taiwan", "adult"):
    (
        dataset,
        X_train,
        Y_train,
        model,
        outlier_detection,
        individuals,
    ) = get_data_model(dataset_name, "SVC")
    individuals = individuals.sample(n=50, random_state=SEED)
    outlier_detection.contamination = dataset.outlier_contamination

    action_set = get_action_set(dataset, X_train, default_step_size=0.05)
    # Set flip_direction to 1 on every feature and rebuild its grid
    # (exact semantics live in cfmining's ActionSet, not visible here).
    for feature in action_set:
        feature.flip_direction = 1
        feature.update_grid()

    model_wrap = GeneralClassifier(model, outlier_detection, X_train)

    method = MAPOCAM(
        action_set=action_set,
        model=model_wrap,
        criteria="percentile",
        max_changes=max_changes,
        time_limit=6 * 60,
    )

    output_path = f"../results/svc/{dataset_name}/mapocam_percentile.csv"
    run_experiments(
        method,
        individuals=individuals,
        model=model_wrap,
        output_file=output_path,
    )


PermutationExplainer explainer: 101it [00:58,  1.42it/s]                                                                                                                                                 
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [1:26:20<00:00, 103.60s/it]
PermutationExplainer explainer: 101it [24:25, 14.66s/it]                                                                                                                                                 
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [18:33<00:00, 22.27s/it]
ExactExplainer explainer: 101it [01:06,  1.29it/s]                                                                                                                                              

In [None]:
# Baseline run: NICE counterfactuals on the SVC model of each dataset.
for dataset_name in ("german", "taiwan", "adult"):
    (
        dataset,
        X_train,
        Y_train,
        model,
        outlier_detection,
        individuals,
    ) = get_data_model(dataset_name, "SVC")
    individuals = individuals.sample(n=50, random_state=SEED)
    outlier_detection.contamination = dataset.outlier_contamination

    # The wrapped model is handed to run_experiments; Nice itself receives the raw model.
    model_wrap = GeneralClassifier(model, outlier_detection, X_train)

    method = Nice(
        X_train,
        Y_train,
        model=model,
        cat_features=dataset.categoric_features,
    )

    output_path = f"../results/svc/{dataset_name}/nice.csv"
    run_experiments(
        method,
        individuals=individuals,
        model=model_wrap,
        output_file=output_path,
    )

In [None]:
# Baseline run: DiCE (one counterfactual per individual) on the SVC model of each dataset.
for dataset_name in ("german", "taiwan", "adult"):
    (
        dataset,
        X_train,
        Y_train,
        model,
        outlier_detection,
        individuals,
    ) = get_data_model(dataset_name, "SVC")
    individuals = individuals.sample(n=50, random_state=SEED)
    outlier_detection.contamination = dataset.outlier_contamination

    # The wrapped model is handed to run_experiments; Dice itself receives the raw model.
    model_wrap = GeneralClassifier(model, outlier_detection, X_train)

    method = Dice(
        X_train,
        Y_train,
        model,
        n_cfs=1,
        mutable_features=dataset.mutable_features,
        continuous_features=dataset.continuous_features,
    )

    output_path = f"../results/svc/{dataset_name}/dice.csv"
    run_experiments(
        method,
        individuals=individuals,
        model=model_wrap,
        output_file=output_path,
    )


In [12]:
# Summary table for the "german" dataset: load each method's result CSV, summarize it,
# tag it with the method name, and render the combined table.
dataset = "german"
summaries = []
for method_name in ("mapofcem_kern_percentile", "mapocam_percentile"):
    csv_path = f"../results/svc/{dataset}/{method_name}.csv"
    summary = summarize_results(pd.read_csv(csv_path), dataset)
    summary["method"] = method_name
    summaries.append(summary)
results = pd.concat(summaries)
# Every column except the last one ("method", appended above) is passed as a column to format.
metric_columns = results.columns.tolist()[:-1]
format_df_table(results, "method", metric_columns)

Unnamed: 0,method,percentile_costs,lp_costs,max_dist_costs,n_changes,outlier,diversity,n_solutions,time
0,mapocam_percentile,0.002 (+-0.008) | 0.0,0.021 (+-0.142) | 0.0,0.021 (+-0.141) | 0.0,0.06 (+-0.314) | 0.0,0.04 (+-0.198) | 0.0,0.0 (+-0.0) | 0.0,1.0 (+-0.0) | 1.0,0.013 (+-0.027) | 0.025
1,mapofcem_kern_percentile,0.005 (+-0.021) | 0.041,0.066 (+-0.258) | 0.623,0.062 (+-0.242) | 0.622,0.163 (+-0.59) | 1.6,0.0 (+-0.0) | 0.0,0.0 (+-0.0) | 0.0,0.98 (+-0.141) | 1.0,2.24 (+-7.766) | 3.106


In [8]:
# Summary table for the "taiwan" dataset: load each method's result CSV, summarize it,
# tag it with the method name, and render the combined table.
dataset = "taiwan"
summaries = []
for method_name in ("mapofcem_kern_percentile", "mapocam_percentile"):
    csv_path = f"../results/svc/{dataset}/{method_name}.csv"
    summary = summarize_results(pd.read_csv(csv_path), dataset)
    summary["method"] = method_name
    summaries.append(summary)
results = pd.concat(summaries)
# Every column except the last one ("method", appended above) is passed as a column to format.
metric_columns = results.columns.tolist()[:-1]
format_df_table(results, "method", metric_columns)

Unnamed: 0,method,percentile_costs,lp_costs,max_dist_costs,n_changes,outlier,diversity,n_solutions,time
0,mapocam_percentile,0.0 (+-0.0) | 0.0,0.0 (+-0.0) | 0.0,0.0 (+-0.0) | 0.0,0.0 (+-0.0) | 0.0,0.02 (+-0.141) | 0.0,0.0 (+-0.0) | 0.0,1.0 (+-0.0) | 1.0,0.009 (+-0.013) | 0.011
1,mapofcem_kern_percentile,0.007 (+-0.048) | 0.0,0.02 (+-0.141) | 0.0,0.02 (+-0.141) | 0.0,0.02 (+-0.141) | 0.0,0.0 (+-0.0) | 0.0,0.0 (+-0.0) | 0.0,1.0 (+-0.0) | 1.0,95.735 (+-161.695) | 73.041


In [9]:
# Summary table for the "adult" dataset: load each method's result CSV, summarize it,
# tag it with the method name, and render the combined table.
dataset = "adult"
summaries = []
for method_name in ("mapofcem_kern_percentile", "mapocam_percentile"):
    csv_path = f"../results/svc/{dataset}/{method_name}.csv"
    summary = summarize_results(pd.read_csv(csv_path), dataset)
    summary["method"] = method_name
    summaries.append(summary)
results = pd.concat(summaries)
# Every column except the last one ("method", appended above) is passed as a column to format.
metric_columns = results.columns.tolist()[:-1]
format_df_table(results, "method", metric_columns)

Unnamed: 0,method,percentile_costs,lp_costs,max_dist_costs,n_changes,outlier,diversity,n_solutions,time
0,mapocam_percentile,0.787 (+-0.071) | 0.874,0.981 (+-0.145) | 1.167,0.867 (+-0.135) | 1.0,1.96 (+-0.198) | 2.0,0.12 (+-0.328) | 1.0,0.0 (+-0.0) | 0.0,1.0 (+-0.0) | 1.0,3.831 (+-3.857) | 9.918
1,mapofcem_kern_percentile,0.803 (+-0.072) | 0.874,1.01 (+-0.211) | 1.364,0.827 (+-0.145) | 1.0,2.163 (+-0.373) | 3.0,0.0 (+-0.0) | 0.0,0.0 (+-0.0) | 0.0,0.98 (+-0.141) | 1.0,94.435 (+-85.08) | 257.938


## Multi-objectives

In [None]:
# Multi-objective MAPOFCEM on the SVC model of each dataset: candidates are compared
# with a MultiCriterion built from max-distance, number-of-changes and percentile criteria.
max_changes = 3
for dataset_name in [
    "german",
    "taiwan",
    "adult"
    ]:
    dataset, X_train, Y_train, model, outlier_detection, individuals = get_data_model(dataset_name, "SVC")
    # Fixed-seed subsample of 50 individuals (same n and seed as the other experiment cells).
    individuals = individuals.sample(n = 50, random_state=SEED)
    outlier_detection.contamination = dataset.outlier_contamination
    action_set = get_action_set(dataset, X_train, default_step_size=0.05)

    model_wrap = GeneralClassifier_Shap(
        model,
        outlier_detection,
        X_train,
        shap_explainer="permutation",
    )

    # Calculators are built once per dataset and shared by every criterion created below.
    range_calc = RangeCalculator(action_set)
    perc_calc = PercentileCalculator(action_set = action_set)

    def compare_call(pivot):
        """Build the MultiCriterion (max-dist, number of changes, percentile) for `pivot`."""
        criteria_list = [
            MaxDistCriterion(
                pivot,
                range_calc,
            ),
            NumberChangesCriterion(pivot),
            PercentileCriterion(
                pivot,
                perc_calc,
            )
        ]
        return MultiCriterion(criteria_list, pivot)

    method = MAPOFCEM(
        action_set = action_set,
        classifier = model_wrap,
        compare = compare_call,
        max_changes = max_changes,
        outlier_contamination= dataset.outlier_contamination,
        estimate_outlier=True,
        time_limit=np.inf,  # no time limit for the multi-objective runs
    )

    # The path is built from the dataset *name*; the original interpolated the dataset
    # object itself, which is inconsistent with the MAPOCAM multi-objective cell's path.
    run_experiments(
        method,
        individuals=individuals,
        model=model_wrap,
        output_file=f"../results/svc/{dataset_name}/mapofcem_perm_multi.csv"
    );


In [None]:
# Baseline run: multi-objective MAPOCAM on the SVC model of each dataset, comparing
# candidates with a MultiCriterion (max-distance, number of changes, percentile).
max_changes = 3
for dataset_name in ("german", "taiwan", "adult"):
    (
        dataset,
        X_train,
        Y_train,
        model,
        outlier_detection,
        individuals,
    ) = get_data_model(dataset_name, "SVC")
    individuals = individuals.sample(n=50, random_state=SEED)
    outlier_detection.contamination = dataset.outlier_contamination

    action_set = get_action_set(dataset, X_train, default_step_size=0.05)
    # Set flip_direction to 1 on every feature and rebuild its grid
    # (exact semantics live in cfmining's ActionSet, not visible here).
    for feature in action_set:
        feature.flip_direction = 1
        feature.update_grid()

    model_wrap = GeneralClassifier(model, outlier_detection, X_train)

    # Calculators are built once per dataset and shared by the criteria below.
    range_calc = RangeCalculator(action_set)
    perc_calc = PercentileCalculator(action_set=action_set)

    def compare_call(pivot):
        """Return the MultiCriterion (max-dist, number of changes, percentile) for `pivot`."""
        max_dist = MaxDistCriterion(pivot, range_calc)
        n_changes = NumberChangesCriterion(pivot)
        percentile = PercentileCriterion(pivot, perc_calc)
        return MultiCriterion([max_dist, n_changes, percentile], pivot)

    method = MAPOCAM(
        action_set=action_set,
        model=model_wrap,
        criteria=compare_call,
        max_changes=max_changes,
    )

    output_path = f"../results/svc/{dataset_name}/mapocam_multi.csv"
    run_experiments(
        method,
        individuals=individuals,
        model=model_wrap,
        output_file=output_path,
    )
