In [198]:
import pandas as pd
import sys
sys.path.append("../")

from cfmining.algorithms import MAPOFCEM
from cfmining.predictors import GeneralClassifier_Shap
from cfmining.action_set import ActionSet
from cfmining.utils import get_data_model

from experiments_helper import run_experiments, summarize_results, format_df_table


%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [199]:
SEED = 0

## German

In [201]:
X_train, Y_train, model, outlier_detection, individuals = get_data_model("german", "LGBMClassifier")
individuals = individuals.sample(n = 50, random_state=SEED)
not_mutable_features = ['Age', 'OwnsHouse', 'isMale', 'JobClassIsSkilled', 'Single', 'ForeignWorker', 'RentsHouse']
mutable_features = [feat for feat in X_train.columns if feat not in not_mutable_features]
model_shap = GeneralClassifier_Shap(model, outlier_detection, X_train, shap_explainer="tree", threshold = 0.5)

In [202]:
step_size_list = [0.01, 0.05, 0.1, 0.15, 0.2]
for step_size in step_size_list:
    perc = int(step_size * 1000)
    action_set = ActionSet(X = X_train, default_step_size = step_size, mutable_features = mutable_features)#, default_step_type = "percentile")
    method = MAPOFCEM(
        action_set,
        model_shap,
        compare = "percentile",
        estimate_prob_max = False,
        estimate_outlier=True,
        max_changes = 3,
        outlier_percentile=0.05
    )
    run_experiments(
        method,
        individuals,
        model_shap,
        f"../results/step_size/german/mapofcem_v2_{perc}.csv"
    )


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [02:01<00:00,  2.43s/it]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:16<00:00,  3.02it/s]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:09<00:00,  5.52it/s]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:06<00:00,  7.82it/s]
100%|███████████████

### Results

In [203]:
step_size_list = [0.01, 0.05, 0.1, 0.15, 0.2]
all_results = []
for i, step_size in enumerate(step_size_list):
    perc = int(step_size * 1000)

    results = pd.read_csv(f"../results/step_size/german/mapofcem_v2_{perc}.csv")
    results = summarize_results(results, dataset = "german", outlier_percentile=0.05)
    results["step_size"] = step_size
    results["method"] = f"MAPOFCEM_v2_{i}"
    all_results.append(results)

results = pd.concat(all_results)
format_df_table(results, "method", results.columns[:-1])

Unnamed: 0,method,costs,n_changes,outlier,outliers_score,diversity,n_solutions,time,step_size
0,MAPOFCEM_v2_0,0.096 (+-0.144) | 0.287,2.021 (+-0.863) | 3.0,0.062 (+-0.245) | 0.65,0.504 (+-0.036) | 0.558,0.0 (+-0.0) | 0.0,0.96 (+-0.198) | 1.0,2.427 (+-5.33) | 10.264,0.01 (+-0.0) | 0.01
1,MAPOFCEM_v2_1,0.114 (+-0.156) | 0.396,1.833 (+-0.808) | 3.0,0.062 (+-0.245) | 0.65,0.505 (+-0.034) | 0.558,0.0 (+-0.0) | 0.0,0.96 (+-0.198) | 1.0,0.329 (+-0.635) | 1.207,0.05 (+-0.0) | 0.05
2,MAPOFCEM_v2_2,0.137 (+-0.174) | 0.5,1.729 (+-0.765) | 3.0,0.042 (+-0.202) | 0.0,0.506 (+-0.034) | 0.558,0.0 (+-0.0) | 0.0,0.96 (+-0.198) | 1.0,0.179 (+-0.33) | 0.923,0.1 (+-0.0) | 0.1
3,MAPOFCEM_v2_3,0.137 (+-0.164) | 0.452,1.771 (+-0.751) | 3.0,0.021 (+-0.144) | 0.0,0.509 (+-0.031) | 0.551,0.0 (+-0.0) | 0.0,0.96 (+-0.198) | 1.0,0.126 (+-0.232) | 0.419,0.15 (+-0.0) | 0.15
4,MAPOFCEM_v2_4,0.178 (+-0.204) | 0.532,1.915 (+-0.686) | 3.0,0.064 (+-0.247) | 0.7,0.505 (+-0.033) | 0.557,0.0 (+-0.0) | 0.0,0.94 (+-0.24) | 1.0,0.149 (+-0.273) | 0.543,0.2 (+-0.0) | 0.2


## Taiwan

In [189]:
X_train, Y_train, model, outlier_detection, individuals = get_data_model("taiwan", "LGBMClassifier")
individuals = individuals.sample(n = 50, random_state=SEED)
not_mutable_features = ['Single', 'Age_in_25_to_40', 'Married', 'Age_lt_25', 'Age_in_40_to_59', 'Age_geq_60', 'EducationLevel']
mutable_features = [feat for feat in X_train.columns if feat not in not_mutable_features]
model_shap = GeneralClassifier_Shap(model, outlier_detection, X_train, shap_explainer="tree", threshold = 0.5)

In [190]:
step_size_list = [0.01, 0.05, 0.1, 0.15, 0.2]
for step_size in step_size_list:
    perc = int(step_size * 1000)
    action_set = ActionSet(X = X_train, default_step_size = step_size, mutable_features = mutable_features) #, default_step_type = "percentile")
    method = MAPOFCEM(
        action_set,
        model_shap,
        compare = "percentile",
        estimate_prob_max = False,
        estimate_outlier=True,
        max_changes = 3,
        outlier_percentile=0.01
    )
    run_experiments(
        method,
        individuals,
        model_shap,
        f"../results/step_size/taiwan/mapofcem_v2_{perc}.csv"
    )



100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [15:13<00:00, 18.28s/it]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [10:35<00:00, 12.70s/it]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [03:45<00:00,  4.52s/it]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [02:14<00:00,  2.70s/it]
100%|███████████████

### Results

In [188]:
step_size_list = [0.1, 0.5, 1, 5, 10, 20]
all_results = []
for i, step_size in enumerate(step_size_list):
    perc = int(step_size * 1000)

    results = pd.read_csv(f"../results/step_size/taiwan/mapofcem_v2_{perc}.csv")
    results = summarize_results(results, dataset = "taiwan", outlier_percentile=0.05)
    results["step_size"] = step_size
    results["method"] = f"MAPOFCEM_v2_{i}"
    all_results.append(results)

results = pd.concat(all_results)
format_df_table(results, "method", results.columns[:-1])

Unnamed: 0,method,costs,n_changes,outlier,outliers_score,diversity,n_solutions,time,step_size
0,MAPOFCEM_v2_0,0.036 (+-0.04),2.604 (+-0.644),0.042 (+-0.202),0.45 (+-0.043),0.0 (+-0.0),0.96 (+-0.198),16.454 (+-84.406),0.1 (+-0.0)
1,MAPOFCEM_v2_1,0.036 (+-0.04),2.604 (+-0.644),0.042 (+-0.202),0.45 (+-0.043),0.0 (+-0.0),0.96 (+-0.198),16.449 (+-84.411),0.5 (+-0.0)
2,MAPOFCEM_v2_2,0.036 (+-0.04),2.604 (+-0.644),0.042 (+-0.202),0.45 (+-0.043),0.0 (+-0.0),0.96 (+-0.198),16.451 (+-84.402),1.0 (+-0.0)
3,MAPOFCEM_v2_3,0.065 (+-0.063),2.224 (+-0.919),0.041 (+-0.2),0.45 (+-0.047),0.0 (+-0.0),0.98 (+-0.141),6.452 (+-41.688),5.0 (+-0.0)
4,MAPOFCEM_v2_4,0.096 (+-0.108),1.633 (+-0.859),0.061 (+-0.242),0.45 (+-0.049),0.0 (+-0.0),0.98 (+-0.141),1.64 (+-10.027),10.0 (+-0.0)
5,MAPOFCEM_v2_5,0.169 (+-0.111),1.592 (+-0.814),0.061 (+-0.242),0.449 (+-0.05),0.0 (+-0.0),0.98 (+-0.141),0.289 (+-1.007),20.0 (+-0.0)
