In [1]:
# Standard-library / pandas imports used by the cells below.
import copy
import pandas as pd
import sys
# Make the parent directory importable before the project imports that follow
# (presumably this is where the `cfmining` package lives -- confirm against the repo layout).
sys.path.append("../")

# Project imports: the MAPOFCEM algorithm, the classifier wrapper, the action set,
# data/model loading utilities, and the baseline methods it is compared against.
from cfmining.algorithms import MAPOFCEM
from cfmining.predictors import GeneralClassifier_Shap
from cfmining.action_set import ActionSet
from cfmining.utils import get_data_model, DeepPipeExplainer
from cfmining.baselines import Bruteforce, MAPOCAM, Nice, Dice

# Local helpers that run one method over the individuals and summarize the saved CSVs.
from experiments_helper import run_experiments, format_df_table, summarize_results


# IPython autoreload: re-import modified modules before each cell execution.
%load_ext autoreload
%autoreload 2

In [2]:
# Random state passed to `individuals.sample(...)` so the evaluated subset is repeatable.
SEED = 0

## German

In [3]:
# Fetch everything `get_data_model` returns for the ("german", "MLPClassifier") pair,
# then keep a seeded random subset of 50 individuals for the experiments below.
(
    X_train,
    Y_train,
    model,
    outlier_detection,
    individuals,
) = get_data_model("german", "MLPClassifier")
individuals = individuals.sample(n=50, random_state=SEED)

In [4]:
# Feature roles for the German dataset.
# Features the search is not allowed to change; every other column is mutable.
not_mutable_features = [
    "Age",
    "OwnsHouse",
    "isMale",
    "JobClassIsSkilled",
    "Single",
    "ForeignWorker",
    "RentsHouse",
]
mutable_features = [feat for feat in X_train.columns if feat not in not_mutable_features]

# Columns treated as continuous; every remaining column is treated as categorical.
continuous_features = [
    "Age",
    "LoanDuration",
    "LoanAmount",
    "LoanRateAsPercentOfIncome",
    "YearsAtCurrentHome",
    "NumberOfLiableIndividuals",
    "NumberOfOtherLoansAtBank",
]
categoric_features = [col for col in X_train.columns if col not in continuous_features]

action_set = ActionSet(X=X_train, default_step_size=0.05, mutable_features=mutable_features)

# Seed the 100-row sample handed to the explainer so that a kernel restart
# reproduces the same explainer input (the original call was unseeded).
deep_pipe_explainer = DeepPipeExplainer(model, X_train.sample(100, random_state=SEED))

# Two wrappers around the same model: one using the custom explainer built above,
# one relying on the defaults of GeneralClassifier_Shap.
model_shap = GeneralClassifier_Shap(
    model,
    outlier_detection,
    X_train,
    shap_explainer="custom",
    threshold=0.5,
    explainer=deep_pipe_explainer,
)
model_shap_simple = GeneralClassifier_Shap(model, outlier_detection, X_train)

### MAPOFCEM

In [None]:
# MAPOFCEM searching with the custom-explainer wrapper (`model_shap`),
# changing at most 3 features per individual.
method = MAPOFCEM(action_set=action_set, classifier=model_shap, compare="percentile", max_changes=3)

# Run over the sampled individuals and write the results to CSV.
run_experiments(
    method,
    individuals=individuals,
    model=model_shap,
    output_file=f"../results/mlp/german/mapofcem_percentile.csv",
);

In [None]:
# Same MAPOFCEM configuration, but searching with the default wrapper (`model_shap_simple`).
method = MAPOFCEM(action_set=action_set, classifier=model_shap_simple, compare="percentile", max_changes=3)

# NOTE(review): the search uses `model_shap_simple` while `run_experiments` is still
# given `model_shap` -- confirm this mix is intended.
run_experiments(
    method,
    individuals=individuals,
    model=model_shap,
    output_file=f"../results/mlp/german/mapofcem_perm_percentile.csv",
);

### MAPOCAM

In [None]:
# Turn off max-prediction mode on the shared wrapper for this baseline.
# NOTE(review): the flag is not restored afterwards, so any cell executed after
# this one sees `use_predict_max == False` on `model_shap`.
model_shap.use_predict_max = False

# Adjust a private deep copy of the action set (the shared `action_set` is left alone):
# every feature gets flip_direction = 1 and then has update_grid() called on it.
action_set_ = copy.deepcopy(action_set)
for feat in action_set_:
    feat.flip_direction = 1
    feat.update_grid()

method = MAPOCAM(action_set_, model_shap, criteria="percentile", max_changes=3)

run_experiments(
    method,
    individuals=individuals,
    model=model_shap,
    output_file=f"../results/mlp/german/mapocam_percentile.csv",
);

### Bruteforce

In [5]:
# Turn off max-prediction mode on the shared wrapper for this baseline.
# NOTE(review): the flag is not restored afterwards, so any cell executed after
# this one sees `use_predict_max == False` on `model_shap`.
model_shap.use_predict_max = False

# Adjust a private deep copy of the action set (the shared `action_set` is left alone):
# every feature gets flip_direction = 1 and then has update_grid() called on it.
action_set_ = copy.deepcopy(action_set)
for feat in action_set_:
    feat.flip_direction = 1
    feat.update_grid()

method = Bruteforce(action_set_, model_shap, criteria="percentile", max_changes=3)

# No trailing ';' here, so the notebook also displays the call's return value.
run_experiments(
    method,
    individuals=individuals,
    model=model_shap,
    output_file=f"../results/mlp/german/bruteforce_percentile.csv",
)

  6%|█████████                                                                                                                                               | 3/50 [02:52<44:52, 57.29s/it]

### DICE

In [None]:
# Dice baseline: built from the training data and the wrapper's `clf` attribute,
# requesting a single counterfactual (n_cfs=1) per individual.
method = Dice(
    X_train,
    Y_train,
    model_shap.clf,
    n_cfs=1,
    mutable_features=mutable_features,
    continuous_features=continuous_features,
)

run_experiments(
    method,
    individuals=individuals,
    model=model_shap,
    output_file=f"../results/mlp/german/dice.csv",
);

### NICE

In [None]:
# Nice baseline: receives the raw `model` (not the wrapper) and the categorical column list.
method = Nice(X_train, Y_train, model=model, cat_features=categoric_features)

run_experiments(
    method,
    individuals=individuals,
    model=model_shap,
    output_file=f"../results/mlp/german/nice.csv",
);

### Results

In [8]:
# Collect the per-method result CSVs for this dataset into one summary table.
dataset = "german"
method_names = [
    "mapofcem_percentile",
    "mapofcem_perm_percentile",
    "bruteforce_percentile",
    "mapocam_percentile",
    "dice",
    "nice",
]

# Distinct names are used for the loop variable and the accumulator so that this cell
# neither overwrites the `method` object created by the experiment cells nor rebinds
# `results` from a list to a DataFrame.
summaries = []
for method_name in method_names:
    raw_results = pd.read_csv(f"../results/mlp/{dataset}/{method_name}.csv")
    # The 0.05 argument is forwarded as-is; its meaning is defined by summarize_results.
    summary = summarize_results(raw_results, dataset, 0.05)
    summary["method"] = method_name
    summaries.append(summary)

results = pd.concat(summaries)
# Every column except the last one is formatted; "method" is expected to be last
# because it is added after summarize_results returns.
format_df_table(results, "method", results.columns.tolist()[:-1])

Unnamed: 0,method,costs,n_changes,outlier,outliers_score,diversity,n_solutions,time
0,bruteforce_percentile,0.082 (+-0.123) | 0.23,1.42 (+-0.609) | 2.55,0.24 (+-0.431) | 1.0,0.526 (+-0.047) | 0.603,0.0 (+-0.0) | 0.0,1.0 (+-0.0) | 1.0,57.597 (+-0.834) | 59.299
1,dice,0.46 (+-0.276) | 0.91,2.04 (+-0.727) | 2.55,0.2 (+-0.404) | 1.0,0.519 (+-0.055) | 0.605,0.0 (+-0.0) | 0.0,1.0 (+-0.0) | 1.0,0.208 (+-0.043) | 0.251
2,mapocam_percentile,0.082 (+-0.123) | 0.23,1.78 (+-0.815) | 3.0,0.24 (+-0.431) | 1.0,0.524 (+-0.046) | 0.596,0.0 (+-0.0) | 0.0,1.0 (+-0.0) | 1.0,1.438 (+-6.382) | 4.702
3,mapofcem_percentile,0.081 (+-0.085) | 0.249,1.729 (+-0.765) | 3.0,0.146 (+-0.357) | 1.0,0.514 (+-0.037) | 0.567,0.0 (+-0.0) | 0.0,0.96 (+-0.198) | 1.0,0.414 (+-0.626) | 0.985
4,mapofcem_perm_percentile,0.081 (+-0.085) | 0.249,1.75 (+-0.758) | 3.0,0.146 (+-0.357) | 1.0,0.514 (+-0.037) | 0.567,0.0 (+-0.0) | 0.0,0.96 (+-0.198) | 1.0,0.745 (+-1.02) | 2.081
5,nice,0.486 (+-0.251) | 0.868,2.1 (+-1.568) | 5.1,0.04 (+-0.198) | 0.0,0.468 (+-0.054) | 0.554,0.0 (+-0.0) | 0.0,1.0 (+-0.0) | 1.0,0.005 (+-0.003) | 0.011


## Taiwan

In [None]:
# Fetch everything `get_data_model` returns for the ("taiwan", "MLPClassifier") pair,
# then keep a seeded random subset of 50 individuals for the experiments below.
# This rebinds the names used by the German section above.
(
    X_train,
    Y_train,
    model,
    outlier_detection,
    individuals,
) = get_data_model("taiwan", "MLPClassifier")
individuals = individuals.sample(n=50, random_state=SEED)

In [None]:
# Feature roles for the Taiwan dataset.
# Features the search is not allowed to change; every other column is mutable.
not_mutable_features = [
    "Single",
    "Age_in_25_to_40",
    "Married",
    "Age_lt_25",
    "Age_in_40_to_59",
    "Age_geq_60",
    "EducationLevel",
]
mutable_features = [feat for feat in X_train.columns if feat not in not_mutable_features]

# Columns treated as continuous; every remaining column is treated as categorical.
# "MostRecentPaymentAmount" was listed twice in the original; the duplicate is removed
# so the list handed to Dice contains each column once.
continuous_features = [
    "LIMIT_BAL",
    "BILL_AMT1",
    "BILL_AMT2",
    "BILL_AMT3",
    "BILL_AMT4",
    "BILL_AMT5",
    "BILL_AMT6",
    "PAY_AMT1",
    "PAY_AMT2",
    "PAY_AMT3",
    "PAY_AMT4",
    "PAY_AMT5",
    "PAY_AMT6",
    "MaxBillAmountOverLast6Months",
    "MaxPaymentAmountOverLast6Months",
    "MostRecentBillAmount",
    "MostRecentPaymentAmount",
    "TotalMonthsOverdue",
    "MonthsWithZeroBalanceOverLast6Months",
    "MonthsWithLowSpendingOverLast6Months",
    "MonthsWithHighSpendingOverLast6Months",
    "TotalOverdueCounts",
]
categoric_features = [col for col in X_train.columns if col not in continuous_features]

action_set = ActionSet(X=X_train, default_step_size=0.05, mutable_features=mutable_features)

# Seed the 100-row sample handed to the explainer so that a kernel restart
# reproduces the same explainer input (the original call was unseeded).
deep_pipe_explainer = DeepPipeExplainer(model, X_train.sample(100, random_state=SEED))

# Two wrappers around the same model: one using the custom explainer built above,
# one relying on the defaults of GeneralClassifier_Shap.
model_shap = GeneralClassifier_Shap(
    model,
    outlier_detection,
    X_train,
    shap_explainer="custom",
    threshold=0.5,
    explainer=deep_pipe_explainer,
)
model_shap_simple = GeneralClassifier_Shap(model, outlier_detection, X_train)

PermutationExplainer explainer: 1001it [00:29, 23.46it/s]                                                                                                                                                                                           


### MAPOFCEM

In [13]:
# MAPOFCEM searching with the custom-explainer wrapper (`model_shap`), at most 3 changes.
# Unlike the German run, `outlier_percentile` is set explicitly to 0.01 here.
method = MAPOFCEM(
    action_set=action_set,
    classifier=model_shap,
    compare="percentile",
    max_changes=3,
    outlier_percentile=0.01,
)

# Run over the sampled individuals and write the results to CSV.
run_experiments(
    method,
    individuals=individuals,
    model=model_shap,
    output_file=f"../results/mlp/taiwan/mapofcem_percentile.csv",
);

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [50:12<00:00, 60.25s/it]


In [14]:
# Same MAPOFCEM configuration, but searching with the default wrapper (`model_shap_simple`).
method = MAPOFCEM(
    action_set=action_set,
    classifier=model_shap_simple,
    compare="percentile",
    max_changes=3,
    outlier_percentile=0.01,
)

# NOTE(review): the search uses `model_shap_simple` while `run_experiments` is still
# given `model_shap` -- confirm this mix is intended.
run_experiments(
    method,
    individuals=individuals,
    model=model_shap,
    output_file=f"../results/mlp/taiwan/mapofcem_perm_percentile.csv",
);

 76%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                 | 38/50 [52:12<19:14, 96.18s/it]

### MAPOCAM

In [None]:
# Turn off max-prediction mode on the shared wrapper for this baseline.
# NOTE(review): the flag is not restored afterwards, so any cell executed after
# this one sees `use_predict_max == False` on `model_shap`.
model_shap.use_predict_max = False

# Adjust a private deep copy of the action set (the shared `action_set` is left alone):
# every feature gets flip_direction = 1 and then has update_grid() called on it.
action_set_ = copy.deepcopy(action_set)
for feat in action_set_:
    feat.flip_direction = 1
    feat.update_grid()

method = MAPOCAM(action_set_, model_shap, criteria="percentile", max_changes=3)

run_experiments(
    method,
    individuals=individuals,
    model=model_shap,
    output_file=f"../results/mlp/taiwan/mapocam_percentile.csv",
);

  0%|                                                                                                                                   | 0/50 [00:00<?, ?it/s]

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [26:51<00:00, 32.22s/it]


### Bruteforce

In [None]:
# Turn off max-prediction mode on the shared wrapper for this baseline.
# NOTE(review): the flag is not restored afterwards, so any cell executed after
# this one sees `use_predict_max == False` on `model_shap`.
model_shap.use_predict_max = False

# Adjust a private deep copy of the action set (the shared `action_set` is left alone).
# Besides flip_direction = 1, each feature's step_size is overridden to 0.25 before
# update_grid() is called -- the other methods keep the ActionSet default of 0.05.
# NOTE(review): presumably a coarser grid to keep the exhaustive search tractable; confirm.
action_set_ = copy.deepcopy(action_set)
for feat in action_set_:
    feat.flip_direction = 1
    feat.step_size = 0.25
    feat.update_grid()

method = Bruteforce(action_set_, model_shap, criteria="percentile", max_changes=3)

# No trailing ';' here, so the notebook also displays the call's return value.
run_experiments(
    method,
    individuals=individuals,
    model=model_shap,
    output_file=f"../results/mlp/taiwan/bruteforce_percentile.csv",
)

### DICE

In [None]:
# Dice baseline: built from the training data and the wrapper's `clf` attribute,
# requesting a single counterfactual (n_cfs=1) per individual.
method = Dice(
    X_train,
    Y_train,
    model_shap.clf,
    n_cfs=1,
    mutable_features=mutable_features,
    continuous_features=continuous_features,
)

# No trailing ';' here, so the notebook also displays the call's return value.
run_experiments(
    method,
    individuals=individuals,
    model=model_shap,
    output_file=f"../results/mlp/taiwan/dice.csv",
)

### NICE

In [None]:
# Nice baseline: receives the raw `model` (not the wrapper) and the categorical column list.
method = Nice(X_train, Y_train, model=model, cat_features=categoric_features)

# No trailing ';' here, so the notebook also displays the call's return value.
run_experiments(
    method,
    individuals=individuals,
    model=model_shap,
    output_file=f"../results/mlp/taiwan/nice.csv",
)

### Results

In [11]:
# Collect the per-method result CSVs for this dataset into one summary table.
dataset = "taiwan"
method_names = [
    "mapofcem_percentile",
    "mapofcem_perm_percentile",
    "bruteforce_percentile",
    "mapocam_percentile",
    "dice",
    "nice",
]

# Distinct names are used for the loop variable and the accumulator so that this cell
# neither overwrites the `method` object created by the experiment cells nor rebinds
# `results` from a list to a DataFrame.
summaries = []
for method_name in method_names:
    raw_results = pd.read_csv(f"../results/mlp/{dataset}/{method_name}.csv")
    # The 0.05 argument is forwarded as-is; its meaning is defined by summarize_results.
    summary = summarize_results(raw_results, dataset, 0.05)
    summary["method"] = method_name
    summaries.append(summary)

results = pd.concat(summaries)
# Every column except the last one is formatted; "method" is expected to be last
# because it is added after summarize_results returns.
format_df_table(results, "method", results.columns.tolist()[:-1])

Unnamed: 0,method,costs,n_changes,outlier,outliers_score,diversity,n_solutions,time
0,bruteforce_percentile,0.674 (+-0.24) | 0.884,2.06 (+-0.867) | 3.0,0.04 (+-0.198) | 0.0,0.462 (+-0.041) | 0.521,0.0 (+-0.0) | 0.0,1.0 (+-0.0) | 1.0,381.807 (+-32.806) | 419.816
1,dice,0.662 (+-0.255) | 0.931,1.48 (+-0.544) | 2.0,0.04 (+-0.198) | 0.0,0.483 (+-0.04) | 0.542,0.0 (+-0.0) | 0.0,1.0 (+-0.0) | 1.0,0.265 (+-0.183) | 0.258
2,mapocam_percentile,0.202 (+-0.107) | 0.409,2.96 (+-0.283) | 3.0,0.02 (+-0.141) | 0.0,0.437 (+-0.038) | 0.503,0.0 (+-0.0) | 0.0,1.0 (+-0.0) | 1.0,121.675 (+-275.435) | 525.857
3,mapofcem_percentile,0.204 (+-0.111) | 0.409,2.96 (+-0.283) | 3.0,0.02 (+-0.141) | 0.0,0.437 (+-0.038) | 0.503,0.0 (+-0.0) | 0.0,1.0 (+-0.0) | 1.0,60.227 (+-60.41) | 180.038
4,mapofcem_perm_percentile,0.2 (+-0.106) | 0.409,2.98 (+-0.141) | 3.0,0.02 (+-0.141) | 0.0,0.437 (+-0.038) | 0.503,0.0 (+-0.0) | 0.0,1.0 (+-0.0) | 1.0,73.722 (+-63.901) | 180.108
5,nice,0.51 (+-0.24) | 0.848,5.48 (+-3.052) | 10.0,0.0 (+-0.0) | 0.0,0.441 (+-0.036) | 0.494,0.0 (+-0.0) | 0.0,1.0 (+-0.0) | 1.0,0.019 (+-0.005) | 0.028
