In [94]:
import pandas as pd
import numpy as np
import sys
sys.path.append("../")

from cfmining.algorithms import P2CE
from cfmining.predictors import MonotoneClassifier
from cfmining.baselines import MAPOCAM

from experiments_helper import get_data_model, run_experiments, format_df_table, summarize_results, get_action_set


%load_ext autoreload
%autoreload 2

SEED = 0

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
# Run P2CE (with and without outlier estimation) and the MAPOCAM baseline on
# each dataset with a logistic-regression model, writing one CSV per method.
max_changes = 3
objective = "abs_diff"
for dataset_name in ["german", "taiwan", "adult"]:
    dataset, X_train, Y_train, model, outlier_detection, individuals = get_data_model(dataset_name, "LogisticRegression")
    # Fixed-seed subsample so every method is evaluated on the same 50 individuals.
    individuals = individuals.sample(n=50, random_state=SEED)
    outlier_detection.contamination = dataset.outlier_contamination
    action_set = get_action_set(dataset, X_train, default_step_size=0.05)

    model = MonotoneClassifier(model, outlier_detection, X=X_train)

    # Key the output directory by the dataset *name* (a plain string), which is
    # what the result-loading cell uses; interpolating the dataset object would
    # depend on whatever its string representation happens to be.
    results_dir = f"../results/lr/{dataset_name}"

    # Full P2CE first, then the ablation without outlier estimation. The two
    # runs differ only in `estimate_outlier` and in the output file stem.
    for estimate_outlier, file_stem in [(True, "p2ce"), (False, "p2ce_ablation")]:
        method = P2CE(
            action_set=action_set,
            classifier=model,
            compare=objective,
            max_changes=max_changes,
            outlier_contamination=dataset.outlier_contamination,
            estimate_outlier=estimate_outlier,
            time_limit=np.inf,
        )

        run_experiments(
            method,
            individuals=individuals,
            model=model,
            output_file=f"{results_dir}/{file_stem}_{objective}.csv",
        )

    # This mutates the shared action_set in place, so it is done only after both
    # P2CE runs have finished.
    # NOTE(review): presumably MAPOCAM needs every feature's flip_direction set
    # to 1 and the grids rebuilt accordingly -- confirm against cfmining.
    for feat in action_set:
        feat.flip_direction = 1
        feat.update_grid()

    method = MAPOCAM(
        action_set,
        model,
        criteria=objective,
        max_changes=max_changes,
    )

    run_experiments(
        method,
        individuals=individuals,
        model=model,
        output_file=f"{results_dir}/mapocam_{objective}.csv",
    )

PermutationExplainer explainer: 101it [00:12,  1.42it/s]                                                                                                                                                 
 42%|████████████████████████████████████████████████████████████████████▉                                                                                               | 21/50 [06:06<10:39, 22.05s/it]

In [127]:
# File stems of the result CSVs to compare; each matches a
# "<method>_<objective>.csv" name written by the experiment cell with
# objective = "abs_diff".
method_list = ["p2ce_abs_diff", "p2ce_ablation_abs_diff", "mapocam_abs_diff"]

In [128]:
def show_results(dataset_name, method_list):
    """Build a formatted comparison table of several methods on one dataset.

    For each entry in ``method_list`` the file
    ``../results/lr/<dataset_name>/<method>.csv`` is read, passed through
    ``summarize_results`` and tagged with a ``method`` column. The summaries
    are concatenated and handed to ``format_df_table``.

    Parameters
    ----------
    dataset_name : str
        Name of the dataset sub-directory under ``../results/lr``.
    method_list : list of str
        CSV file stems (without the ``.csv`` extension) to load.

    Returns
    -------
    Whatever ``format_df_table`` returns for the combined summaries, with
    ``"method"`` as its second argument and all columns but the last as its
    third.
    """

    def _load_summary(method_name):
        # Read one method's raw results and reduce them to a summary.
        raw = pd.read_csv(f"../results/lr/{dataset_name}/{method_name}.csv")
        summary = summarize_results(raw, dataset_name)
        summary["method"] = method_name
        return summary

    combined = pd.concat([_load_summary(name) for name in method_list])
    # Drop the last column; it is the `method` label added above, provided the
    # summary did not already contain a column with that name.
    value_columns = combined.columns.tolist()[:-1]
    return format_df_table(combined, "method", value_columns)

In [129]:
# Comparison table for the "german" dataset.
show_results("german", method_list)

In [130]:
# Comparison table for the "taiwan" dataset.
show_results("taiwan", method_list)

In [131]:
# Comparison table for the "adult" dataset.
show_results("adult", method_list)