In [18]:
import copy
import pandas as pd
import numpy as np
import sys
sys.path.append("../")

from cfmining.algorithms import MAPOFCEM
from cfmining.predictors import GeneralClassifier_Shap, GeneralClassifier
from cfmining.action_set import ActionSet
from cfmining.baselines import Bruteforce, MAPOCAM, Nice, Dice
from cfmining.criteria import *

from experiments_helper import run_experiments, format_df_table, summarize_results, get_data_model, get_action_set


%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [19]:
# Seed passed as `random_state` when sampling the individuals to explain,
# so every run of the experiment cells draws the same sample.
SEED = 0

In [None]:
# Run MAPOFCEM with a multi-criteria comparison on each dataset, using the
# LGBMClassifier model, and write one CSV of results per dataset.
max_changes = 3
for dataset_name in [
    "german_cat",
    "taiwan_cat",
    "adult_cat"
    ]:
    dataset, X_train, Y_train, model, outlier_detection, individuals = get_data_model(dataset_name, "LGBMClassifier")
    # Fixed-size, seeded sample so the set of explained individuals is reproducible.
    individuals = individuals.sample(n = 50, random_state=SEED)
    outlier_detection.contamination = dataset.outlier_contamination
    action_set = get_action_set(dataset, X_train, default_step_size=0.05)

    model_wrap = GeneralClassifier_Shap(
        model,
        outlier_detection,
        X_train,
        shap_explainer="permutation",
        categorical_features=dataset.categoric_features,
    )
    # Boolean mask aligned with X_train.columns: True where the column is categorical.
    categorical_features_bool = [col in dataset.categoric_features for col in X_train.columns]

    # setting multiple criteria
    range_calc = RangeCalculator(action_set)
    perc_calc = PercentileCalculator(action_set = action_set)

    def compare_call(pivot):
        """Build the MultiCriterion for `pivot` from the four criteria below.

        Uses the calculators and categorical mask built for the current
        dataset in the enclosing loop iteration.
        """
        criteria_list = [
            MaxDistCriterion(
                pivot,
                range_calc,
                categorical_features_bool,
            ),
            NumberChangesCriterion(pivot),
            PercentileCriterion(
                pivot,
                perc_calc,
            ),
            CategoricDistCriterion(pivot, categorical_features_bool)
        ]
        return MultiCriterion(criteria_list, pivot)

    method = MAPOFCEM(
        action_set = action_set,
        classifier = model_wrap,
        compare = compare_call,
        max_changes = max_changes,
        outlier_contamination = dataset.outlier_contamination,
        categorical_features=dataset.categoric_features,
        estimate_outlier=True,
        time_limit=np.inf,
    )

    # Use the dataset *name* (a string) in the path, not the dataset object,
    # so the directory matches the one other cells read results from.
    run_experiments(
        method,
        individuals=individuals,
        model=model_wrap,
        output_file=f"../results/lgbm/{dataset_name}/mapofcem_tree_multi.csv"
    )


PermutationExplainer explainer: 101it [00:13,  1.98it/s]                                                                                                                                                 
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [21:54<00:00, 26.29s/it]
PermutationExplainer explainer: 101it [00:23,  2.47it/s]                                                                                                                                                 
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [06:23<00:00,  7.68s/it]
PermutationExplainer explainer: 101it [00:35,  2.01it/s]                                                                                                                                        

KeyboardInterrupt: 

In [None]:
# Run the DiCE baseline on each dataset with the LGBMClassifier model and
# write one CSV of results per dataset.
for dataset_name in [
    "german_cat", 
    "taiwan_cat", 
    "adult_cat"
    ]:
    dataset, X_train, Y_train, model, outlier_detection, individuals = get_data_model(dataset_name, "LGBMClassifier")
    # Same seeded sample size as the other experiment cells.
    individuals = individuals.sample(n = 50, random_state=SEED)
    outlier_detection.contamination = dataset.outlier_contamination

    # Number of counterfactuals requested from DiCE.
    # Disabled alternative: derive it from the maximum number of solutions
    # recorded in the MAPOFCEM results for the same dataset.
    #results_cur = pd.read_csv(f"../results/lgbm/{dataset_name}/mapofcem_tree_multi.csv")
    #results_cur = summarize_results(results_cur, dataset_name)
    #n_cfs = results_cur.n_solutions.max()
    n_cfs = 5

    model_wrap = GeneralClassifier(
        model,
        outlier_detection,
        X_train,
    )

    method = Dice(
        X_train.astype(float),
        Y_train,
        model,
        n_cfs = n_cfs,
        mutable_features = dataset.mutable_features,
        continuous_features = dataset.continuous_features,
    )

    # The model here is the LGBMClassifier, so results belong under
    # results/lgbm (not results/mlp, which would mix them with MLP runs).
    run_experiments(
        method,
        individuals = individuals,
        model = model_wrap,
        output_file=f"../results/lgbm/{dataset_name}/dice.csv"
    )
