In [38]:
import json
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
import copy
import time
import sys
sys.path.append("../")

from cfmining.algorithms import MAPOFCEM
from cfmining.criteria import PercentileCalculator, PercentileCriterion, PercentileChangesCriterion, NonDomCriterion
from cfmining.predictors import GeneralClassifier_Shap
from cfmining.action_set import ActionSet
from cfmining.utils import get_data_model


%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [39]:
def get_mapofcem_results(
        mapofcem_setter,
        individuals,
        model,
        output_file = None,
    ):
    """Run one counterfactual search per individual and collect the results.

    Parameters
    ----------
    mapofcem_setter : callable
        Called as ``mapofcem_setter(individual_values, model)``; must return an
        object exposing ``fit()`` and, after fitting, a ``solutions`` iterable
        whose elements provide ``tolist()``.
    individuals : pandas.DataFrame
        One row per individual; rows are accessed positionally with ``iloc``.
    model : object
        Must provide ``clear_cache()`` and ``predict_proba(values)``.
    output_file : str or None
        When given, the accumulated results are written to this CSV path after
        every individual (so partial results survive an interrupted run) and
        once more at the end. Missing parent directories are created.

    Returns
    -------
    pandas.DataFrame or None
        The results frame (columns ``individual``, ``prob``, ``time``,
        ``n_solutions``, ``solutions``) when ``output_file`` is None;
        otherwise None, the results being available only in the CSV file.
    """
    if output_file is not None:
        # Fail-safe: without this, the first checkpoint write raises when the
        # results directory has not been created yet.
        output_dir = os.path.dirname(output_file)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

    results = []

    for i in tqdm(range(len(individuals))):
        individual = individuals.iloc[i]
        # Reset the model cache so each timing starts from the same state.
        model.clear_cache()
        mapofcem = mapofcem_setter(individual.values, model)

        # perf_counter is monotonic, so the measured duration cannot be
        # distorted by system clock adjustments during a long run.
        start = time.perf_counter()
        mapofcem.fit()
        end = time.perf_counter()

        solutions = [s.tolist() for s in mapofcem.solutions]

        results.append({
            "individual" : individual.values.tolist(),
            "prob" : model.predict_proba(individual.values),
            "time" : end - start,
            "n_solutions" : len(solutions),
            "solutions" : solutions,
        })

        if output_file is not None:
            # Checkpoint after every individual.
            pd.DataFrame(results).to_csv(output_file, index=False)

    results = pd.DataFrame(results)
    if output_file is not None:
        # Final write also covers the case of an empty `individuals` frame.
        results.to_csv(output_file, index=False)
    else:
        return results

In [40]:
def mapofcem_wrapper(
        action_set,
        criteria,
        estimate_outlier,
        max_changes
):
    """Build a factory that creates a configured MAPOFCEM search per individual.

    Parameters
    ----------
    action_set : ActionSet
        Passed to both ``PercentileCalculator`` and ``MAPOFCEM``.
    criteria : str
        Name of the comparison criterion. Only ``"Percentile"`` is implemented.
    estimate_outlier, max_changes
        Forwarded unchanged to ``MAPOFCEM``.

    Returns
    -------
    callable
        ``f_(ind, model)`` returning a new ``MAPOFCEM`` instance for ``ind``.

    Raises
    ------
    ValueError
        If ``criteria`` is not ``"Percentile"``. Previously an unsupported
        value only surfaced later, as an ``UnboundLocalError`` on ``compare``
        when the returned factory was called.
    """
    if criteria != "Percentile":
        raise ValueError(
            f"Unsupported criteria {criteria!r}; only 'Percentile' is implemented."
        )

    def f_(ind, model):
        # The criterion depends on the individual, so it is rebuilt per call.
        percCalc = PercentileCalculator(action_set = action_set)
        compare = PercentileCriterion(ind, percCalc)

        return MAPOFCEM(
            action_set,
            ind,
            model,
            estimate_outlier=estimate_outlier,
            max_changes=max_changes,
            compare = compare
        )
    return f_

In [41]:
def experiment_step_size(
    dataset_name,
    step_sizes,
    action_sets,
    individuals,
    model_shap
):
    """Run the MAPOFCEM experiment once per step size and save each run to CSV.

    Parameters
    ----------
    dataset_name : str
        Used to build the output directory ``../results/<dataset_name>_step_size``.
    step_sizes : iterable of float
        Step sizes under comparison; each is expressed as a whole percentage
        in the output file name (e.g. 0.05 -> ``mapofcem_5.csv``).
    action_sets : sequence
        ``action_sets[i]`` is the action set built for ``step_sizes[i]``.
        Entries beyond ``len(step_sizes)`` are ignored.
    individuals, model_shap
        Forwarded to ``get_mapofcem_results``.

    Raises
    ------
    ValueError
        If there are fewer action sets than step sizes. Checked up front so a
        mismatch does not surface as an IndexError after earlier runs finish.
    """
    step_sizes = list(step_sizes)
    if len(action_sets) < len(step_sizes):
        raise ValueError(
            f"Got {len(step_sizes)} step sizes but only {len(action_sets)} action sets."
        )

    for i, step_size in enumerate(step_sizes):
        # MAPOFCEM
        mapofcem_setter = mapofcem_wrapper(
            action_set=action_sets[i],
            criteria="Percentile",
            estimate_outlier=True,
            max_changes=3
        )

        # Round instead of truncating: 100 * 0.29 evaluates to 28.999...,
        # which int() alone would turn into 28 and mislabel the file.
        step_pct = int(round(100 * step_size))

        get_mapofcem_results(
            mapofcem_setter=mapofcem_setter,
            individuals=individuals,
            model=model_shap,
            output_file=f"../results/{dataset_name}_step_size/mapofcem_{step_pct}.csv"
        )

        # TODO: a "MAPOFCEM v2" run was sketched here as commented-out code
        # with identical settings, writing to mapofcem_v2_<pct>.csv; add it
        # back once the v2 configuration actually differs.

   

In [42]:
# Step sizes compared in this notebook; one ActionSet is built per value.
step_sizes = [
    0.01,
    0.05,
    0.1,
    0.2,
    0.25,
]

## German

In [30]:
X_train, Y_train, model, outlier_detection, individuals = get_data_model("german")
# Seed the draw so the same 50 individuals are selected on every run
# (the Taiwan section below already samples with random_state=0).
individuals = individuals.sample(50, random_state=0)

In [31]:
# Features that must never be changed by a counterfactual.
not_mutable_features = [
    'Age',
    'OwnsHouse',
    'isMale',
    'JobClassIsSkilled',
    'Single',
    'ForeignWorker',
    'RentsHouse',
]
mutable_features = [
    feat for feat in X_train.columns if feat not in not_mutable_features
]

# One ActionSet per step size, stored in the same order as `step_sizes`.
action_set_list = []
for step_size in step_sizes:
    action_set = ActionSet(X = X_train, default_step_size = step_size)

    for feat in action_set:
        # A feature is mutable exactly when it is not on the immutable list.
        feat.mutable = feat.name not in not_mutable_features
        feat.step_direction = 0
        # Called after changing the feature's settings above.
        feat.update_grid()

    action_set_list.append(action_set)

In [32]:
# Wrap the German model, outlier detector and training data for the experiment.
model_shap = GeneralClassifier_Shap(
    model,
    outlier_detection,
    X_train,
    tree = True,
    threshold = 0.5,
)

### MAPOFCEM

In [33]:
# Run every step size on the German dataset; results go to
# ../results/german_step_size/.
experiment_step_size(
    dataset_name="german",
    model_shap=model_shap,
    individuals=individuals,
    step_sizes=step_sizes,
    action_sets=action_set_list,
)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [04:33<00:00,  5.47s/it]
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:35<00:00,  1.40it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:19<00:00,  2.55it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:18<00:00,  2.77it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:15<00:00,  3.19it/s]


## Taiwan

In [43]:
X_train, Y_train, model, outlier_detection, individuals = get_data_model("taiwan")

# Cast every column to int (turns boolean columns into 0/1), then draw a
# reproducible sample of 50 individuals.
X_train = X_train.astype(int)
individuals = individuals.astype(int).sample(50, random_state=0)

In [44]:
# Features that must never be changed by a counterfactual.
not_mutable_features = [
    'Single',
    'Age_in_25_to_40',
    'Married',
    'Age_lt_25',
    'Age_in_40_to_59',
    'Age_geq_60',
    'EducationLevel',
]
mutable_features = [
    feat for feat in X_train.columns if feat not in not_mutable_features
]

# One ActionSet per step size, stored in the same order as `step_sizes`.
action_set_list = []
for step_size in step_sizes:
    action_set = ActionSet(X = X_train, default_step_size = step_size)

    for feat in action_set:
        # The two lists are disjoint by construction, so at most one branch
        # applies; a name found in neither keeps its current setting.
        if feat.name in not_mutable_features:
            feat.mutable = False
        elif feat.name in mutable_features:
            feat.mutable = True

        feat.step_direction = 0
        # Called after changing the feature's settings above.
        feat.update_grid()

    action_set_list.append(action_set)

In [45]:
# Wrap the Taiwan model, outlier detector and training data for the experiment.
model_shap = GeneralClassifier_Shap(
    model,
    outlier_detection,
    X_train,
    tree = True,
    threshold = 0.5,
)

### MAPOFCEM

In [None]:
# Run every step size on the Taiwan dataset; results go to
# ../results/taiwan_step_size/.
experiment_step_size(
    dataset_name="taiwan",
    model_shap=model_shap,
    individuals=individuals,
    step_sizes=step_sizes,
    action_sets=action_set_list,
)