In [2]:
import json
import os
import copy
import numpy as np
import pandas as pd
from tqdm import tqdm
import time
import sys
sys.path.append("../")

from cfmining.algorithms import MAPOFCEM, BruteForce, MAPOCAM
from cfmining.criteria import PercentileCalculator, PercentileCriterion, PercentileChangesCriterion, NonDomCriterion
from cfmining.predictors import GeneralClassifier_Shap, GeneralClassifier
from cfmining.action_set import ActionSet
from cfmining.utils import get_data_model

import dice_ml


%load_ext autoreload
%autoreload 2

In [18]:
def get_mapofcem_results(
        mapofcem_setter,
        individuals, 
        model, 
        output_file = None,
    ):
    """Run one counterfactual search per individual and collect the outcomes.

    Parameters
    ----------
    mapofcem_setter : callable
        Called as ``mapofcem_setter(individual_values, model)``; must return an
        object exposing ``fit()`` and, after fitting, a ``solutions`` sequence
        whose items support ``tolist()``.
    individuals : pandas.DataFrame
        One row per individual to explain (rows are read with ``.iloc``).
    model : object
        Must provide ``clear_cache()`` and ``predict_proba(values)``.
    output_file : str or None
        When given, results are written to this CSV after every individual
        (acting as a checkpoint for long runs) and once more at the end.

    Returns
    -------
    pandas.DataFrame or None
        The results frame when ``output_file`` is None, otherwise None (the
        data only lives in the CSV).
    """
    if output_file is not None:
        # Make sure the destination folder exists, otherwise the very first
        # checkpoint write would abort the whole run.
        out_dir = os.path.dirname(output_file)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

    results = []

    for i in tqdm(range(len(individuals))):
        individual = individuals.iloc[i]
        # Reset the model cache so every individual is timed from the same state.
        model.clear_cache()
        mapofcem = mapofcem_setter(individual.values, model)

        # perf_counter is monotonic and high resolution, unlike time.time().
        start = time.perf_counter()
        mapofcem.fit()
        elapsed = time.perf_counter() - start

        solutions = [s.tolist() for s in mapofcem.solutions]

        results.append({
            "individual" : individual.values.tolist(),
            "prob" : model.predict_proba(individual.values),
            "time" : elapsed,
            "n_solutions" : len(solutions),
            "solutions" : solutions,
        })

        # Checkpoint: rewrite the full CSV so partial results survive a crash.
        if output_file is not None:
            pd.DataFrame(results).to_csv(output_file, index=False)

    results = pd.DataFrame(results)
    if output_file is not None:
        # Final write also covers the case of an empty `individuals` frame.
        results.to_csv(output_file, index=False)
        return None
    return results

In [4]:
def get_bruteforce_results(
    bruteforce_setter,
    individuals,
    model,
    output_file = None,
):
    """Run the brute-force search for every individual and collect the outcomes.

    Mirrors ``get_mapofcem_results``: same inputs, same result columns.

    Parameters
    ----------
    bruteforce_setter : callable
        Called as ``bruteforce_setter(individual_values, model)``; must return
        an object exposing ``fit()`` and, after fitting, a ``solutions``
        sequence whose items support ``tolist()``.
    individuals : pandas.DataFrame
        One row per individual to explain (rows are read with ``.iloc``).
    model : object
        Must provide ``clear_cache()`` and ``predict_proba(values)``.
    output_file : str or None
        When given, results are written to this CSV after every individual
        (checkpointing) and once more at the end.

    Returns
    -------
    pandas.DataFrame or None
        The results frame when ``output_file`` is None, otherwise None.
    """
    if output_file is not None:
        # Create the destination folder up-front so the first checkpoint
        # write cannot fail on a missing directory.
        target_dir = os.path.dirname(output_file)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)

    results = []

    for i in tqdm(range(len(individuals))):
        individual = individuals.iloc[i]
        # Start each search from an empty model cache.
        model.clear_cache()
        m = bruteforce_setter(individual.values, model)

        # Monotonic, high-resolution clock for benchmarking.
        start = time.perf_counter()
        m.fit()
        elapsed = time.perf_counter() - start

        solutions = [s.tolist() for s in m.solutions]

        results.append({
            "individual" : individual.values.tolist(),
            "prob" : model.predict_proba(individual.values),
            "time" : elapsed,
            "n_solutions" : len(solutions),
            "solutions" : solutions,
        })

        # Checkpoint the partial results after every individual.
        if output_file is not None:
            pd.DataFrame(results).to_csv(output_file, index=False)

    results = pd.DataFrame(results)
    if output_file is not None:
        results.to_csv(output_file, index=False)
        return None
    return results
    

In [5]:
def get_dice_results(
    dice_model,
    dice_data,
    individuals,
    mutable_features,
    total_CFs,
    sparsity_weight = 0.2,
    output_file = None,
):
    """Generate DiCE counterfactuals for every individual and collect the outcomes.

    Parameters
    ----------
    dice_model : dice_ml.Model
        Wrapped classifier; its ``.model.predict_proba`` is used for "prob".
    dice_data : dice_ml.Data
        Data interface passed to ``dice_ml.Dice``.
    individuals : pandas.DataFrame
        One row per individual to explain.
    mutable_features : list of str
        Forwarded as ``features_to_vary``.
    total_CFs : int
        Number of counterfactuals requested per individual.
    sparsity_weight : float
        Forwarded to ``generate_counterfactuals``.
    output_file : str or None
        When given, the results are written to this CSV once all individuals
        have been processed.

    Returns
    -------
    pandas.DataFrame or None
        The results frame when ``output_file`` is None, otherwise None.
    """
    if output_file is not None:
        # Fail early on a missing folder instead of after the whole run.
        out_dir = os.path.dirname(output_file)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

    exp = dice_ml.Dice(dice_data, dice_model)
    results = []
    
    for i in tqdm(range(len(individuals))):
        # Double brackets keep a one-row DataFrame, which is what DiCE is given.
        individual = individuals.iloc[[i]]

        # Monotonic, high-resolution clock for benchmarking.
        start = time.perf_counter()
        dice_exp = exp.generate_counterfactuals(
            individual,
            total_CFs = total_CFs,
            desired_class = "opposite",
            sparsity_weight = sparsity_weight,
            features_to_vary=mutable_features,
        )
        elapsed = time.perf_counter() - start

        # Convert the DiCE solutions to a suitable format (plain nested lists).
        # A null entry is treated as "no counterfactual found".
        solutions = json.loads(dice_exp.to_json())["cfs_list"][0] or []

        # Remove the last element of every solution.
        # NOTE(review): presumably the predicted outcome column - confirm against DiCE.
        solutions = [solution[:-1] for solution in solutions]

        results.append({
            # NOTE: a one-row frame yields a nested list ([[...]]) here, unlike
            # the flat list stored by the other result collectors.
            "individual" : individual.values.tolist(),
            "prob" : dice_model.model.predict_proba(individual.values)[0, 1],
            "time" : elapsed,
            "n_solutions" : len(solutions),
            "solutions" : solutions,
        })

    results = pd.DataFrame(results)
    if output_file is not None:
        results.to_csv(output_file, index=False)
        return None
    return results

In [6]:
def mapofcem_wrapper(
        action_set,
        criteria,
        criteria_param,
        estimate_outlier,
        max_changes
):  
    """Build a factory that creates a configured MAPOFCEM search per individual.

    Parameters
    ----------
    action_set : ActionSet
        Passed unchanged to ``MAPOFCEM``.
    criteria : {"Percentile", "NonDom", "PercentileChanges"}
        Selects the comparison criterion built for each individual.
    criteria_param : object
        Argument forwarded to the criterion constructor.
    estimate_outlier : bool
        Forwarded to ``MAPOFCEM``.
    max_changes : int
        Forwarded to ``MAPOFCEM``.

    Returns
    -------
    callable
        ``f_(ind, model)`` returning a new, unfitted ``MAPOFCEM`` instance.

    Raises
    ------
    ValueError
        If ``criteria`` is not one of the supported names.
    """
    supported = ("Percentile", "NonDom", "PercentileChanges")
    # Fail when the factory is built, not with an unbound `compare` at first use.
    if criteria not in supported:
        raise ValueError(
            f"Unknown criteria {criteria!r}; expected one of {supported}"
        )

    def f_(ind, model):
        if criteria == "Percentile":
            compare = PercentileCriterion(ind, criteria_param)
        elif criteria == "NonDom":
            # NonDomCriterion is the only criterion built without the individual.
            compare = NonDomCriterion(criteria_param)
        else:  # "PercentileChanges"
            compare = PercentileChangesCriterion(ind, criteria_param)

        return MAPOFCEM(
            action_set,
            ind,
            model,
            estimate_outlier=estimate_outlier,
            max_changes=max_changes,
            compare = compare
        )
    return f_

In [7]:
def mapocam_wrapper(
    action_set,
    criteria,
    criteria_param,
    max_changes
):
    """Build a factory that creates a configured MAPOCAM search per individual.

    Parameters
    ----------
    action_set : ActionSet
        Passed unchanged to ``MAPOCAM``.
    criteria : {"Percentile", "NonDom", "PercentileChanges"}
        Selects the comparison criterion built for each individual.
    criteria_param : object
        Argument forwarded to the criterion constructor.
    max_changes : int
        Forwarded to ``MAPOCAM``.

    Returns
    -------
    callable
        ``f_(ind, model)`` returning a new, unfitted ``MAPOCAM`` instance.

    Raises
    ------
    ValueError
        If ``criteria`` is not one of the supported names.
    """
    allowed = ("Percentile", "NonDom", "PercentileChanges")
    # Reject typos immediately; otherwise `compare` would be unbound inside f_.
    if criteria not in allowed:
        raise ValueError(
            f"Unknown criteria {criteria!r}; expected one of {allowed}"
        )

    def f_(ind, model):
        if criteria == "Percentile":
            compare = PercentileCriterion(ind, criteria_param)
        elif criteria == "NonDom":
            # NonDomCriterion does not take the individual.
            compare = NonDomCriterion(criteria_param)
        else:  # "PercentileChanges"
            compare = PercentileChangesCriterion(ind, criteria_param)

        return MAPOCAM(
            action_set,
            ind,
            model,
            max_changes=max_changes,
            compare = compare
        )
    return f_

In [8]:
def bruteforce_wrapper(
        action_set,
        criteria,
        criteria_param,
        max_changes
):  
    """Build a factory that creates a configured BruteForce search per individual.

    Parameters
    ----------
    action_set : ActionSet
        Passed unchanged to ``BruteForce``.
    criteria : {"Percentile", "NonDom", "PercentileChanges"}
        Selects the comparison criterion built for each individual.
    criteria_param : object
        Argument forwarded to the criterion constructor.
    max_changes : int
        Forwarded to ``BruteForce``.

    Returns
    -------
    callable
        ``f_(ind, model)`` returning a new, unfitted ``BruteForce`` instance.

    Raises
    ------
    ValueError
        If ``criteria`` is not one of the supported names.
    """
    known_criteria = ("Percentile", "NonDom", "PercentileChanges")
    # Validate once here so a bad name cannot surface later as an unbound local.
    if criteria not in known_criteria:
        raise ValueError(
            f"Unknown criteria {criteria!r}; expected one of {known_criteria}"
        )

    def f_(ind, model):
        if criteria == "Percentile":
            compare = PercentileCriterion(ind, criteria_param)
        elif criteria == "NonDom":
            # NonDomCriterion is constructed from the parameter alone.
            compare = NonDomCriterion(criteria_param)
        else:  # "PercentileChanges"
            compare = PercentileChangesCriterion(ind, criteria_param)

        return BruteForce(
            action_set,
            ind,
            model,
            max_changes=max_changes,
            compare = compare
        )
    return f_

In [14]:
def run_all_mapofcem_variations(
    dataset_name,
    action_set,
    individuals,
    model_shap, percCalc,
    max_changes = 3,
):
    """Run the four MAPOFCEM configurations and write one CSV per configuration.

    The configurations are the cross product of the outlier-estimation switch
    (``estimate_outlier=True`` -> "mapofcem", ``False`` -> "mapofcem_v2") and
    the criterion ("Percentile", "PercentileChanges"). Results are written to
    ``../results/{dataset_name}/{prefix}_{criterion}.csv``.

    Parameters
    ----------
    dataset_name : str
        Sub-folder of ``../results`` that receives the CSV files.
    action_set : ActionSet
        Forwarded to ``mapofcem_wrapper``.
    individuals : pandas.DataFrame
        Individuals to explain, forwarded to ``get_mapofcem_results``.
    model_shap : object
        Model wrapper forwarded to ``get_mapofcem_results``.
    percCalc : object
        Forwarded as ``criteria_param`` to ``mapofcem_wrapper``.
    max_changes : int
        Forwarded to ``mapofcem_wrapper`` for every run (defaults to 3).

    Returns
    -------
    None
    """
    # (file prefix, estimate_outlier) - "v2" is the variant without outlier estimation.
    algorithm_variants = (
        ("mapofcem", True),
        ("mapofcem_v2", False),
    )
    # (criteria name for the wrapper, file suffix)
    criteria_variants = (
        ("Percentile", "percentile"),
        ("PercentileChanges", "percentilechanges"),
    )

    # Nesting order reproduces the original execution order of the four runs.
    for prefix, estimate_outlier in algorithm_variants:
        for criteria, suffix in criteria_variants:
            mapofcem_setter = mapofcem_wrapper(
                action_set=action_set,
                criteria=criteria,
                criteria_param=percCalc,
                estimate_outlier=estimate_outlier,
                max_changes=max_changes
            )

            get_mapofcem_results(
                mapofcem_setter=mapofcem_setter,
                individuals=individuals,
                model=model_shap,
                output_file=f"../results/{dataset_name}/{prefix}_{suffix}.csv"
            )

## German

In [10]:
# Load everything the German experiments need in one call: training features
# and labels, the classifier, the outlier detector and the individuals to explain.
# NOTE(review): the exact type of each returned object is decided by
# cfmining.utils.get_data_model - confirm there.
X_train, Y_train, model, outlier_detection, individuals = get_data_model("german")

In [11]:
# Action set for the German data: which features may change and on which grid.
action_set = ActionSet(X = X_train, default_step_size = 0.1)

# Features the search is not allowed to change.
not_mutable_features = ['Age', 'OwnsHouse', 'isMale', 'JobClassIsSkilled', 'Single', 'ForeignWorker', 'RentsHouse']
# Every remaining column may be changed (also passed to DiCE as features_to_vary).
mutable_features = [col for col in X_train.columns if col not in not_mutable_features]
continuous_features = ['Age', 'LoanDuration', 'LoanAmount', 'LoanRateAsPercentOfIncome','YearsAtCurrentHome']

for feat in action_set:
    # A feature is mutable exactly when it is not listed as immutable.
    feat.mutable = feat.name not in not_mutable_features

    if feat.name == "LoanDuration":
        # Fixed absolute step instead of the default step size.
        feat.step_type = "absolute"
        feat.step_size = 6
    elif feat.name == "LoanAmount":
        feat.step_size = 0.1

    # 0 = no restriction on the direction of change (presumably; confirm in ActionSet).
    feat.step_direction = 0
    # Rebuild the value grid after the settings above changed.
    feat.update_grid()

In [12]:
# Percentile calculator built from the configured action set; it is passed as
# the criteria parameter to every search below.
percCalc = PercentileCalculator(action_set = action_set)

# Wrap the classifier together with the outlier detector and training data.
model_shap = GeneralClassifier_Shap(
    model,
    outlier_detection,
    X_train,
    tree = True,
    threshold = 0.5,
)

### MAPOFCEM

In [None]:
# Run every MAPOFCEM configuration on the German individuals; the CSVs are
# written under ../results/german/.
run_all_mapofcem_variations(
    dataset_name="german",
    action_set=action_set, individuals=individuals,
    model_shap=model_shap, percCalc=percCalc
)

### MAPOCAM

In [16]:
# Switch off `use_predict_max` on the shared model wrapper before the MAPOCAM and
# brute-force runs. This mutates `model_shap` in place, so re-running the MAPOFCEM
# cell afterwards would use the changed setting.
# NOTE(review): the flag's exact effect is defined in cfmining.predictors - confirm.
model_shap.use_predict_max = False

In [19]:
# MAPOCAM baseline on the German data. Work on a copy so the MAPOFCEM action
# set configured above is left untouched.
action_set_ = copy.deepcopy(action_set)

for feat in action_set_:
    # little fix
    feat.flip_direction = 1
    # Must run inside the loop so every feature's grid is refreshed, not only
    # the last one visited.
    feat.update_grid()

mapocam_setter = mapocam_wrapper(
    action_set=action_set_,
    criteria="Percentile",
    criteria_param=percCalc,
    max_changes=3
)

# get_mapofcem_results only needs an object with fit()/solutions, so it is
# reused for MAPOCAM as well.
get_mapofcem_results(
    mapofcem_setter=mapocam_setter,
    individuals=individuals,
    model=model_shap,
    output_file=f"../results/german/mapocam_percentile.csv"
)

  0%|                                                                                                                  | 0/62 [00:00<?, ?it/s]

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████| 62/62 [00:08<00:00,  7.36it/s]


### Bruteforce

In [28]:
# Brute-force baseline on the German data. Work on a copy so the MAPOFCEM
# action set is left untouched.
action_set_ = copy.deepcopy(action_set)

for feat in action_set_:
    # little fix
    feat.flip_direction = 1
    # Must run inside the loop so every feature's grid is refreshed, not only
    # the last one visited.
    feat.update_grid()

bruteforce_setter = bruteforce_wrapper(
    action_set=action_set_,
    criteria="Percentile",
    criteria_param=percCalc,
    max_changes=3
)

# NOTE(review): this file name ends in "percentiles" while the Taiwan run writes
# "bruteforce_percentile.csv"; kept as is because downstream readers are not
# visible here - confirm before renaming.
get_bruteforce_results(
    bruteforce_setter=bruteforce_setter,
    individuals=individuals,
    model=model_shap,
    output_file=f"../results/german/bruteforce_percentiles.csv"
)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 62/62 [07:38<00:00,  7.39s/it]


### DICE

In [29]:
# DiCE needs the classifier and the training data wrapped in its own interfaces.
dice_model = dice_ml.Model(model = model, backend = "sklearn", model_type = "classifier")

# Training frame with the label appended as an extra "target" column.
X_train_extended = X_train.assign(target = Y_train)

# Every feature column is declared continuous to DiCE.
dice_data = dice_ml.Data(
    dataframe = X_train_extended,
    continuous_features = list(X_train.columns),
    outcome_name = "target",
)


In [30]:
# One DiCE counterfactual per German individual, varying only the mutable features.
get_dice_results(
    dice_model = dice_model, dice_data = dice_data,
    individuals = individuals,
    mutable_features = mutable_features,
    total_CFs = 1, sparsity_weight = 0.2,
    output_file = f"../results/german/dice_1sol.csv",
)

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.85it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  6.51it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  6.79it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  6.79it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  6.69it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  6.92it/s]
100%|███████████

## Taiwan

In [20]:
# Load the Taiwan data. From here on X_train, model, individuals, ... refer to
# Taiwan and replace the German objects of the same name.
X_train, Y_train, model, outlier_detection, individuals = get_data_model("taiwan")

# Cast everything to int (the boolean columns become 0/1).
X_train = X_train.astype(int)
# Fixed-seed subsample of 100 individuals keeps the run time manageable and
# the selection reproducible.
individuals = individuals.astype(int).sample(100, random_state=0)

In [21]:
# Action set for the Taiwan data: which features may change and on which grid.
action_set = ActionSet(X = X_train, default_step_size = 0.1)

# Features the search is not allowed to change.
not_mutable_features = ['Single', 'Age_in_25_to_40', 'Married', 'Age_lt_25', 'Age_in_40_to_59', 'Age_geq_60', 'EducationLevel']
# Every remaining column may be changed (also passed to DiCE as features_to_vary).
mutable_features = [col for col in X_train.columns if col not in not_mutable_features]

for feat in action_set:
    # The two lists are disjoint, so at most one branch applies.
    if feat.name in not_mutable_features:
        feat.mutable = False
    elif feat.name in mutable_features:
        feat.mutable = True

    # Disabled experiment: a step_size of 0.1 for MaxBillAmountOverLast6Months,
    # MaxPaymentAmountOverLast6Months, MostRecentBillAmount and
    # MostRecentPaymentAmount.

    # 0 = no restriction on the direction of change (presumably; confirm in ActionSet).
    feat.step_direction = 0
    # Rebuild the value grid after the settings above changed.
    feat.update_grid()

### MAPOFCEM

In [22]:
# Percentile calculator built from the Taiwan action set; it is passed as the
# criteria parameter to every search below.
percCalc = PercentileCalculator(action_set = action_set)

# Wrap the classifier together with the outlier detector and training data.
model_shap = GeneralClassifier_Shap(
    model,
    outlier_detection,
    X_train,
    tree = True,
    threshold = 0.5,
)

In [23]:
# Run every MAPOFCEM configuration on the Taiwan individuals; the CSVs are
# written under ../results/taiwan/.
run_all_mapofcem_variations(
    dataset_name="taiwan",
    action_set=action_set, individuals=individuals,
    model_shap=model_shap, percCalc=percCalc
)

  0%|                                                                                                                 | 0/100 [00:00<?, ?it/s]

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [04:29<00:00,  2.69s/it]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [07:39<00:00,  4.60s/it]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [03:34<00:00,  2.15s/it]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [06:16<00:00,  3.77s/it]


### MAPOCAM

In [24]:
# Switch off `use_predict_max` on the shared model wrapper before the MAPOCAM and
# brute-force runs. This mutates `model_shap` in place, so re-running the MAPOFCEM
# cell afterwards would use the changed setting.
# NOTE(review): the flag's exact effect is defined in cfmining.predictors - confirm.
model_shap.use_predict_max = False

In [25]:
# MAPOCAM baseline on the Taiwan data. Work on a copy so the MAPOFCEM action
# set configured above is left untouched.
action_set_ = copy.deepcopy(action_set)

for feat in action_set_:
    # little fix
    feat.flip_direction = 1
    # Must run inside the loop so every feature's grid is refreshed, not only
    # the last one visited.
    feat.update_grid()

mapocam_setter = mapocam_wrapper(
    action_set=action_set_,
    criteria="Percentile",
    criteria_param=percCalc,
    max_changes=3
)

# get_mapofcem_results only needs an object with fit()/solutions, so it is
# reused for MAPOCAM as well.
get_mapofcem_results(
    mapofcem_setter=mapocam_setter,
    individuals=individuals,
    model=model_shap,
    output_file=f"../results/taiwan/mapocam_percentile.csv"
)

  0%|                                                                                                                 | 0/100 [00:00<?, ?it/s]

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [02:41<00:00,  1.61s/it]


### Bruteforce

In [None]:
# Brute-force baseline on the Taiwan data. Work on a copy so the MAPOFCEM
# action set is left untouched.
action_set_ = copy.deepcopy(action_set)

for feat in action_set_:
    # little fix
    feat.flip_direction = 1
    # Must run inside the loop so every feature's grid is refreshed, not only
    # the last one visited.
    feat.update_grid()

bruteforce_setter = bruteforce_wrapper(
    action_set=action_set_,
    criteria="Percentile",
    criteria_param=percCalc,
    max_changes=3
)

get_bruteforce_results(
    bruteforce_setter=bruteforce_setter,
    individuals=individuals,
    model=model_shap,
    output_file=f"../results/taiwan/bruteforce_percentile.csv"
)

### DICE

In [48]:
# DiCE needs the classifier and the training data wrapped in its own interfaces.
dice_model = dice_ml.Model(model = model, backend = "sklearn", model_type = "classifier")

# Training frame with the label appended as an extra "target" column.
X_train_extended = X_train.assign(target = Y_train)

# Every feature column is declared continuous to DiCE (a dedicated
# continuous_features list was considered and left unused).
dice_data = dice_ml.Data(
    dataframe = X_train_extended,
    continuous_features = list(X_train.columns),
    outcome_name = "target",
)

# One DiCE counterfactual per Taiwan individual, varying only the mutable features.
get_dice_results(
    dice_model = dice_model, dice_data = dice_data,
    individuals = individuals,
    mutable_features = mutable_features,
    total_CFs = 1, sparsity_weight = 0.2,
    output_file = f"../results/taiwan/dice_1sol.csv",
)

  0%|                                                                                                                                      | 0/100 [00:00<?, ?it/s]

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.06it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.25it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.26it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.49it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.56it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.55it/s]
100%|███████████