In [128]:
import sys
import os
sys.path.append(os.path.join(os.getcwd(), '..'))

%load_ext autoreload
%autoreload 2

import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model
from recourse_methods import mrmc_method, dice_method
from core import recourse_iterator
from data import data_loader
from data.adapters import continuous_adapter
from core import utils
from models import model_interface, model_loader, model_constants
from visualize.two_d_plots import Display2DPaths

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [109]:
# The same dataset identifier is used for loading the data and the model.
dataset_name = data_loader.DatasetName.CREDIT_CARD_DEFAULT

# Load the dataset together with its metadata object.
dataset, dataset_info = data_loader.load_data(dataset_name)

# Configure the adapter from the dataset's label metadata, then fit it on the
# loaded data; `fit` returns the object that is kept as `adapter`.
adapter = continuous_adapter.StandardizingAdapter(
    label_column=dataset_info.label_column,
    positive_label=dataset_info.positive_label,
)
adapter = adapter.fit(dataset)

# Load the logistic-regression model registered for this dataset name.
model = model_loader.load_model(
    model_constants.ModelType.LOGISTIC_REGRESSION,
    dataset_name,
)

In [110]:
# Keep only the rows for which `model.predict_pos_proba` is below 0.4.
pos_proba = model.predict_pos_proba(dataset)
neg_dataset = dataset[pos_proba < 0.4]

# Choose the point of interest (POI) from those rows.
# NOTE(review): no seed is passed here; if `random_poi` samples without a
# fixed seed, the POI (and every result below) changes between runs -- confirm.
POI = utils.random_poi(
    neg_dataset,
    dataset_info.label_column,
    adapter.negative_label,
)

In [135]:
def get_path_length(path):
    """Return the total Euclidean length of a path.

    The length is the sum of the L2 distances between each pair of
    consecutive rows of ``path``.

    Args:
        path: A pandas DataFrame whose rows are the successive points of the
            path. All columns must be convertible to float.

    Returns:
        The path length as a Python float. Paths with fewer than two rows
        contain no steps and yield 0.0.
    """
    points = path.to_numpy(dtype=float)
    # Row i of `steps` is the displacement from point i to point i + 1;
    # it has zero rows when the path has fewer than two points.
    steps = np.diff(points, axis=0)
    # Per-step L2 norms, summed; float() keeps the return type uniform even
    # for the empty case.
    return float(np.linalg.norm(steps, axis=1).sum())

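### With the same random seed, the output is always the same for the default method.
The default method appears to ignore the `sparsity_weight`, `proximity_weight`, and `diversity_weight` arguments: the next two cells use different weights but print identical certainties and path lengths.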

In [140]:
# Run 1: DiCE without `dice_kwargs`, with only the proximity weight non-zero.

# The same threshold is given to DiCE (desired_confidence) and to the
# iterator (certainty_cutoff).
confidence_threshold = 0.6

counterfactual_weights = {
    'sparsity_weight': 0,
    'proximity_weight': 1,
    'diversity_weight': 0
}

dice = dice_method.DiCE(
    k_directions=3,
    adapter=adapter,
    dataset=dataset,
    continuous_features=dataset_info.continuous_features,
    model=model,
    desired_confidence=confidence_threshold,
    # `dice_kwargs` is deliberately not passed here, so whatever method
    # dice_method.DiCE selects by default is used.
    dice_counterfactual_kwargs=counterfactual_weights,
    random_seed=10342,
)

iterator = recourse_iterator.RecourseIterator(
    recourse_method=dice,
    adapter=adapter,
    certainty_cutoff=confidence_threshold,
    model=model,
)

# NOTE(review): the meaning of the positional arguments 4 and 3 is not visible
# from this notebook -- confirm against iterate_k_recourse_paths.
paths = iterator.iterate_k_recourse_paths(POI, 4, 3)

# Report the model's score at the last point of each path and the path length.
for path in paths:
    final_point = path.iloc[-1]
    certainty = model.predict_pos_proba_series(final_point)
    pl = get_path_length(path)
    print(f"certainty {certainty}\t\tpath length {pl}")

RANDOMSEED


100%|██████████| 1/1 [00:00<00:00,  1.41it/s]


certainty 0.9936619657289182		path length 613214.0
certainty 0.9970955614604449		path length 979548.9351992579
certainty 0.7987380677936876		path length 319566.0


In [142]:
# Run 2: DiCE without `dice_kwargs`, with only the sparsity weight non-zero.

# The same threshold is given to DiCE (desired_confidence) and to the
# iterator (certainty_cutoff).
confidence_threshold = 0.6

counterfactual_weights = {
    'sparsity_weight': 1,
    'proximity_weight': 0,
    'diversity_weight': 0
}

dice = dice_method.DiCE(
    k_directions=3,
    adapter=adapter,
    dataset=dataset,
    continuous_features=dataset_info.continuous_features,
    model=model,
    desired_confidence=confidence_threshold,
    # `dice_kwargs` is deliberately not passed here, so whatever method
    # dice_method.DiCE selects by default is used.
    dice_counterfactual_kwargs=counterfactual_weights,
    random_seed=10342,
)

iterator = recourse_iterator.RecourseIterator(
    recourse_method=dice,
    adapter=adapter,
    certainty_cutoff=confidence_threshold,
    model=model,
)

# NOTE(review): the meaning of the positional arguments 4 and 3 is not visible
# from this notebook -- confirm against iterate_k_recourse_paths.
paths = iterator.iterate_k_recourse_paths(POI, 4, 3)

# Report the model's score at the last point of each path and the path length.
for path in paths:
    final_point = path.iloc[-1]
    certainty = model.predict_pos_proba_series(final_point)
    pl = get_path_length(path)
    print(f"certainty {certainty}\t\tpath length {pl}")

RANDOMSEED


100%|██████████| 1/1 [00:00<00:00,  1.39it/s]


certainty 0.9936619657289182		path length 613214.0
certainty 0.9970955614604449		path length 979548.9351992579
certainty 0.7987380677936876		path length 319566.0


In [144]:
# Run 3: DiCE with the "genetic" method, with only the sparsity weight non-zero.

# The same threshold is given to DiCE (desired_confidence) and to the
# iterator (certainty_cutoff).
confidence_threshold = 0.6

method_options = {
    "method": "genetic"
}

# NOTE(review): a 'random_seed' is supplied here in addition to the
# `random_seed` argument of DiCE below, with a different value -- confirm
# which one takes effect.
counterfactual_options = {
    'sparsity_weight': 1,
    'proximity_weight': 0,
    'diversity_weight': 0,
    'random_seed': 849545
}

dice = dice_method.DiCE(
    k_directions=3,
    adapter=adapter,
    dataset=dataset,
    continuous_features=dataset_info.continuous_features,
    model=model,
    desired_confidence=confidence_threshold,
    dice_kwargs=method_options,
    dice_counterfactual_kwargs=counterfactual_options,
    random_seed=102394,
)

iterator = recourse_iterator.RecourseIterator(
    recourse_method=dice,
    adapter=adapter,
    certainty_cutoff=confidence_threshold,
    model=model,
)

# NOTE(review): the meaning of the positional arguments 4 and 3 is not visible
# from this notebook -- confirm against iterate_k_recourse_paths.
paths = iterator.iterate_k_recourse_paths(POI, 4, 3)

# Report the model's score at the last point of each path and the path length.
for path in paths:
    final_point = path.iloc[-1]
    certainty = model.predict_pos_proba_series(final_point)
    pl = get_path_length(path)
    print(f"certainty {certainty}\t\tpath length {pl}")

100%|██████████| 1/1 [00:00<00:00,  1.15it/s]


certainty 0.06770660513587982		path length 0
certainty 0.6167896335248004		path length 192453.22412472076
certainty 0.7173644904830861		path length 280398.05868621846
