# Check that MRMC and DICE can run reproducibly

In [2]:
import sys
import os
sys.path.append(os.path.join(os.getcwd(), '..'))


%load_ext autoreload
%autoreload 2


from typing import Sequence

import numpy as np
import pandas as pd 

from recourse_methods import mrmc_method, dice_method
from core import recourse_iterator, utils
from data.adapters import continuous_adapter
from data import data_loader
from models import model_loader, model_constants

In [3]:
# Name the dataset once so the data and the model are always loaded for the
# same dataset; previously the enum member was spelled out in both calls.
DATASET_NAME = data_loader.DatasetName.CREDIT_CARD_DEFAULT

DATASET, DATASET_INFO = data_loader.load_data(DATASET_NAME)
MODEL = model_loader.load_model(
    model_constants.ModelType.LOGISTIC_REGRESSION,
    DATASET_NAME)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


### Check MRMC reproducibility

In [6]:
def iterate_mrmc_recourse(random_seed: int) -> Sequence[pd.DataFrame]:
    """Iterate MRMC recourse paths using a given random seed.

    Three sub-seeds (point-of-interest selection, adapter, MRMC) are drawn
    from a generator seeded with `random_seed`, so every seeded component is
    determined by that single value.

    Args:
        random_seed: Seed for the generator that produces the sub-seeds.

    Returns:
        The paths produced by `RecourseIterator.iterate_k_recourse_paths`
        for a randomly selected point of interest. The result is consumed as
        a sequence of DataFrames by `check_paths_are_equal`.
    """
    rng = np.random.default_rng(random_seed)
    # One independent seed per stochastic component.
    poi_seed, adapter_seed, mrmc_seed = rng.integers(0, 100000, size=3)

    adapter = continuous_adapter.StandardizingAdapter(
        label_column=DATASET_INFO.label_column,
        perturb_ratio=0.5,
        positive_label=DATASET_INFO.positive_label,
        random_seed=adapter_seed,
    ).fit(DATASET)
    # The point of interest is drawn using the adapter's negative label.
    poi = utils.random_poi(
        DATASET,
        DATASET_INFO.label_column,
        adapter.negative_label,
        model=MODEL,
        random_seed=poi_seed)
    mrmc = mrmc_method.MRMC(
        k_directions=2,
        adapter=adapter,
        dataset=DATASET,
        rescale_direction=mrmc_method.get_constant_step_size_rescaler(0.5),
        confidence_threshold=0.8,
        model=MODEL,
        random_seed=mrmc_seed
    )
    iterator = recourse_iterator.RecourseIterator(
        mrmc,
        adapter,
        certainty_cutoff=0.8,
        model=MODEL
    )
    paths = iterator.iterate_k_recourse_paths(poi, 10)
    return paths

def check_paths_are_equal(paths1: Sequence[pd.DataFrame], paths2: Sequence[pd.DataFrame]) -> bool:
    """Given two path sets, check that they are identical.

    The path sets should contain identical paths in the identical order.
    Paths are compared by value via `to_numpy()`, so column names and index
    labels are not part of the comparison.

    Args:
        paths1: The first set of paths.
        paths2: The second set of paths.

    Returns:
        True if the two sets are identical. The function never returns
        False; a mismatch is reported by raising.

    Raises:
        AssertionError: If the sets contain a different number of paths or
            if any pair of corresponding paths differs.
    """
    # zip() stops at the shorter input, so without this check a path set that
    # is merely a prefix of the other would be reported as identical.
    if len(paths1) != len(paths2):
        raise AssertionError(
            f"Path sets differ in size: {len(paths1)} != {len(paths2)}")

    for path1, path2 in zip(paths1, paths2):
        np.testing.assert_equal(path1.to_numpy(), path2.to_numpy())

    return True

The two path sets generated below with the same seed should be identical.

In [8]:
# Generate the MRMC paths twice from the same seed, then compare the two runs.
mrmc_path_1, mrmc_path_2 = [iterate_mrmc_recourse(19293) for _ in range(2)]

if check_paths_are_equal(mrmc_path_1, mrmc_path_2):
    print("MRMC is reproducible")

MRMC is reproducible


### Check DICE reproducibility

In [11]:
def iterate_dice_recourse(random_seed: int) -> Sequence[pd.DataFrame]:
    """Generate DiCE recourse paths from a single master seed.

    A generator seeded with `random_seed` yields three sub-seeds, used in
    turn for point-of-interest selection, the adapter and DiCE itself.

    Args:
        random_seed: Seed for the generator that produces the sub-seeds.

    Returns:
        The paths produced by `RecourseIterator.iterate_k_recourse_paths`
        for a randomly selected point of interest.
    """
    seed_source = np.random.default_rng(random_seed)
    sub_seeds = seed_source.integers(0, 100000, size=3)
    poi_seed, adapter_seed, dice_seed = sub_seeds

    # Build the adapter first, then fit it; the fitted result is what the
    # rest of the pipeline uses.
    unfitted_adapter = continuous_adapter.StandardizingAdapter(
        label_column=DATASET_INFO.label_column,
        perturb_ratio=0.5,
        positive_label=DATASET_INFO.positive_label,
        random_seed=adapter_seed,
    )
    adapter = unfitted_adapter.fit(DATASET)

    # The point of interest is drawn using the adapter's negative label.
    point_of_interest = utils.random_poi(
        DATASET,
        DATASET_INFO.label_column,
        adapter.negative_label,
        model=MODEL,
        random_seed=poi_seed,
    )

    recourse_method = dice_method.DiCE(
        k_directions=2,
        adapter=adapter,
        dataset=DATASET,
        continuous_features=DATASET_INFO.continuous_features,
        model=MODEL,
        desired_confidence=0.8,
        random_seed=dice_seed,
    )

    return recourse_iterator.RecourseIterator(
        recourse_method,
        adapter,
        certainty_cutoff=0.8,
        model=MODEL,
    ).iterate_k_recourse_paths(point_of_interest, 3)

The two path sets generated below with the same seed should be identical.

In [12]:
# Generate the DiCE paths twice from the same seed, then compare the two runs.
dice_path_1, dice_path_2 = [iterate_dice_recourse(19293) for _ in range(2)]

if check_paths_are_equal(dice_path_1, dice_path_2):
    print("DICE is reproducible.")

100%|██████████| 1/1 [00:01<00:00,  1.17s/it]
100%|██████████| 1/1 [00:00<00:00,  1.10it/s]
100%|██████████| 1/1 [00:00<00:00,  1.06it/s]
100%|██████████| 1/1 [00:01<00:00,  1.06s/it]
100%|██████████| 1/1 [00:01<00:00,  1.39s/it]
100%|██████████| 1/1 [00:01<00:00,  1.80s/it]
100%|██████████| 1/1 [00:01<00:00,  1.28s/it]
100%|██████████| 1/1 [00:01<00:00,  1.07s/it]
100%|██████████| 1/1 [00:00<00:00,  1.13it/s]
100%|██████████| 1/1 [00:01<00:00,  1.15s/it]


DICE is reproducible.
