In [1]:
import json
import sys
import time

import joblib
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
sys.path.append("../")

from cfmining.algorithms import MAPOFCEM
from cfmining.criteria import PercentileCalculator, PercentileCriterion, NonDomCriterion
from cfmining.predictors import GeneralClassifier_Shap
from cfmining.action_set import ActionSet

import dice_ml


%load_ext autoreload
%autoreload 2

IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html


In [2]:
def get_mapofcem_results(
    action_set,
    individuals,
    model,
    outlier_detection,
    criteria_type = "Percentile",
    criteria = None,
    percCalc = None,
    max_changes = 3,
):
    """Run MAPOFCEM once per individual and collect timing and solutions.

    Parameters
    ----------
    action_set
        Action set handed to ``MAPOFCEM`` unchanged.
    individuals
        Row-indexable table (``len`` and ``.iloc[i]`` are used); each row is
        one individual to explain.
    model
        Predictor handed to ``MAPOFCEM`` unchanged.
    outlier_detection
        Outlier detector handed to ``MAPOFCEM`` via ``outlier_detection=``.
    criteria_type
        When equal to ``"Percentile"``, a fresh ``PercentileCriterion`` is
        built for every individual and replaces ``criteria``.
    criteria
        Comparison criterion used as-is when ``criteria_type`` is not
        ``"Percentile"``.
    percCalc
        Percentile calculator passed to ``PercentileCriterion``.
    max_changes
        Forwarded to ``MAPOFCEM(max_changes=...)``; defaults to 3, the value
        that used to be hard-coded.

    Returns
    -------
    list of dict
        One record per individual with keys ``"individual"`` (row position),
        ``"time"`` (seconds spent building and fitting MAPOFCEM),
        ``"n_solutions"``, ``"solutions"`` and ``"cost"`` (``criteria.f`` of
        the first solution, or ``np.inf`` when there is none).
    """
    results = []

    for i in range(len(individuals)):
        individual = individuals.iloc[i]
        if criteria_type == "Percentile":
            # The criterion is built around the individual, so it cannot be
            # shared across rows.
            criteria = PercentileCriterion(individual, percCalc)

        # Start the clock only here: criterion construction is not timed.
        start = time.time()
        mapofcem = MAPOFCEM(
            action_set,
            individual.values,
            model,
            outlier_detection=outlier_detection,
            estimate_outlier=True,
            max_changes=max_changes,
            compare=criteria,
        )
        mapofcem.fit()
        elapsed = time.time() - start

        solutions = mapofcem.solutions
        results.append({
            "individual" : i,
            "time" : elapsed,
            "n_solutions" : len(solutions),
            "solutions" : solutions,
            "cost" : criteria.f(solutions[0]) if len(solutions) > 0 else np.inf
        })

    return results

In [3]:
def get_dice_results(
    dice_model,
    dice_data,
    individuals,
    mutable_features,
    outlier_detection,
    criteria_type = "Percentile",
    criteria = None,
    percCalc = None,
):
    """Run DiCE once per individual and collect timing and counterfactuals.

    Parameters
    ----------
    dice_model, dice_data
        Passed to ``dice_ml.Dice(dice_data, dice_model)`` to build the explainer.
    individuals
        Row-indexable table (``len`` and ``.iloc`` are used); each row is one
        query instance.
    mutable_features
        Feature names DiCE is allowed to change; forwarded as
        ``features_to_vary``. ``None`` means every feature may vary.
    outlier_detection
        Accepted for signature parity with ``get_mapofcem_results``; not used
        by this function.
    criteria_type
        When equal to ``"Percentile"``, a fresh ``PercentileCriterion`` is
        built for every individual and replaces ``criteria``.
    criteria
        Cost criterion used as-is when ``criteria_type`` is not ``"Percentile"``.
    percCalc
        Percentile calculator passed to ``PercentileCriterion``.

    Returns
    -------
    list of dict
        One record per individual with keys ``"individual"`` (row position),
        ``"time"`` (seconds spent in ``generate_counterfactuals``),
        ``"n_solutions"``, ``"solutions"`` (rows decoded from DiCE's JSON
        export) and ``"cost"`` (``criteria.f`` of the first row, or ``np.inf``
        when there is none).
    """
    exp = dice_ml.Dice(dice_data, dice_model)
    # Loop-invariant: restrict DiCE to the mutable features when a list is given.
    features_to_vary = "all" if mutable_features is None else list(mutable_features)
    results = []

    for i in range(len(individuals)):
        # DiCE is queried with a one-row DataFrame; the criterion gets the
        # same row as a Series, matching how get_mapofcem_results builds it.
        query_instance = individuals.iloc[[i]]
        if criteria_type == "Percentile":
            criteria = PercentileCriterion(individuals.iloc[i], percCalc)

        # Start the clock only here: criterion construction is not timed.
        start = time.time()
        dice_exp = exp.generate_counterfactuals(
            query_instance,
            total_CFs = 3,
            desired_class = "opposite",
            features_to_vary = features_to_vary,
        )
        end = time.time()

        # Convert the DiCE output to a plain list of rows (first and only query).
        # NOTE(review): the entry may be null when DiCE finds nothing, hence
        # the fallback to an empty list — confirm against the DiCE version used.
        solutions = json.loads(dice_exp.to_json())["cfs_list"][0] or []

        # NOTE(review): each DiCE row presumably carries the predicted outcome
        # as a trailing value — confirm criteria.f tolerates the extra entry.
        results.append({
            "individual" : i,
            "time" : end - start,
            "n_solutions" : len(solutions),
            "solutions" : solutions,
            "cost" : criteria.f(solutions[0]) if len(solutions) > 0 else np.inf
        })

    return results

## German Credit dataset

In [4]:
# Hold-out fractions: TEST_RATIO is taken from the full data set, VAL_RATIO
# from what remains after the test split.
TEST_RATIO = 0.3
VAL_RATIO = 1/7
SEED = 0

df = pd.read_csv("../data/german.csv")
Y = df["GoodCustomer"]
X = df.drop(columns="GoodCustomer")

# First split off the test set, then carve the validation set out of the rest.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=TEST_RATIO, random_state=SEED, shuffle=True
)
X_train, X_val, Y_train, Y_val = train_test_split(
    X_train, Y_train, test_size=VAL_RATIO, random_state=SEED, shuffle=True
)

# Pre-trained artifacts; joblib.load unpickles, so only load trusted files.
isolation_tree = joblib.load("../models/german/IsolationForest.pkl")
model = joblib.load("../models/german/LGBMClassifier.pkl")

# Keep only the test rows that the model assigns to class 0.
denied_individ = model.predict(X_test) == 0
individuals = X_test.iloc[denied_individ].reset_index(drop=True)

In [5]:
action_set = ActionSet(X = X_train)

# Features that counterfactual actions must leave untouched
# (the list previously named 'JobClassIsSkilled' twice).
not_mutable_features = ['Age', 'JobClassIsSkilled', 'OwnsHouse', 'isMale', 'Single', 'ForeignWorker', 'RentsHouse']
mutable_features = [feat for feat in X_train.columns if feat not in not_mutable_features]
continuous_features = ['Age', 'LoanDuration', 'LoanAmount', 'LoanRateAsPercentOfIncome','YearsAtCurrentHome']

for feat in action_set:
    # A feature is mutable exactly when it is not on the immutable list.
    feat.mutable = feat.name not in not_mutable_features

    # NOTE(review): 0 presumably means "no direction restriction" — confirm
    # against the ActionSet implementation.
    feat.step_direction = 0
    feat.update_grid()

In [6]:
# Training frame with the label attached, as required by the DiCE data interface.
X_train_extended = X_train.assign(target = Y_train)

# NOTE(review): every column is declared continuous here rather than only the
# `continuous_features` list defined earlier — confirm this is intended.
dice_data = dice_ml.Data(
    dataframe = X_train_extended,
    continuous_features = X_train.columns.tolist(),
    outcome_name = "target",
)

# Wrap the loaded classifier for DiCE.
dice_model = dice_ml.Model(model = model, backend = "sklearn", model_type = "classifier")


## MAPOFCEM

In [7]:
# Percentile calculator built from the configured action set; it is shared by
# the MAPOFCEM and DiCE runs below.
percCalc = PercentileCalculator(
    action_set = action_set,
)

# Wrap the classifier for MAPOFCEM, using the training data and the tree option.
model_shap = GeneralClassifier_Shap(
    model,
    X_train,
    Y_train,
    tree = True,
)

In [8]:
# Run MAPOFCEM on every denied individual, using a per-individual percentile criterion.
results_mapofcem = get_mapofcem_results(
    action_set = action_set,
    individuals = individuals,
    model = model_shap,
    outlier_detection = isolation_tree,
    criteria_type = "Percentile",
    criteria = None,
    percCalc = percCalc,
)

## DiCE

In [None]:
# Run DiCE on the same denied individuals, with the same percentile calculator
# that was passed to the MAPOFCEM run.
results_dice = get_dice_results(
    dice_model = dice_model,
    dice_data = dice_data,
    individuals = individuals,
    mutable_features = mutable_features,
    outlier_detection = isolation_tree,
    criteria_type = "Percentile",
    criteria = None,
    percCalc = percCalc,
)
