In [153]:
import os
import random
import sys

import joblib
import numpy as np

from sklearn.preprocessing import LabelEncoder
le=LabelEncoder()
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier

from Phemus.Dataset import Dataset
from Phemus.Aequitas_Fully_Directed_Sklearn import Fully_Direct
from scipy.optimize import basinhopping
from Phemus.mpFully_Direct import mp_basinhopping
# import DiCE
import dice_ml
from dice_ml.utils import helpers  # helper functions
from sdv.metadata import SingleTableMetadata
from sdv.single_table import CopulaGANSynthesizer, CTGANSynthesizer
from utils.config import bank, census, credit

In [151]:
bank.input_bounds

[[1, 9],
 [0, 11],
 [0, 2],
 [0, 3],
 [0, 1],
 [-20, 179],
 [0, 1],
 [0, 1],
 [0, 2],
 [1, 31],
 [0, 11],
 [0, 99],
 [1, 63],
 [-1, 39],
 [0, 1],
 [0, 3]]

In [154]:
# Get the current working directory
current_path = os.getcwd()

# Add the directory to the PATH environment variable
os.environ['PATH'] = f"{current_path};" + os.environ['PATH']

# Add the directory to the Python sys.path list
sys.path.append(current_path)
current_path

'C:\\Users\\hussaini_21000736\\OneDrive - Universiti Teknologi PETRONAS\\PhD Dissertation\\Experiments\\Fairness\\Phemus'

In [155]:
def generate_sklearn_classifier(dataset: Dataset, output_pkl_dir):
    input_csv_dir = dataset.dataset_dir
    col_to_be_predicted = dataset.col_to_be_predicted
    model_type = dataset.model_type

    df = pd.read_csv(input_csv_dir)
    cat_feature = list(df.columns)

    for col in cat_feature:
        df[col] = le.fit_transform(df[col])

    X = df.drop([col_to_be_predicted], axis=1)
    y = df[col_to_be_predicted]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=12)

    if model_type == "DecisionTree":
        model = DecisionTreeClassifier(random_state=42, criterion='entropy', splitter='random')
        model_name = 'DecisionTreeClassifier'
    elif model_type == "MLPC":
        model = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(7, 5), random_state=1)
        model_name = 'MLPClassifier'
    elif model_type == "SVM":
        model = SVC(gamma=0.0025)
        model_name = 'SVC'
    elif model_type == "RandomForest":
        model = RandomForestClassifier(n_estimators=10)
        model_name = 'RandomForestClassifier'
    else:
        error_message = 'The chosen types of model is not supported yet. Please choose from one of the following: \
                            DecisionTree, MLPC, SVM and RandomForest'
        raise ValueError(error_message)

    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    scores = []
    scores.append({
        'model': model_name,
        'score': model.score(X_test, y_test),
        'f1_score': f1_score(y_test, pred)
    })

    model.score(X_test, y_test)

    joblib.dump(model, output_pkl_dir)

In [81]:
num_params = 8
sensitive_param_idx = 5  # Starts at O
sensitive_param_name = "Gender"
col_to_be_predicted = "LeaveOrNot"
dataset_dir = f"{current_path}\dataset\Employee.csv"
model_type = "DecisionTree"


dataset = Dataset(num_params=num_params, sensitive_param_idx=sensitive_param_idx, \
                  model_type=model_type, sensitive_param_name=sensitive_param_name, \
                  col_to_be_predicted=col_to_be_predicted, dataset_dir=dataset_dir)

pkl_dir = 'Employee_DecisionTree_Original.pkl'
improved_pkl_dir = 'Employee_DecisionTree_Original_Improved.pkl'
retrain_csv_dir = 'Employee_Retraining_Dataset.csv'
plot_dir = 'Employee_Fairness_Plot.png'

perturbation_unit = 1

num_trials = 1000
samples = 100
global_iteration_limit = 1000  # needs to be at least 1000 to be effective
local_iteration_limit = 100
threshold = 0

retrain_csv_dir = 'Employee_Retraining_Dataset.csv'

# num_params = 8
# sensitive_param_idx = 5  # Starts at O
# sensitive_param_name = "Gender"
# col_to_be_predicted = "LeaveOrNot"
# dataset_dir = f"{current_path}\dataset\Employee.csv"
# model_type = "DecisionTree"


# dataset = Dataset(num_params=num_params, sensitive_param_idx=sensitive_param_idx, \
#                   model_type=model_type, sensitive_param_name=sensitive_param_name, \
#                   col_to_be_predicted=col_to_be_predicted, dataset_dir=dataset_dir)

# pkl_dir = 'Employee_DecisionTree_Original.pkl'
# improved_pkl_dir = 'Employee_DecisionTree_Original_Improved.pkl'
# retrain_csv_dir = 'Employee_Retraining_Dataset.csv'
# plot_dir = 'Employee_Fairness_Plot.png'

# perturbation_unit = 1

# num_trials = 1000
# samples = 100
# global_iteration_limit = 1000  # needs to be at least 1000 to be effective
# local_iteration_limit = 100
# threshold = 0

# retrain_csv_dir = 'Employee_Retraining_Dataset.csv'

In [5]:
import pandas as pd
generate_sklearn_classifier(dataset, pkl_dir)

In [6]:
def get_features_range(df):
    columns = df.columns
    features = {}
    for c in columns:
        min_value = df[c].min()
        max_value = df[c].max()
        values = list(range(min_value, max_value+1))
        features[c] = values
    return features
# dfc = pd.read_csv(dataset_dir)
# featureget_features_range(dfc)

In [18]:
import json

def aequitas_fully_directed_sklearn(dataset: Dataset, perturbation_unit, threshold, global_iteration_limit, \
                                    local_iteration_limit, input_pkl_dir, retrain_csv_dir):
    print("Aequitas Fully Directed Started...\n")
    initial_input = [random.randint(low, high) for [low, high] in dataset.input_bounds]
    minimizer = {"method": "L-BFGS-B"}

    fully_direct = Fully_Direct(dataset, perturbation_unit, threshold, global_iteration_limit, \
                                local_iteration_limit, input_pkl_dir, retrain_csv_dir)

    basinhopping(fully_direct.evaluate_global, initial_input, stepsize=1.0, take_step=fully_direct.global_discovery,
                 minimizer_kwargs=minimizer,
                 niter=global_iteration_limit)

    df = pd.DataFrame(fully_direct.global_disc_inputs_list, columns=list(dataset.column_names))
    
    print('Global discrimination:', len(df))

    df.to_csv('global_discrimination.csv', header=list(dataset.column_names), index=False)

    print("Finished Global Search")
    print("Percentage discriminatory inputs - " + str(float(len(fully_direct.global_disc_inputs_list)
                                                            + len(fully_direct.local_disc_inputs_list)) / float(
        len(fully_direct.tot_inputs)) * 100))

In [19]:
aequitas_fully_directed_sklearn(
    dataset,
    perturbation_unit,
    threshold,
    global_iteration_limit,
    local_iteration_limit,
    pkl_dir,
    improved_pkl_dir)

Aequitas Fully Directed Started...

Global discrimination: 206
Finished Global Search
Percentage discriminatory inputs - 39.163498098859314


In [20]:
df = pd.read_csv('global_discrimination.csv')

In [37]:
model = joblib.load(pkl_dir)

m = dice_ml.Model(model=model, backend="sklearn")
DiCE explanation instance
exp = dice_ml.Dice(d,m, method="random")

count = 0
for count in range(5):
    data_point = df.iloc[count]
    data = np.array(data_point)[:-1]
    print(model.predict(data.reshape(1, -1)))
    count += count

[0]
[0]
[0]
[0]
[1]


In [82]:
m = dice_ml.Model(model=model, backend="sklearn")
X_train_all = pd.read_csv(dataset.dataset_dir)
data = pd.DataFrame(np.array(df), columns=X_train_all.columns)
X = data.drop(col_to_be_predicted, axis=1)

#     d = dice_ml.Data(dataframe=X_train_all,
#                      continuous_features=[],
#                     outcome_name='LeaveOrNot')
# Pre-trained ML model
model = joblib.load(pkl_dir)
features = get_features_range(X_train_all.drop(col_to_be_predicted, axis=1))
print(features)
d = dice_ml.data.Data(features=features, continuous_features=[], outcome_name=col_to_be_predicted)
m = dice_ml.Model(model=model, backend="sklearn")
# DiCE explanation instance
exp = dice_ml.Dice(d,m, method="random")

{'Education': [0, 1, 2], 'JoiningYear': [0, 1, 2, 3, 4, 5, 6], 'City': [0, 1, 2], 'PaymentTier': [0, 1, 2], 'Age': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19], 'Gender': [-1, 0, 1], 'EverBenched': [0, 1], 'ExperienceInCurrentDomain': [0, 1, 2, 3, 4, 5, 6, 7]}


In [66]:
exp_dataset = df.drop('LeaveOrNot', axis=1)
dice_exp = exp.generate_counterfactuals(dataset.iloc[0:1], total_CFs=1000, desired_class=f'opposite')

100%|██████████| 1/1 [00:00<00:00,  6.67it/s]

Only 747 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec





In [141]:
import json
col_to_be_predicted_idx = 8

exp_dataset = df.drop('LeaveOrNot', axis=1)
local_disc_inputs = []
total_cfs = []
print(sensitive_param_idx)

for count in range(len(exp_dataset)):
    data_point = exp_dataset.iloc[count:count+1]
    data = np.array(data_point)
    prediction = model.predict(data.reshape(1, -1))[0]
    
    dice_exp = exp.generate_counterfactuals(data_point, total_CFs=1000, desired_class=int(prediction))
    counterfactuals = json.loads(dice_exp.to_json())['cfs_list']
    for cfl in counterfactuals[0]:
        total_cfs.append(cfl)
        
        inp0 = [int(i) for i in cfl]
        sensValue = inp0[sensitive_param_idx]

        inp0np = np.asarray(inp0)
        inp0np = np.reshape(inp0, (1, -1))

        inp0 = np.asarray(inp0)
        inp0 = np.reshape(inp0, (1, -1))
        
        # drop y column here 
        inp1delY = np.delete(inp0, [col_to_be_predicted_idx])
        inp1delY = np.reshape(inp1delY, (1, -1))
        
        out0 = model.predict(inp1delY)
        
        # Loops through all values of the sensitive parameter
        
        
        for i in [-1, 0, 1]:
            if sensValue != i: 
                inp1 = [int(k) for k in cfl]
                inp1[sensitive_param_idx] = i

                inp1 = np.asarray(inp1)
                inp1 = np.reshape(inp1, (1, -1))

                # drop y column here 
                inp1delY = np.delete(inp1, [col_to_be_predicted_idx])
                inp1delY = np.reshape(inp1delY, (1, -1))

                out1 = model.predict(inp1delY)

                if abs(out0 - out1) > 0:
                    local_disc_inputs.append(inp0.tolist()[0])

5


100%|██████████| 1/1 [00:00<00:00,  7.06it/s]

Only 334 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec



100%|██████████| 1/1 [00:00<00:00,  6.55it/s]


Only 454 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.03it/s]


Only 467 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.30it/s]


Only 328 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.32it/s]


Only 331 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.68it/s]


Only 601 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.43it/s]


Only 403 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.53it/s]


Only 323 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.90it/s]


Only 274 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.29it/s]


Only 288 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.21it/s]


Only 430 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.88it/s]


Only 398 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.36it/s]


Only 559 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.68it/s]


Only 525 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.71it/s]


Only 519 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.90it/s]


Only 353 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.61it/s]


Only 496 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.99it/s]


Only 317 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.49it/s]


Only 358 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.83it/s]


Only 531 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.59it/s]


Only 345 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.67it/s]


Only 271 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.38it/s]


Only 431 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.41it/s]


Only 329 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.43it/s]

Only 303 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.43it/s]
100%|██████████| 1/1 [00:00<00:00,  7.16it/s]


Only 538 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.40it/s]


Only 606 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.31it/s]


Only 241 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.26it/s]

Only 395 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.26it/s]
100%|██████████| 1/1 [00:00<00:00,  7.49it/s]


Only 247 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.14it/s]

Only 573 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.09it/s]
100%|██████████| 1/1 [00:00<00:00,  7.20it/s]


Only 190 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.11it/s]

Only 400 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.06it/s]
100%|██████████| 1/1 [00:00<00:00,  7.60it/s]


Only 394 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.35it/s]


Only 202 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.30it/s]

Only 457 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.25it/s]
100%|██████████| 1/1 [00:00<00:00,  6.99it/s]


Only 574 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.56it/s]


Only 334 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.36it/s]


Only 389 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.37it/s]


Only 434 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.30it/s]


Only 606 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.16it/s]


Only 533 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.60it/s]


Only 312 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.10it/s]


Only 563 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.06it/s]


Only 297 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.02it/s]


Only 323 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.12it/s]


Only 606 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.65it/s]


Only 434 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.31it/s]


Only 627 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.27it/s]


Only 336 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.61it/s]


Only 523 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.70it/s]


Only 383 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.37it/s]


Only 392 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.68it/s]


Only 711 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.79it/s]


Only 235 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.19it/s]

Only 534 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.19it/s]
100%|██████████| 1/1 [00:00<00:00,  6.94it/s]


Only 371 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.81it/s]


Only 110 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.29it/s]


Only 332 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.07it/s]


Only 199 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.35it/s]

Only 295 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.29it/s]
100%|██████████| 1/1 [00:00<00:00,  6.84it/s]


Only 205 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.54it/s]

Only 685 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.54it/s]
100%|██████████| 1/1 [00:00<00:00,  7.30it/s]


Only 455 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.15it/s]


Only 535 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.16it/s]


Only 585 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.48it/s]


Only 412 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.27it/s]


Only 521 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.11it/s]


Only 338 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.92it/s]


Only 513 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.33it/s]


Only 577 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.66it/s]


Only 372 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.08it/s]


Only 238 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.53it/s]


Only 599 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.41it/s]


Only 479 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.55it/s]


Only 433 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.94it/s]


Only 646 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.80it/s]


Only 256 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.80it/s]


Only 298 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.25it/s]

Only 369 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.20it/s]
100%|██████████| 1/1 [00:00<00:00,  7.33it/s]


Only 316 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.19it/s]


Only 301 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.40it/s]


Only 336 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.54it/s]


Only 333 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.67it/s]


Only 318 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.34it/s]

Only 478 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.29it/s]
100%|██████████| 1/1 [00:00<00:00,  7.40it/s]


Only 253 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.71it/s]

Only 150 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.64it/s]
100%|██████████| 1/1 [00:00<00:00,  7.70it/s]


Only 281 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.76it/s]


Only 433 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.16it/s]


Only 367 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.81it/s]


Only 398 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.80it/s]


Only 464 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.21it/s]


Only 203 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.53it/s]

Only 293 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.53it/s]
100%|██████████| 1/1 [00:00<00:00,  7.06it/s]


Only 211 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.04it/s]

Only 554 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.99it/s]
100%|██████████| 1/1 [00:00<00:00,  7.28it/s]


Only 378 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.42it/s]


Only 385 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.28it/s]


Only 250 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  3.62it/s]


Only 237 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.22it/s]

Only 640 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.16it/s]
100%|██████████| 1/1 [00:00<00:00,  6.95it/s]


Only 328 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.36it/s]


Only 440 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.90it/s]


Only 291 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.28it/s]

Only 298 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.23it/s]
100%|██████████| 1/1 [00:00<00:00,  7.65it/s]

Only 468 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.65it/s]
100%|██████████| 1/1 [00:00<00:00,  7.42it/s]


Only 535 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.42it/s]


Only 698 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.98it/s]


Only 379 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.92it/s]


Only 361 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.10it/s]


Only 244 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.53it/s]


Only 440 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.28it/s]


Only 569 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.88it/s]


Only 316 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  5.92it/s]


Only 485 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.42it/s]


Only 237 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.56it/s]


Only 244 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.06it/s]


Only 225 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.83it/s]

Only 491 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.78it/s]
100%|██████████| 1/1 [00:00<00:00,  7.06it/s]


Only 422 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.11it/s]


Only 380 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.05it/s]


Only 245 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.03it/s]


Only 531 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.63it/s]


Only 318 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.86it/s]


Only 238 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.72it/s]


Only 598 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.36it/s]


Only 212 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.07it/s]

Only 484 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.02it/s]
100%|██████████| 1/1 [00:00<00:00,  7.34it/s]


Only 212 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.46it/s]

Only 354 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.40it/s]
100%|██████████| 1/1 [00:00<00:00,  7.46it/s]


Only 655 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.62it/s]


Only 317 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.19it/s]


Only 258 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.80it/s]


Only 301 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.48it/s]


Only 540 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.01it/s]


Only 451 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.18it/s]


Only 218 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.42it/s]


Only 460 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.66it/s]


Only 410 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.97it/s]


Only 388 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.01it/s]


Only 414 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.02it/s]


Only 330 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.21it/s]


Only 183 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.41it/s]

Only 180 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.35it/s]
100%|██████████| 1/1 [00:00<00:00,  7.18it/s]

Only 162 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.18it/s]
100%|██████████| 1/1 [00:00<00:00,  7.44it/s]

Only 217 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec



100%|██████████| 1/1 [00:00<00:00,  7.27it/s]

Only 532 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.22it/s]
100%|██████████| 1/1 [00:00<00:00,  7.03it/s]


Only 380 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.15it/s]


Only 219 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.57it/s]

Only 241 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.52it/s]
100%|██████████| 1/1 [00:00<00:00,  7.34it/s]


Only 170 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.36it/s]

Only 223 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.31it/s]
100%|██████████| 1/1 [00:00<00:00,  7.25it/s]

Only 302 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.20it/s]
100%|██████████| 1/1 [00:00<00:00,  6.79it/s]


Only 588 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.98it/s]


Only 453 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.72it/s]


Only 919 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.13it/s]


Only 429 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.61it/s]


Only 339 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.35it/s]


Only 307 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.65it/s]


Only 525 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.09it/s]


Only 345 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.05it/s]


Only 455 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.54it/s]


Only 279 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.41it/s]


Only 486 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.29it/s]


Only 218 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.93it/s]

Only 716 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.89it/s]
100%|██████████| 1/1 [00:00<00:00,  7.41it/s]


Only 209 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.40it/s]

Only 173 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.35it/s]
100%|██████████| 1/1 [00:00<00:00,  7.24it/s]

Only 489 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.19it/s]
100%|██████████| 1/1 [00:00<00:00,  6.96it/s]


Only 586 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.20it/s]


Only 392 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.68it/s]


Only 284 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.71it/s]


Only 254 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.14it/s]

Only 237 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.09it/s]
100%|██████████| 1/1 [00:00<00:00,  7.71it/s]

Only 364 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.65it/s]
100%|██████████| 1/1 [00:00<00:00,  7.02it/s]


Only 676 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.03it/s]


Only 366 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.39it/s]


Only 305 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.46it/s]

Only 541 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.41it/s]
100%|██████████| 1/1 [00:00<00:00,  7.42it/s]


Only 639 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.00it/s]


Only 410 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.52it/s]


Only 207 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.02it/s]

Only 305 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.02it/s]
100%|██████████| 1/1 [00:00<00:00,  6.91it/s]


Only 279 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.47it/s]


Only 328 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.54it/s]


Only 456 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.43it/s]


Only 453 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.70it/s]


Only 455 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.97it/s]


Only 350 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.30it/s]


Only 654 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.67it/s]


Only 246 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.43it/s]

Only 237 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.38it/s]
100%|██████████| 1/1 [00:00<00:00,  7.35it/s]

Only 373 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.30it/s]
100%|██████████| 1/1 [00:00<00:00,  6.97it/s]


Only 590 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.13it/s]


Only 365 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.16it/s]


Only 364 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.41it/s]


Only 248 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.58it/s]

Only 352 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  7.53it/s]
100%|██████████| 1/1 [00:00<00:00,  6.82it/s]


Only 364 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  5.99it/s]


Only 237 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  5.33it/s]


Only 529 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  5.03it/s]


Only 527 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.23it/s]


Only 368 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  5.71it/s]


Only 212 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  5.89it/s]


Only 378 (required 1000)  Diverse Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


In [143]:
print(len(total_cfs), len(local_disc_inputs))

80214 42223


In [30]:
df

Unnamed: 0,Education,JoiningYear,City,PaymentTier,Age,Gender,EverBenched,ExperienceInCurrentDomain,LeaveOrNot
0,1,4,2,0,7,1,0,2,0
1,2,4,2,1,9,0,1,0,1
2,1,3,0,2,6,0,0,7,1
3,1,3,1,0,13,0,1,4,0
4,0,2,2,2,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...
201,2,3,1,1,19,0,1,5,1
202,2,3,1,1,8,0,1,5,0
203,1,2,2,1,5,0,0,7,0
204,1,0,2,1,16,0,0,5,0


In [14]:
import json

def aequitas_fully_directed_sklearn(dataset: Dataset, perturbation_unit, threshold, global_iteration_limit, \
                                    local_iteration_limit, input_pkl_dir, retrain_csv_dir):
    print("Aequitas Fully Directed Started...\n")
    initial_input = [random.randint(low, high) for [low, high] in dataset.input_bounds]
    minimizer = {"method": "L-BFGS-B"}

    fully_direct = Fully_Direct(dataset, perturbation_unit, threshold, global_iteration_limit, \
                                local_iteration_limit, input_pkl_dir, retrain_csv_dir)

    basinhopping(fully_direct.evaluate_global, initial_input, stepsize=1.0, take_step=fully_direct.global_discovery,
                 minimizer_kwargs=minimizer,
                 niter=global_iteration_limit)

    df = pd.DataFrame(fully_direct.global_disc_inputs_list, columns=list(dataset.column_names))
    
    print('Global discrimination:', len(df))

    df.to_csv('global_discrimination.csv', header=list(dataset.column_names), index=False)

    print("Finished Global Search")
    print("Percentage discriminatory inputs - " + str(float(len(fully_direct.global_disc_inputs_list)
                                                            + len(fully_direct.local_disc_inputs_list)) / float(
        len(fully_direct.tot_inputs)) * 100))
    
#     print()
#     print("Starting Local Search")
    
    
#     metadata = SingleTableMetadata()
#     metadata.detect_from_dataframe(data=df)

#     ctgan = CTGANSynthesizer(metadata=metadata, batch_size=50,epochs=1000,verbose=False)
#     ctgan.fit(df)
#     ctgan.save('ctgan-employee-data-synth.pkl')
#     samples = ctgan.sample(500)
#     for s in np.array(samples):
#         fully_direct.evaluate_local(s)
    
#     print("Finished Local Search")
#     print('Local discrimination:', len(fully_direct.local_disc_inputs_list))
#     print("Percentage discriminatory inputs - " + str(float(len(fully_direct.global_disc_inputs_list)
#                                                             + len(fully_direct.local_disc_inputs_list)) 
#                                                       / float(len(fully_direct.tot_inputs)) * 100))

In [15]:
#     m = dice_ml.Model(model=model, backend="sklearn")
    # DiCE explanation instance
#     exp = dice_ml.Dice(d,m, method="random")

    
    
#     # # Generate counterfactual examples
#     # dice_exp = exp.generate_counterfactuals(data, total_CFs=4, desired_class="opposite")
#     # # Visualize counterfactual explanation
#     # dice_exp.visualize_as_dataframe()
#     count = 0
#     X = data.drop('LeaveOrNot', axis=1)

#     # Generate counterfactual examples
#     dice_exp = exp.generate_counterfactuals(X, total_CFs=4, desired_class="opposite")
#     # Visualize counterfactual explanation
#     dice_exp.visualize_as_dataframe()

    # for x in X:
    #     count += 1
    #     try:
    #         # Generate counterfactual examples
    #         dice_exp = exp.generate_counterfactuals(x, total_CFs=4, desired_class="opposite")
    #         # Visualize counterfactual explanation
    #         dice_exp.visualize_as_dataframe()
    #     except:
    #         continue
    # print(count)

In [16]:
import json

def aequitas_fully_directed_sklearn(dataset: Dataset, perturbation_unit, threshold, global_iteration_limit, \
                                    local_iteration_limit, input_pkl_dir, retrain_csv_dir):
    print("Aequitas Fully Directed Started...\n")
    initial_input = [random.randint(low, high) for [low, high] in dataset.input_bounds]
    minimizer = {"method": "L-BFGS-B"}

    fully_direct = Fully_Direct(dataset, perturbation_unit, threshold, global_iteration_limit, \
                                local_iteration_limit, input_pkl_dir, retrain_csv_dir)

    basinhopping(fully_direct.evaluate_global, initial_input, stepsize=1.0, take_step=fully_direct.global_discovery,
                 minimizer_kwargs=minimizer,
                 niter=global_iteration_limit)

    df = pd.DataFrame(fully_direct.global_disc_inputs_list)

    df.to_csv('global_discrimination.csv', header=list(dataset.column_names), index=False)

#     print("Finished Global Search")
#     print("Percentage discriminatory inputs - " + str(float(len(fully_direct.global_disc_inputs_list)
#                                                             + len(fully_direct.local_disc_inputs_list)) / float(
#         len(fully_direct.tot_inputs)) * 100))
    
#     print()
#     print("Starting Local Search")
    
#     # Dataset for training an ML model
#     X_train_all = pd.read_csv(dataset.dataset_dir)
#     data = pd.DataFrame(np.array(df), columns=X_train_all.columns)
#     X = data.drop(col_to_be_predicted, axis=1)

# #     d = dice_ml.Data(dataframe=X_train_all,
# #                      continuous_features=[],
# #                     outcome_name='LeaveOrNot')
#     # Pre-trained ML model
#     model = joblib.load(pkl_dir)
#     features = get_features_range(X_train_all.drop(col_to_be_predicted, axis=1))
#     d = dice_ml.data.Data(features=features, outcome_name=col_to_be_predicted)
#     m = dice_ml.Model(model=model, backend="sklearn")
#     # DiCE explanation instance
#     exp = dice_ml.Dice(d,m, method="random")
#     # Generate counterfactual examples
#     dice_exp = exp.generate_counterfactuals(X[0:40], 
#                                             total_CFs=100, desired_class="thesame")
#     # Visualize counterfactual explanation
# #     dice_exp.visualize_as_dataframe(show_only_changes=True)
#     result = dice_exp.to_json()
#     result = json.loads(result)
#     counterfactuals = []
#     for cl in result['cfs_list']:
#         if cl:
#             for lst in cl:
#                 counterfactuals.append(lst)

#     columns = list(X.columns)
#     columns.append(col_to_be_predicted)
#     cdf = pd.DataFrame(counterfactuals, columns=columns)
#     cdf = cdf.drop_duplicates()
#     print(len(counterfactuals), len(np.array(cdf)))
#     print(result)

In [67]:
import time
import pandas as pd, numpy as np

from utils.config import census, credit, bank
from data.census import census_data
from data.credit import credit_data
from data.bank import bank_data

global_disc_inputs = set()
global_disc_inputs_list = []
local_disc_inputs = set()
local_disc_inputs_list = []
tot_inputs = set()
location = np.zeros(21)

os.chdir('C:\\Users\\hussaini_21000736\\Documents\\Fairness\\Phemus\\Examples')

sys.path.append(os.getcwd())

"""
census: 9,1 for gender, age, 8 for race
credit: 9,13 for gender,age
bank: 1 for age
"""

dataset = "bank"
data_config = {"census": census, "credit": credit, "bank": bank}
config = data_config[dataset]  # replace
sensitive_param = 5
threshold_l = 10  # replace census-7,credit-14,bank-10
threshold = 0
input_bounds = config.input_bounds
classifier_name = 'Employee_DecisionTree_Original.pkl'

# replace

model = joblib.load(classifier_name)

In [None]:
def expga_global(max_global, max_local):
    start = time.time()

    data_config = {"census": census, "credit": credit, "bank": bank}
    config = data_config[dataset]
    feature_names = config.feature_name
    class_names = config.class_name
    sens_name = config.sens_name[sensitive_param]
    params = config.params

    data = {"census": census_data, "credit": credit_data, "bank": bank_data}
    # prepare the testing data and model
    X, Y, input_shape, nb_classes = data[dataset]()

    start = time.time()

    model_name = classifier_name.split("/")[-1].split("_")[0]
    # file_name = "aequitas_"+dataset+sensitive_param+"_"+model+""
    file_name = "expga_{}_{}{}.txt".format(model_name, dataset, sensitive_param)
    f = open(file_name, "a")
    f.write("iter:" + str(iter) + "------------------------------------------" + "\n" + "\n")
    f.close()

    global_discovery = Global_Discovery()

    train_samples = global_discovery(max_global, params, input_bounds, sensitive_param)
    train_samples = np.array(train_samples)
    # train_samples = X[np.random.choice(X.shape[0], max_global, replace=False)]

    np.random.shuffle(train_samples)

    print(train_samples.shape)

    explainer = ConstructExplainer(X, feature_names, class_names)

    seed = Searchseed(model, feature_names, sens_name, explainer, train_samples, params, X)

    print('Finish Searchseed')
    for inp in seed:
        inp0 = [int(i) for i in inp]
        inp0 = np.asarray(inp0)
        inp0 = np.reshape(inp0, (1, -1))
        global_disc_inputs.add(tuple(map(tuple, inp0)))
        global_disc_inputs_list.append(inp0.tolist()[0])

    print("Finished Global Search")
    print('length of total input is:' + str(len(tot_inputs)))
    print('length of global discovery is:' + str(len(global_disc_inputs_list)))

    end = time.time()

    print('Total time:' + str(end - start))

In [53]:
expga_global(2000, 100)

KeyError: <Phemus.Dataset.Dataset object at 0x000001E7A69DDBE0>

In [77]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

# Load the iris dataset as an example
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

# Create a random forest classifier
clf = RandomForestClassifier(n_estimators=100)

# Train the classifier with the training data
clf.fit(X_train, y_train)

# Make predictions with the test data
predictions = clf.predict(X_test)

# Calculate the accuracy of the model
accuracy = clf.score(X_test, y_test)

print("Accuracy: ", accuracy)


Accuracy:  0.9333333333333333


In [78]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# Load the breast cancer dataset
X, y = load_breast_cancer(return_X_y=True)

# Identify the sensitive attributes
sensitive_attrs = [0, 1]  # 0 = radius, 1 = texture

# Generate synthetic data points with reversed values for the sensitive attributes
X_syn = X.copy()
X_syn[:, sensitive_attrs] = 1 - X_syn[:, sensitive_attrs]
y_syn = y

# Combine the original and synthetic data
X_combined = np.concatenate([X, X_syn])
y_combined = np.concatenate([y, y_syn])

# Split the combined data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_combined, y_combined, test_size=0.2)

# Train a random forest classifier on the combined data
model = RandomForestClassifier()
model.fit(X_train, y_train)

# Evaluate the model on the test set
accuracy = model.score(X_test, y_test)
print("Accuracy:", accuracy)

Accuracy: 0.9780701754385965


In [79]:

from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# Load the breast cancer dataset
X, y = load_breast_cancer(return_X_y=True)

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Train a random forest classifier on the training set
model = RandomForestClassifier()
model.fit(X_train, y_train)

# Evaluate the model on the test set
accuracy = model.score(X_test, y_test)
print("Accuracy:", accuracy)


Accuracy: 0.9473684210526315


In [80]:

from sklearn.datasets import load_breast_cancer
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split

# Load the breast cancer dataset
X, y = load_breast_cancer(return_X_y=True)

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Train a decision tree classifier on the training set
model = DecisionTreeClassifier()
model.fit(X_train, y_train)

# Evaluate the model on the test set
accuracy = model.score(X_test, y_test)
print("Accuracy:", accuracy)

Accuracy: 0.9649122807017544


In [81]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split

# Load the breast cancer dataset
X, y = load_breast_cancer(return_X_y=True)

# Identify the sensitive attributes
sensitive_attrs = [0, 1]  # 0 = radius, 1 = texture

# Generate synthetic data points with reversed values for the sensitive attributes
X_syn = X.copy()
X_syn[:, sensitive_attrs] = 1 - X_syn[:, sensitive_attrs]
y_syn = y

# Combine the original and synthetic data
X_combined = np.concatenate([X, X_syn])
y_combined = np.concatenate([y, y_syn])

# Split the combined data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_combined, y_combined, test_size=0.2)

# Train a decision tree classifier on the combined data
model = DecisionTreeClassifier()
model.fit(X_train, y_train)

# Evaluate the model on the test set
accuracy = model.score(X_test, y_test)
print("Accuracy:", accuracy)


Accuracy: 0.9912280701754386


In [82]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, precision_score, recall_score

# Load the breast cancer dataset
X, y = load_breast_cancer(return_X_y=True)

# Identify the sensitive attributes
sensitive_attrs = [0, 1]  # 0 = radius, 1 = texture

# Generate synthetic data points with reversed values for the sensitive attributes
X_syn = X.copy()
X_syn[:, sensitive_attrs] = 1 - X_syn[:, sensitive_attrs]
y_syn = y

# Combine the original and synthetic data
X_combined = np.concatenate([X, X_syn])
y_combined = np.concatenate([y, y_syn])

# Split the combined data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_combined, y_combined, test_size=0.2)

# Train a decision tree classifier on the combined data
model_with_augmentation = DecisionTreeClassifier()
model_with_augmentation.fit(X_train, y_train)

# Evaluate the model with counterfactual data augmentation on the test set
y_pred = model_with_augmentation.predict(X_test)
conf_matrix = confusion_matrix(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
print("Model with counterfactual data augmentation:")
print("Confusion matrix:", conf_matrix)
print("Precision:", precision)
print("Recall:", recall)

# Split the original data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Train a decision tree classifier on the original data
model_without_augmentation = DecisionTreeClassifier()
model_without_augmentation.fit(X_train, y_train)

# Evaluate the model without counterfactual data augmentation on the test set
y_pred = model_without_augmentation.predict(X_test)
conf_matrix = confusion_matrix(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
print("Model without counterfactual data augmentation:")
print("Confusion matrix:", conf_matrix)
print("Precision:", precision)
print("Recall:", recall)


Model with counterfactual data augmentation:
Confusion matrix: [[ 88   2]
 [  0 138]]
Precision: 0.9857142857142858
Recall: 1.0
Model without counterfactual data augmentation:
Confusion matrix: [[46  2]
 [ 9 57]]
Precision: 0.9661016949152542
Recall: 0.8636363636363636


In [86]:

from sklearn.datasets import fetch_openml
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split

# Load the "Adult" dataset from OpenML
X, y = fetch_openml("adult", version=1, return_X_y=True, as_frame=True)

# Identify the sensitive attributes
sensitive_attrs = ["sex", "race"]

# Generate synthetic data points with reversed values for the sensitive attributes
X_syn = X.copy()

print(1 - X_syn[sensitive_attrs])

X_syn[sensitive_attrs] = 1 - X_syn[sensitive_attrs]
y_syn = y

# Combine the original and synthetic data
X_combined = pd.concat([X, X_syn])
y_combined = pd.concat([y, y_syn])

# Split the combined data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_combined, y_combined, test_size=0.2)

# Train a decision tree classifier on the combined data
model = DecisionTreeClassifier()
model.fit(X_train, y_train)

# Evaluate the model on the test set
accuracy = model.score(X_test, y_test)
print("Accuracy:", accuracy)


TypeError: unsupported operand type(s) for -: 'int' and 'Categorical'

In [90]:

import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import fetch_openml
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split

# Load the "Adult" dataset from OpenML
X, y = fetch_openml("adult", version=1, return_X_y=True, as_frame=True)

# Convert the target variable to a binary label
y = (y == ">50K").astype(int)

# One-hot encode the categorical features
categorical_columns = ["workclass", "education", "marital-status", "occupation", "relationship", "race", "sex"]
X = pd.get_dummies(X, columns=categorical_columns)

X

# Standardize the numeric features
numeric_columns = ["age", "fnlwgt", "education-num", "capital-gain", "capital-loss", "hours-per-week"]
scaler = StandardScaler()
X[numeric_columns] = scaler.fit_transform(X[numeric_columns])

# Identify the sensitive attributes
sensitive_attrs = ["sex_Female", "race_White"]

# Generate synthetic data points with reversed values for the sensitive attributes
X_syn = X.copy()
X_syn[sensitive_attrs] = 1 - X_syn[sensitive_attrs]
y_syn = y

# Combine the original and synthetic data
X_combined = pd.concat([X, X_syn])
y_combined = pd.concat([y, y_syn])

# Split the combined data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_combined, y_combined, test_size=0.2)

# Train a decision tree classifier on the combined data
model = DecisionTreeClassifier()
model.fit(X_train, y_train)

# Evaluate the model on the test set
accuracy = model.score(X_test, y_test)
print("Accuracy:", accuracy)


KeyError: "['capital-gain', 'capital-loss', 'hours-per-week'] not in index"

In [93]:
!pip install pyswarm

Collecting pyswarm
  Downloading pyswarm-0.6.tar.gz (4.3 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: pyswarm
  Building wheel for pyswarm (setup.py): started
  Building wheel for pyswarm (setup.py): finished with status 'done'
  Created wheel for pyswarm: filename=pyswarm-0.6-py3-none-any.whl size=4465 sha256=9028695e3b9e57696526df585c0c90231938123c25eb2890f2861b293dec1389
  Stored in directory: c:\users\hussaini_21000736\appdata\local\pip\cache\wheels\ff\d2\b7\80118e5698de2bd0b8d1b3397abf7fdfc45c15ffc454b52145
Successfully built pyswarm
Installing collected packages: pyswarm
Successfully installed pyswarm-0.6


In [214]:
# Load the dataset
import pandas as pd

df = pd.read_csv('adult.csv', header=None)

# Preprocess the data
columns = ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation', 'relationship',
           'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income']

df.columns = columns
df = pd.get_dummies(df,
                    columns=['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'sex',
                             'native-country'])
df = df.drop(columns=['fnlwgt', 'education-num'])

In [215]:

# Split the data into features and target
X = df.drop(columns=['income'])
y = df['income']

# Train a model on the data
from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier()
model.fit(X, y)

RandomForestClassifier()

In [241]:
import numpy as np
from pyswarm import pso


def false_positive_rate(y_true, y_pred):
    print(y_true)
    print('____________________________________')
    # Calculate the number of false positives
    false_positives = sum((y_pred == 1) & (y_true == 0))

    # Calculate the number of actual negatives
    actual_negatives = sum(y_true == 0)

    # Return the false positive rate
    return false_positives / actual_negatives


def evaluate_fairness(test_case, model, sensitive_attrs, columns=[]):
    """Evaluate the fairness of the model on the given test case.
    
    Args:
        test_case (ndarray): The test case to evaluate.
        model (object): The machine learning model to test.
        sensitive_attrs (list): The sensitive attributes to consider.
    
    Returns:
        float: The fairness score of the model on the test case.
    """
    test_case = [test_case]

    print(test_case)
    # Predict the model's output for the test case
    prediction = model.predict(test_case)

    # Compute the false positive rate for each subgroup defined by the sensitive attributes
    subgroup_fprs = []
    for attr in sensitive_attrs:
        subgroup_fprs.append(false_positive_rate(prediction.label(), test_case[0][attr]))

    # Return the mean false positive rate as the fairness score
    return np.mean(subgroup_fprs)


def optimize_test_case(
        model,
        sensitive_attrs, lb, ub, ieqcons=[],
        f_ieqcons=None, args=(), kwargs={}, swarmsize=100, omega=0.5, phip=0.5, phig=0.5, maxiter=100, minstep=1e-8,
        minfunc=1e-8, debug=False):
    """Optimize a test case using the PSO algorithm.
    
    Args:
        model (object): The machine learning model to test.
        sensitive_attrs (list): The sensitive attributes to consider.
        lb (ndarray): The lower bounds of the search space.
        ub (ndarray): The upper bounds of the search space.
        ieqcons (list): A list of functions of the form f(x,*args) that should return zero or positive values.
        f_ieqcons (function): The inequality constraint function.
        args (tuple): Additional arguments passed to the objective function and constraint functions.
        kwargs (dict): Additional keyword arguments passed to the objective function and constraint functions.
        swarmsize (int): The number of particles in the swarm.
        omega (float): The particle inertia weight.
        phip (float): The particle best position weight.
        phig (float): The swarm best position weight.
        maxiter (int): The maximum number of iterations to run the PSO algorithm.
        minstep (float): The minimum step size of the swarm.
        minfunc (float): The minimum change in the objective function value.
        debug (bool): If True, print debugging information.
    
    Returns:
        ndarray: The optimal test case.
    """

    # Define the objective function for the PSO algorithm
    def objective_function(test_case, *args):
        """Evaluate the fairness of a machine learning model on a test case.
        
        Args:
            test_case (ndarray): The test case to evaluate.
            *args: Additional arguments passed to the fairness evaluation function.
        
        Returns:
            float: The fairness score of the model on the test case.
        """
        # Evaluate the fairness of the model on the test case
        fairness_score = evaluate_fairness(test_case, model, sensitive_attrs)

        print(fairness_score)

        # Return the negative of the fairness score, as we want to maximize fairness
        return -fairness_score

    # Run the PSO algorithm to optimize the test case
    xopt, fopt = pso(objective_function, lb, ub, ieqcons=ieqcons, f_ieqcons=f_ieqcons, args=args, kwargs=kwargs,
                     swarmsize=swarmsize, omega=omega, phip=phip, phig=phig, maxiter=maxiter, minstep=minstep,
                     minfunc=minfunc, debug=debug)

    # Return the optimal test case
    return xopt

In [244]:
df

Unnamed: 0,age,capital-gain,capital-loss,hours-per-week,income,workclass_ ?,workclass_ Federal-gov,workclass_ Local-gov,workclass_ Never-worked,workclass_ Private,...,native-country_ Puerto-Rico,native-country_ Scotland,native-country_ South,native-country_ Taiwan,native-country_ Thailand,native-country_ Trinadad&Tobago,native-country_ United-States,native-country_ Vietnam,native-country_ Yugoslavia,native-country_13
0,0,10,11,12,14,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1,39,2174,0,40,<=50K,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
2,50,0,0,13,<=50K,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
3,38,0,0,40,<=50K,0,0,0,0,1,...,0,0,0,0,0,0,1,0,0,0
4,53,0,0,40,<=50K,0,0,0,0,1,...,0,0,0,0,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32557,27,0,0,38,<=50K,0,0,0,0,1,...,0,0,0,0,0,0,1,0,0,0
32558,40,0,0,40,>50K,0,0,0,0,1,...,0,0,0,0,0,0,1,0,0,0
32559,58,0,0,40,<=50K,0,0,0,0,1,...,0,0,0,0,0,0,1,0,0,0
32560,22,0,0,20,<=50K,0,0,0,0,1,...,0,0,0,0,0,0,1,0,0,0


In [243]:
# Define the sensitive attributes to consider
# print(df.columns.values)
df
sensitive_attrs = ['sex_ Female', 'sex_ Male', 'race_ Amer-Indian-Eskimo', 'race_ Asian-Pac-Islander', 'race_ Black',
                   'race_ Other', 'race_ White']
sensitive_attrs_idx = [df.columns.get_loc(col) for col in sensitive_attrs]

# Define the search space for the test case
lb = np.zeros(X.shape[1])
ub = np.ones(X.shape[1])

# Use the PSO algorithm to optimize the test case
optimal_test_case = optimize_test_case(model, sensitive_attrs_idx, lb, ub, swarmsize=50, maxiter=50)

# print(optimal_test_case)

# Evaluate the fairness of the model on the optimal test case
fairness_score = evaluate_fairness(optimal_test_case, model, sensitive_attrs_idx)
print('Fairness score:', fairness_score)

[array([0.98666545, 0.92235404, 0.74844032, 0.30762444, 0.14215939,
       0.32954875, 0.39320709, 0.11394487, 0.29471626, 0.99449474,
       0.56984754, 0.79720399, 0.61837212, 0.15779359, 0.27300634,
       0.25333823, 0.16375434, 0.63209366, 0.40619807, 0.50837274,
       0.39967202, 0.84398949, 0.48536888, 0.89917827, 0.82283927,
       0.39578988, 0.31830912, 0.29463689, 0.45285185, 0.37574605,
       0.81984636, 0.76724713, 0.07631376, 0.80497819, 0.49426237,
       0.47967667, 0.89424878, 0.52317028, 0.36771479, 0.78694286,
       0.41090328, 0.2301084 , 0.99117481, 0.91485272, 0.59167432,
       0.45226309, 0.19431879, 0.32218755, 0.76428286, 0.54744884,
       0.48827298, 0.72875218, 0.36866756, 0.2410628 , 0.97800672,
       0.85613642, 0.2316033 , 0.22494902, 0.91816818, 0.18150121,
       0.94421085, 0.14161924, 0.09815354, 0.9216622 , 0.42415403,
       0.43438328, 0.89207821, 0.33384911, 0.87655923, 0.97884029,
       0.10649433, 0.92860091, 0.53324836, 0.83450957, 0.6360

AttributeError: 'numpy.ndarray' object has no attribute 'label'

In [179]:
s = pd.Series(list('abca'))

In [181]:
print(s)

0    a
1    b
2    c
3    a
dtype: object


In [182]:
pd.get_dummies(s)

Unnamed: 0,a,b,c
0,1,0,0
1,0,1,0
2,0,0,1
3,1,0,0


In [249]:
!pip install aif360[Reductions]

Collecting fairlearn~=0.7
  Downloading fairlearn-0.8.0-py3-none-any.whl (235 kB)
     -------------------------------------- 235.0/235.0 kB 4.8 MB/s eta 0:00:00
Installing collected packages: fairlearn
Successfully installed fairlearn-0.8.0


In [250]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from aif360.datasets import BinaryLabelDataset
from aif360.algorithms.preprocessing.counterfactual_preprocessor import CounterfactualPreprocessor

# Load the dataset and split into training and test sets
X, y = load_dataset()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the sensitive attributes to consider
sensitive_attrs = ['sex', 'race']

# Convert the data to a BinaryLabelDataset
dataset = BinaryLabelDataset(df=pd.DataFrame(X_train, columns=X.columns), label_names=['label'],
                             protected_attribute_names=sensitive_attrs)

# Create the counterfactual data generator
counterfactual_generator = CounterfactualPreprocessor(sensitive_attrs=sensitive_attrs)

# Generate synthetic counterfactual data points
cf_dataset = counterfactual_generator.fit_transform(dataset)

# Combine the original and counterfactual data into a single dataset
X_cf = np.concatenate([dataset.features, cf_dataset.features])
y_cf = np.concatenate([dataset.labels.ravel(), cf_dataset.labels.ravel()])

# Define the model pipeline
model = Pipeline([
    ('scaler', StandardScaler()),
    ('classifier', RandomForestClassifier(n_estimators=100))
])

# Fit the model on the augmented data
model.fit(X_cf, y_cf)

# Evaluate the model's fairness on the test set
fairness_metric = compute_fairness_metric(X_test, y_test, model, sensitive_attrs)
print(f'Fairness metric: {fairness_metric:.3f}')


ModuleNotFoundError: No module named 'aif360.algorithms.preprocessing.counterfactual_preprocessor'

In [253]:
!pip install rulefit



In [255]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from rulefit import RuleFit

df = pd.read_csv('adult.csv', header=None)

# Preprocess the data
columns = ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation', 'relationship',
           'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income']

df.columns = columns
df = pd.get_dummies(df,
                    columns=['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'sex',
                             'native-country'])
df = df.drop(columns=['fnlwgt', 'education-num'])

# Split the data into features and target
X = df.drop(columns=['income'])
y = df['income']

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a decision tree classifier
clf = DecisionTreeClassifier()

# Train the classifier on the training data
clf.fit(X_train, y_train)

# Create a rule set from the decision tree
rf = RuleFit(clf, feature_names=columns)

# Extract the rules from the rule set
rules = rf.get_rules()

# Print the rules
print(rules)

AttributeError: 'RuleFit' object has no attribute 'coef_'

In [260]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from rulefit import RuleFit

In [261]:
# Load the dataset
df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data', header=None,
                 names=['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation',
                        'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week',
                        'native-country', 'income'])

In [263]:
# Select the features and target variable
X = df.drop(['income'], axis=1)
y = df['income']

# Convert the categorical features to dummy variables
X = pd.get_dummies(X)

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a decision tree classifier
clf = DecisionTreeClassifier()

# Train the classifier on the training data
clf.fit(X_train, y_train)

# Create a rule set from the decision tree
rf = RuleFit(clf)

# Extract the rules from the rule set
rules = rf.get_rules()

# Print the rules
print(rules)

AttributeError: 'RuleFit' object has no attribute 'coef_'

In [264]:
from sklearn.datasets import load_breast_cancer
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split

# Load the breast cancer dataset
data = load_breast_cancer()
X = data['data']
y = data['target']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Train a decision tree classifier
clf = DecisionTreeClassifier()
clf.fit(X_train, y_train)

# Use the classifier to make predictions on the test set
y_pred = clf.predict(X_test)

# Evaluate the performance of the classifier
accuracy = (y_pred == y_test).mean()
print('Accuracy:', accuracy)


Accuracy: 0.8881118881118881


In [270]:
# Extract the decision rules from the trained model
decision_rules = clf.decision_path(X_test).toarray()
decision_rules.shape[1]

31

In [273]:
# Convert the decision rules to a list of strings
rules = []
for i in range(decision_rules.shape[0]):
    rule = []
    for j in range(decision_rules.shape[1]):
        try:
            if decision_rules[i, j] == 1:
                feature = data['feature_names'][j]
                threshold = clf.tree_.threshold[j]
                rule.append(f'{feature} <= {threshold}')
            else:
                rule.append('ELSE')
        except IndexError:
            pass
    rules.append(rule)

print(rules)

[['mean radius <= 0.04891999997198582', 'ELSE', 'ELSE', 'ELSE', 'ELSE', 'ELSE', 'ELSE', 'ELSE', 'ELSE', 'ELSE', 'ELSE', 'ELSE', 'ELSE', 'ELSE', 'ELSE', 'ELSE', 'ELSE', 'ELSE', 'ELSE', 'ELSE', 'worst radius <= 785.7999877929688', 'ELSE', 'ELSE', 'ELSE', 'ELSE', 'ELSE', 'worst concavity <= 0.1918499991297722', 'ELSE', 'worst symmetry <= 0.18205000460147858', 'ELSE'], ['mean radius <= 0.04891999997198582', 'mean texture <= 952.8999938964844', 'mean perimeter <= 42.19000053405762', 'mean area <= 0.4535500109195709', 'mean smoothness <= 30.145000457763672', 'ELSE', 'mean concavity <= 0.004223499912768602', 'ELSE', 'mean symmetry <= 23.199999809265137', 'ELSE', 'ELSE', 'ELSE', 'perimeter error <= -2.0', 'ELSE', 'ELSE', 'ELSE', 'ELSE', 'ELSE', 'ELSE', 'ELSE', 'ELSE', 'ELSE', 'ELSE', 'ELSE', 'ELSE', 'ELSE', 'ELSE', 'ELSE', 'ELSE', 'ELSE', 'ELSE'], ['mean radius <= 0.04891999997198582', 'mean texture <= 952.8999938964844', 'mean perimeter <= 42.19000053405762', 'mean area <= 0.4535500109195709'

In [2]:
pip install git+https://github.com/carla-recourse/carla.git#egg=carla-recourse

Collecting carla-recourse
  Cloning https://github.com/carla-recourse/carla.git to c:\users\hussaini_21000736\appdata\local\temp\pip-install-9531h_1w\carla-recourse_6945ef977495485b9de6d0c3c1b78002
  Resolved https://github.com/carla-recourse/carla.git to commit 24db00aa8616eb2faedea0d6edf6e307cee9d192
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting protobuf<=3.21
  Using cached protobuf-3.20.3-cp39-cp39-win_amd64.whl (904 kB)
Collecting lime==0.2.0.1
  Using cached lime-0.2.0.1.tar.gz (275 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting mip==1.12.0
  Using cached mip-1.12.0-py3-none-any.whl (47.1 MB)
Collecting numpy==1.19.4
  Using cached numpy-1.19.4-cp39-cp39-win_amd64.whl (13.0 MB)
Collecting pandas==1.1.4
  Using cached pandas-1.1.4-cp39-cp39-win_amd64.whl (8.9 MB)
Collecting recourse==1.0.0
  Using cached recourse-1.0.0-py3-none-any.whl (45 kB)
Co

  Running command git clone --filter=blob:none --quiet https://github.com/carla-recourse/carla.git 'C:\Users\hussaini_21000736\AppData\Local\Temp\pip-install-9531h_1w\carla-recourse_6945ef977495485b9de6d0c3c1b78002'
ERROR: Could not find a version that satisfies the requirement tensorflow==1.14.0 (from carla-recourse) (from versions: 2.5.0, 2.5.1, 2.5.2, 2.5.3, 2.6.0rc0, 2.6.0rc1, 2.6.0rc2, 2.6.0, 2.6.1, 2.6.2, 2.6.3, 2.6.4, 2.6.5, 2.7.0rc0, 2.7.0rc1, 2.7.0, 2.7.1, 2.7.2, 2.7.3, 2.7.4, 2.8.0rc0, 2.8.0rc1, 2.8.0, 2.8.1, 2.8.2, 2.8.3, 2.8.4, 2.9.0rc0, 2.9.0rc1, 2.9.0rc2, 2.9.0, 2.9.1, 2.9.2, 2.9.3, 2.10.0rc0, 2.10.0rc1, 2.10.0rc2, 2.10.0rc3, 2.10.0, 2.10.1, 2.11.0rc0, 2.11.0rc1, 2.11.0rc2, 2.11.0, 2.11.1, 2.12.0rc0, 2.12.0rc1, 2.12.0, 2.13.0rc0)
ERROR: No matching distribution found for tensorflow==1.14.0
