In [2]:
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline

import tensorflow as tf
import tensorflow.keras as keras
from alibi.explainers import Counterfactual
from alibi.explainers import CounterfactualRLTabular, CounterfactualRL
from alibi.explainers import CounterfactualProto
#from alibi.datasets import fetch_adult
from alibi.models.tensorflow import HeAE
from alibi.models.tensorflow import Actor, Critic
#from alibi.models.tensorflow import ADULTEncoder, ADULTDecoder
from alibi.explainers.cfrl_base import Callback
from alibi.explainers.backends.cfrl_tabular import get_he_preprocessor, get_statistics, \
    get_conditional_vector, apply_category_mapping



credit_data = pd.read_csv("../datasets/Credit.csv",index_col=0)

# Identify numeric and categorical columns
numeric_cols = credit_data.select_dtypes(include=['float64', 'int64']).columns.tolist()
categorical_cols = credit_data.select_dtypes(include=['object']).columns.tolist()

# Remove the target column 'Default' from the numeric columns
numeric_cols.remove('Default')

category_map = {}
for col in categorical_cols:
    category_map[col] = credit_data[col].unique().tolist()
    
feature_names = credit_data.drop(columns=['Default']).columns.tolist()

# Split data into train and test
X = credit_data.drop(columns=['Default'])
y = credit_data['Default']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=13)


categorical_ids = [X_train.columns.get_loc(col) for col in categorical_cols]
numerical_ids = [X_train.columns.get_loc(col) for col in numeric_cols]


# Define numerical standard scaler.
num_transf = StandardScaler()

# Define categorical one-hot encoder.
cat_transf = OneHotEncoder(handle_unknown="ignore")

# Define column transformer
preprocessor = ColumnTransformer(
    transformers=[
        ("cat", cat_transf, categorical_ids),
        ("num", num_transf, numerical_ids),
    ],
    sparse_threshold=0
)


# Fit preprocessor.
preprocessor.fit(X_train)

# Preprocess train and test dataset.
X_train_ohe = preprocessor.transform(X_train)
X_test_ohe = preprocessor.transform(X_test)



In [3]:
X_train

Unnamed: 0,checkingstatus1,duration,history,purpose,amount,savings,employ,installment,sex,residence,age,housing,cards,liable,tele,foreign
134,NoAccount,18,poor,goods/repair,1984,DM0-100,1-7years,4,male,4,47,forfree,2,1,no,foreign
234,DM0-200,18,poor,goods/repair,866,DM0-100,1-7years,4,male,2,25,own,1,1,no,foreign
644,NoAccount,24,terrible,goods/repair,1851,DM0-100,1-7years,4,male,2,33,own,2,1,yes,foreign
498,NoAccount,24,terrible,goods/repair,1585,DM0-100,1-7years,4,male,3,40,own,2,1,no,foreign
282,NoAccount,12,poor,goods/repair,1574,DM0-100,1-7years,4,male,2,50,own,1,1,no,foreign
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
743,NoAccount,21,poor,goods/repair,3160,DM0-100,7+years,4,male,3,41,own,1,1,yes,foreign
529,<DM0,36,poor,goods/repair,2302,DM0-100,1-7years,4,male,4,31,rent,1,1,no,foreign
75,<DM0,36,poor,edu,1977,DM0-100,7+years,4,male,4,40,own,1,1,yes,foreign
177,<DM0,12,poor,goods/repair,2577,DM0-100,1-7years,2,male,1,42,own,1,1,no,foreign


In [4]:
X_train_ohe.shape

(800, 35)

In [96]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, classification_report
from sklearn.metrics import accuracy_score

from sklearn.preprocessing import MinMaxScaler
from xgboost import XGBClassifier
import numpy as np

clf = XGBClassifier(min_child_weight=0.5, max_depth=3, gamma=0.2)
clf.fit(X_train_ohe, y_train)

# Check the model's accuracy on the test set
predictor = lambda x: clf.predict_proba(preprocessor.transform(x))
acc = accuracy_score(y_true=y_test, y_pred=predictor(X_test).argmax(axis=1))
print("Accuracy: %.3f" % acc)


y_pred = clf.predict(X_test_ohe)
print(f"F1 Score: {f1_score(y_test, y_pred)}")

print(classification_report(y_test, y_pred))

index = np.where(y_pred == 1)[0][0]
X_orig = X_test_ohe[index].reshape(1, -1)
print(X_orig)



Accuracy: 0.735
F1 Score: 0.49523809523809526
              precision    recall  f1-score   support

           0       0.79      0.85      0.82       142
           1       0.55      0.45      0.50        58

    accuracy                           0.73       200
   macro avg       0.67      0.65      0.66       200
weighted avg       0.72      0.73      0.73       200

[[ 0.          1.          0.          0.          1.          0.
   0.          0.          0.          0.          1.          0.
   0.          1.          0.          1.          0.          0.
   0.          1.          0.          0.          1.          0.
   1.          0.          1.          0.          2.21537803  1.79130704
  -1.81031195 -1.69057225 -0.49532991  1.01279362 -0.43033148]]


In [97]:
tf.compat.v1.disable_eager_execution()
# Calculate the min and max values for the entire training and test datasets
feature_min = np.min(X_train_ohe, axis=0)
feature_max = np.max(X_train_ohe, axis=0)

print(feature_min)
print(feature_max)

# Custom predict function for CounterfactualProto
def predict_fn(X):
    # The predict_proba method of the pipeline returns an array of shape (n_samples, 2)
    # Return both columns as the CounterfactualProto explainer expects a probability for each class
    pred_proba = clf.predict_proba(X)
    return np.hstack([1 - pred_proba[:, 1].reshape(-1, 1), pred_proba[:, 1].reshape(-1, 1)])


# Initialize the explainer
cf = CounterfactualProto(predict_fn, shape, use_kdtree=True, theta=10., max_iterations=1000,
                         feature_range=(feature_min, feature_max), 
                         c_init=1., c_steps=10)

# Fit the explainer
cf.fit(X_train_preprocessed)  # make sure to use values here as the model was trained on ndarray, not pandas DataFrame

# Generate explanations
explanation = cf.explain(X_orig)
print(f'Original prediction: {explanation.orig_class}')
print(f'Counterfactual prediction: {explanation.cf["class"]}')



No encoder specified. Using k-d trees to represent class prototypes.


[ 0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.         -1.39036048 -1.06095699
 -1.81031195 -1.69057225 -1.44934682 -0.71478611 -0.43033148]
[1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         1.         1.
 1.         1.         1.         1.         3.19876126 5.34375567
 0.90855456 1.04535047 3.40746652 4.46795308 2.32379001]
Original prediction: 1
Counterfactual prediction: 0


In [99]:
import numpy as np

# Extract original and counterfactual values
X_original_values = X_orig
X_cf_values = explanation.cf['X']
ohe_feature_names_retrieved = X.columns.tolist()

def inverse_transform_final_fix(data):
    # Split the data into numeric and categorical parts
    data_numeric = data[:, :len(numeric_cols)]
    data_categorical = data[:, len(numeric_cols):]

    # Inverse transform numeric data
    data_numeric_inv = preprocessor.named_transformers_['num'].inverse_transform(data_numeric)
    
    # Convert one-hot encoded categorical data back to original labels
    df_cat_encoded = pd.DataFrame(data_categorical, columns=ohe_feature_names_retrieved)
    data_categorical_inv = []
    for col in categorical_cols:
        col_data = df_cat_encoded.filter(like=col).idxmax(axis=1).str.split("_", expand=True)[1].to_numpy()
        data_categorical_inv.append(col_data)
    
    data_categorical_inv = np.array(data_categorical_inv).T
    
    # Concatenate numeric and categorical data
    data_inv_transformed = np.column_stack((data_numeric_inv, data_categorical_inv))
    
    return data_inv_transformed

# For demonstration purposes, I'll use the preprocessed X_train_ohe as the input to the function
# In your actual implementation, you'd use the variables X_original_values and X_cf_values
X_original_decoded_without_imputer = inverse_transform_without_imputer(X_original_values)
X_cf_decoded_without_imputer = inverse_transform_without_imputer(X_cf_values)

# Get column names directly from the original X (excluding the target column 'Default')
all_column_names_direct = X.columns.tolist()

# Now, create the DataFrames using these column names
X_original_df_direct = pd.DataFrame(X_original_decoded_without_imputer, columns=all_column_names_direct)
X_cf_df_direct = pd.DataFrame(X_cf_decoded_without_imputer, columns=all_column_names_direct)

X_original_df_direct.head()

Unnamed: 0,checkingstatus1,duration,history,purpose,amount,savings,employ,installment,sex,residence,age,housing,cards,liable,tele,foreign
0,20.96625,6115.801016,2.9975,2.85375,47.241444,1.41375,1.15625,NoAccount,terrible,edu,DM0-100,0-1year,female,forfree,yes,foreign


In [100]:
X_cf_df_direct.head()

Unnamed: 0,checkingstatus1,duration,history,purpose,amount,savings,employ,installment,sex,residence,age,housing,cards,liable,tele,foreign
0,20.966249,5146.091797,2.9975,3.411223,43.247799,1.41375,1.15625,NoAccount,terrible,edu,DM0-100,7+years,female,own,yes,foreign


In [39]:
# # Extract original and counterfactual values
# X_original_values = X_orig
# X_cf_values = explanation.cf['X']

# def inverse_transform_without_imputer(data):
#     # Extract the transformers from the fitted preprocessor
#     num_transformer = preprocessor.named_transformers_['num']
#     cat_transformer = preprocessor.named_transformers_['cat']

#     # Split the data into numeric and categorical parts
#     data_numeric = data[:, :len(numeric_cols)]
#     data_categorical = data[:, len(numeric_cols):]

#     # Inverse transform numeric data using only the scaler
#     data_numeric = num_transformer.named_steps['scaler'].inverse_transform(data_numeric)
    
#     # Inverse transform categorical data
#     data_cat_encoded = cat_transformer.named_steps['onehot'].inverse_transform(data_categorical)
    
#     # Concatenate numeric and categorical data
#     data_inv_transformed = np.column_stack((data_numeric, data_cat_encoded))
    
#     return data_inv_transformed

# # Apply the updated inverse transformation function without imputer
# X_original_decoded_without_imputer = inverse_transform_without_imputer(X_original_values)
# X_cf_decoded_without_imputer = inverse_transform_without_imputer(X_cf_values)

# # Get column names directly from the original X (excluding the target column 'Default')
# all_column_names_direct = X.columns.tolist()

# # Now, create the DataFrames using these column names
# X_original_df_direct = pd.DataFrame(X_original_decoded_without_imputer, columns=all_column_names_direct)
# X_cf_df_direct = pd.DataFrame(X_cf_decoded_without_imputer, columns=all_column_names_direct)

# X_original_df_direct, X_cf_df_direct



(array([[48.0, 8358.0, 1.0, 0.9999999999999998, 30.0, 2.0, 1.0, 'DM0-200',
         'good', 'newcar', 'DM100-1000', '0-1year', 'female', 'own', 'no',
         'foreign']], dtype=object),
 array([[48.0, 8358.0, 1.0, 1.096319317817688, 30.0, 1.4979766607284546,
         1.0, 'DM0-200', 'good', 'newcar', 'DM100-1000', '0-1year',
         'female', 'own', 'no', 'foreign']], dtype=object))

# DiCE random

In [45]:
continuous_features = ['duration', 'amount', 'installment', 'residence', 'age', 'cards', 'liable']
categorical_features = ['checkingstatus1', 'history', 'purpose', 'savings', 'employ', 'sex', 'housing', 'tele', 'foreign']
immutable_features = ['sex', 'liable', 'foreign', 'purpose']
non_decreasing_features = ['age', 'employ']
correlated_features = []

In [54]:
import dice_ml
import time
from dice_ml.utils import helpers

# Get the transformed column names after one-hot encoding
ohe_columns = preprocessor.named_transformers_['cat'].named_steps['onehot'].get_feature_names_out(categorical_cols)

# Combine with numeric columns to get the full list of columns
transformed_columns = np.concatenate([numeric_cols, ohe_columns])

# Convert the preprocessed data to DataFrame with the correct columns
full_train_df = pd.DataFrame(X_train_preprocessed, columns=transformed_columns)

# Add the target column
full_train_df['target'] = y_train.values

# Prepare the instance for which you want to generate counterfactuals
query_instance = pd.DataFrame(X_orig, columns=transformed_columns)

# Convert object columns to integer type
for col in full_train_df.columns:
    if full_train_df[col].dtype == 'object':
        full_train_df[col] = full_train_df[col].astype(int)

d = dice_ml.Data(dataframe=full_train_df, continuous_features=continuous_features, outcome_name='target')
backend = 'sklearn'
method = 'random'

start = time.time()

full_train_df = full_train_df.astype(float)
query_instance = query_instance.astype(float)

class ModelWrapper:
    def __init__(self, model):
        self.model = model
    
    def predict_proba(self, data):
        if isinstance(data, pd.DataFrame):
            return self.model(data.values)
        else:
            return self.model(data)

def model_predict(data):
    if isinstance(data, pd.DataFrame):
        return clf.predict_proba(data.values)
    else:
        return clf.predict_proba(data)

# 使用包装器
wrapped_model = ModelWrapper(model_predict)
m = dice_ml.Model(model=wrapped_model, backend=backend)

# 创建 DiCE explainer 并生成反事实
exp = dice_ml.Dice(d, m, method = method)
dice_exp = exp.generate_counterfactuals(query_instance, total_CFs=5, desired_class="opposite")
dice_exp.visualize_as_dataframe(show_only_changes=True)



100%|█████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.55it/s]

Query instance (original outcome : 1)





Unnamed: 0,duration,amount,installment,residence,age,cards,liable,checkingstatus1_<DM0,checkingstatus1_DM0-200,checkingstatus1_DM200+,...,sex_female,sex_male,housing_forfree,housing_own,housing_rent,tele_no,tele_yes,foreign_foreign,foreign_german,target
0,2.215378,1.791307,-1.810312,-1.690572,-0.49533,1.012794,-0.430331,0.0,1.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1



Diverse Counterfactual set (new outcome: 0.0)


Unnamed: 0,duration,amount,installment,residence,age,cards,liable,checkingstatus1_<DM0,checkingstatus1_DM0-200,checkingstatus1_DM200+,...,sex_female,sex_male,housing_forfree,housing_own,housing_rent,tele_no,tele_yes,foreign_foreign,foreign_german,target
0,2.2153780341986105,1.7913070429488238,-1.8103119480428336,-1.690572245727784,-0.4953299142371799,1.0127936180915686,-0.4303314829119352,0.0,1.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0
1,2.2153780341986105,3.39952994,-1.8103119480428336,-1.690572245727784,-0.4953299142371799,1.0127936180915686,-0.4303314829119352,0.0,1.0,0.0,...,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0
2,2.2153780341986105,1.7913070429488238,-1.8103119480428336,-1.690572245727784,-0.4953299142371799,1.0127936180915686,1.11628832,0.0,1.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0
3,2.2153780341986105,1.7913070429488238,-1.8103119480428336,-1.690572245727784,-0.4953299142371799,1.0127936180915686,1.10191108,0.0,1.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0
4,2.2153780341986105,1.7913070429488238,-1.8103119480428336,-1.690572245727784,-0.4953299142371799,1.0127936180915686,-0.4303314829119352,0.0,1.0,0.0,...,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0


In [59]:
# 1. 从DiCE的输出中获取反事实数据
counterfactuals_df = dice_exp.cf_examples_list[0].final_cfs_df

def inverse_transform_without_imputer2(data):
    # Ensure the input data is 2D
    data = np.atleast_2d(data)
    
    cat_cols_encoded = preprocessor.named_transformers_['cat'].named_steps['onehot'].get_feature_names_out(categorical_cols).tolist()

    # Extract the transformers from the fitted preprocessor
    num_transformer = preprocessor.named_transformers_['num']
    cat_transformer = preprocessor.named_transformers_['cat']

    # Split the data into numeric and categorical parts
    data_numeric = data[:, :len(numeric_cols)]
    data_categorical = data[:, -len(cat_cols_encoded):]  # Adjusted this line

    # Inverse transform numeric data using only the scaler
    data_numeric = num_transformer.named_steps['scaler'].inverse_transform(data_numeric)
    
    # Inverse transform categorical data
    data_cat_encoded = cat_transformer.named_steps['onehot'].inverse_transform(data_categorical)
    
    # Concatenate numeric and categorical data
    data_inv_transformed = np.column_stack((data_numeric, data_cat_encoded))
    
    # If the original input was 1D, reshape the output to be 1D
    if len(data.shape) == 1:
        data_inv_transformed = data_inv_transformed.ravel()
    
    return data_inv_transformed



# 2. 使用inverse_transform函数将数据从编码格式转换回原始格式
original_query_instance = inverse_transform_without_imputer2(query_instance.values[0])
counterfactuals_original = counterfactuals_df.apply(lambda row: inverse_transform_without_imputer2(row.values), axis=1)




Original Query Instance:
[[48.0 8358.0 1.0 0.9999999999999998 30.0 2.0 1.0 'DM0-200' 'good'
  'newcar' 'DM100-1000' '0-1year' 'female' 'own' 'no' 'foreign']]

Counterfactuals:
0    [[48.0, 8358.0, 1.0, 0.9999999999999998, 30.0,...
1    [[48.0, 12914.9615209482, 1.0, 0.9999999999999...
2    [[48.0, 8358.0, 1.0, 0.9999999999999998, 30.0,...
3    [[48.0, 8358.0, 1.0, 0.9999999999999998, 30.0,...
4    [[48.0, 8358.0, 1.0, 0.9999999999999998, 30.0,...
dtype: object


In [61]:
# Convert the counterfactuals to the appropriate shape and then to a DataFrame
counterfactuals_original_array = np.vstack(counterfactuals_original)
counterfactuals_original_formatted = pd.DataFrame(counterfactuals_original_array, columns=numeric_cols + categorical_cols)

# Display the results
print("Original Query Instance:")
display(original_query_instance_formatted)
print("\nCounterfactuals:")
display(counterfactuals_original_formatted)


Original Query Instance:


Unnamed: 0,duration,amount,installment,residence,age,cards,liable,checkingstatus1,history,purpose,savings,employ,sex,housing,tele,foreign
0,48.0,8358.0,1.0,1.0,30.0,2.0,1.0,DM0-200,good,newcar,DM100-1000,0-1year,female,own,no,foreign



Counterfactuals:


Unnamed: 0,duration,amount,installment,residence,age,cards,liable,checkingstatus1,history,purpose,savings,employ,sex,housing,tele,foreign
0,48.0,8358.0,1.0,1.0,30.0,2.0,1.0,<DM0,,goods/repair,DM0-100,unemployed,,forfree,yes,
1,48.0,12914.961521,1.0,1.0,30.0,2.0,1.0,<DM0,,goods/repair,DM0-100,unemployed,male,forfree,yes,
2,48.0,8358.0,1.0,1.0,30.0,2.0,1.561566,<DM0,,goods/repair,DM0-100,unemployed,,forfree,yes,
3,48.0,8358.0,1.0,1.0,30.0,2.0,1.556345,<DM0,,goods/repair,DM0-100,unemployed,,forfree,yes,
4,48.0,8358.0,1.0,1.0,30.0,2.0,1.0,<DM0,,goods/repair,DM0-100,unemployed,male,forfree,yes,


# Alibi RL

In [74]:
class HeAE(keras.Model):
    def __init__(self, encoder: keras.Model, decoder: keras.Model, **kwargs) -> None:
        super().__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder

    def call(self, x: tf.Tensor, **kwargs):
        z = self.encoder(x)
        x_hat = self.decoder(z)
        return x_hat

In [83]:
# Define attribute types, required for datatype conversion.
feature_types = {
    "checkingstatus1": str,
    "duration": int,
    "history": str,
    "purpose": str,
    "amount": int,
    "savings": str,
    "employ": str,
    "installment": int,
    "sex": str,
    "residence": int,
    "age": int,
    "housing": str,
    "cards": int,
    "liable": int,
    "tele": str,
    "foreign": str
}

print(numerical_ids)
# Define data preprocessor and inverse preprocessor. The invers preprocessor include datatype conversions.
heae_preprocessor, heae_inv_preprocessor = get_he_preprocessor(X=X_train,
                                                               feature_names=feature_names,
                                                               category_map=category_map,
                                                               feature_types=feature_types)

# Define trainset
trainset_input = heae_preprocessor(X_train).astype(np.float32)
trainset_outputs = {
    "output_1": trainset_input[:, :len(numerical_ids)]
}

for i, cat_id in enumerate(categorical_ids):
    trainset_outputs.update({
        f"output_{i+2}": X_train[:, cat_id]
    })

trainset = tf.data.Dataset.from_tensor_slices((trainset_input, trainset_outputs))
trainset = trainset.shuffle(1024).batch(128, drop_remainder=True)

NameError: name 'numerical_ids' is not defined

In [62]:
# Define constants
COEFF_SPARSITY = 0.5               # sparisty coefficient
COEFF_CONSISTENCY = 0.5            # consisteny coefficient
TRAIN_STEPS = 10000                # number of training steps -> consider increasing the number of steps
BATCH_SIZE = 100                   # batch size

In [5]:
explainer = CounterfactualRLTabular(predictor = model_predict,
                                    encoder=heae.encoder,
                                    decoder=heae.decoder,
                                    latent_dim=LATENT_DIM,吗 
                                    encoder_preprocessor=heae_preprocessor,
                                    decoder_inv_preprocessor=heae_inv_preprocessor,
                                    coeff_sparsity=COEFF_SPARSITY,
                                    coeff_consistency=COEFF_CONSISTENCY,
                                    category_map= ,
                                    feature_names= ,
                                    ranges=(feature_min, feature_max),
                                    immutable_features=immutable_features,
                                    train_steps=TRAIN_STEPS,
                                    batch_size=BATCH_SIZE,
                                    backend="sklearn")
explainer = explainer.fit(X=X_train_preprocessed)