In [1]:
import logging

import tensorflow as tf

# Only let ERROR-and-above records through TensorFlow's logger, which hides the
# deprecation messages. logging.ERROR is the named form of the literal 40.
tf.get_logger().setLevel(logging.ERROR)
# Disable TF2 behaviour as alibi code still relies on TF1 constructs. The call
# stays ahead of the alibi import, exactly as in the original cell order.
tf.compat.v1.disable_v2_behavior()
from alibi.explainers import CounterfactualProto


# Sanity output: the TF version in use and confirmation that eager mode is off.
print('TF version: ', tf.__version__)
print('Eager execution enabled: ', tf.executing_eagerly()) # False

  from .autonotebook import tqdm as notebook_tqdm


TF version:  2.14.1
Eager execution enabled:  False


In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
import random

def set_seed(s=0):
    """Seed Python's ``random``, NumPy's global RNG and TensorFlow with ``s``.

    Args:
        s: Seed value handed unchanged to all three seeding functions
            (defaults to 0).
    """
    # Same order as before: standard library, then NumPy, then TensorFlow.
    for seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
        seed_fn(s)

import os

# Seed every RNG up front so a Restart & Run All is repeatable; set_seed() is
# defined earlier in this cell but was never actually invoked.
set_seed(0)

# Dataset location. The default is the original machine-specific path; set the
# HEART_CSV_PATH environment variable to run the notebook on another machine.
HEART_CSV_PATH = os.environ.get('HEART_CSV_PATH',
                                'E://Graduation_Project//datasets//heart.csv')
df = pd.read_csv(HEART_CSV_PATH)

categorical_features = ['sex', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'ca', 'thal']
# Every column that is neither categorical nor the 'target' label is treated as
# numeric; the resulting order follows df.columns.
_non_numeric = set(categorical_features) | {'target'}
numeric_features = [col for col in df.columns if col not in _non_numeric]

In [None]:
import dice_ml
from dice_ml.utils.helpers import DataTransfomer
from sklearn.model_selection import train_test_split
# Stratified 80/20 split of the full frame (the 'target' column is still attached
# to both partitions at this point).
target = df['target']
split_kwargs = dict(test_size=0.2, random_state=42, stratify=target)
train_dataset, test_dataset, y_train, y_test = train_test_split(df, target, **split_kwargs)

# Feature-only versions of both partitions.
X_train_df, X_test_df = (part.drop(columns='target')
                         for part in (train_dataset, test_dataset))

# DiCE data interface, built from the complete dataframe (train and test rows).
d = dice_ml.Data(dataframe=df,
                 outcome_name='target',
                 continuous_features=numeric_features)

# Transformer configured with 'ohe-min-max', given the data interface's
# parameters and initialised before it is applied to either partition.
transformer = DataTransfomer(func='ohe-min-max')
transformer.feed_data_params(d)
transformer.initialize_transform_func()

X_train, X_test = (transformer.transform(frame) for frame in (X_train_df, X_test_df))




In [None]:
import tensorflow as tf
# Repeats the disable_v2_behavior() call already made in the first cell, so this
# cell also switches TF2 behaviour off before keras is imported below.
tf.compat.v1.disable_v2_behavior()
from tensorflow import keras


def build_simple_dnn(input_dim=31, hidden_units=16, n_classes=2):
    """Build and compile a one-hidden-layer softmax classifier.

    The defaults reproduce the previously hard-coded architecture, so calling
    ``build_simple_dnn()`` with no arguments behaves as before.

    Args:
        input_dim: Number of input features (default 31).
        hidden_units: Width of the ReLU hidden layer (default 16).
        n_classes: Number of softmax outputs (default 2).

    Returns:
        A compiled ``keras`` Sequential model using the Adam optimiser,
        categorical cross-entropy loss and the accuracy metric.
    """
    model = keras.models.Sequential([
        keras.layers.Dense(hidden_units, activation='relu', input_shape=(input_dim,)),
        keras.layers.Dense(n_classes, activation='softmax'),
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Instantiate the network with its default architecture, then restore weights
# from 'my_model_weights.h5' (a path relative to the working directory).
# NOTE(review): presumably the file was saved from this same architecture — confirm.
model = build_simple_dnn()
model.load_weights('my_model_weights.h5')


In [None]:
import numpy as np

# 1) Apply OHE + min-max scaling to the feature columns (outcome dropped) and let
#    the data interface derive ohe_encoded_feature_names from the encoded frame.
raw_features_df = d.data_df.drop(columns=[d.outcome_name])
ohe_norm_df = d.get_ohe_min_max_normalized_data(raw_features_df)
d.create_ohe_params(one_hot_encoded_data=ohe_norm_df)

ohe_cols = list(d.ohe_encoded_feature_names)

# 2) Column-index group of every categorical feature in the encoded matrix
#    (a list of index lists, one per categorical feature).
cat_index_groups = d.get_encoded_categorical_feature_indexes()

# 3) Encoded width (number of OHE columns) of each categorical feature.
cat_sizes = [len(group) for group in cat_index_groups]

# 4) Start column of each categorical feature, computed two independent ways:
#    - cumulative sum: number of continuous columns + widths of the preceding groups
#      (valid only if continuous columns come first and groups follow in order);
#    - minimum index of each group.
num_numeric = len(numeric_features)
cat_start_indices_by_cumsum = (np.cumsum([num_numeric] + cat_sizes[:-1])
                               if cat_sizes else np.array([], dtype=int))
cat_start_indices_by_min = np.array([min(group) for group in cat_index_groups], dtype=int)

# 5) Validate the layout before relying on it. A {start: width} mapping can only
#    describe a contiguous run of columns, and the two start computations must agree.
for group in cat_index_groups:
    group_start = int(min(group))
    assert sorted(int(idx) for idx in group) == list(range(group_start, group_start + len(group))), \
        f"OHE columns of a categorical feature are not contiguous: {group}"
assert np.array_equal(cat_start_indices_by_cumsum, cat_start_indices_by_min), \
    "Categorical start indices disagree: encoded column layout is not 'continuous first, then categorical in order'"

# 6) Build the dictionary {start index: width} consumed later as cat_vars.
cat_vars_ohe = {int(start): int(size) for start, size in zip(cat_start_indices_by_min, cat_sizes)}

print("Column names after OHE:", ohe_cols)
print("Continuous features:", numeric_features)
print("Categorical features:", categorical_features)
print("OHE dimension for each categorical feature cat_sizes:", cat_sizes)
print("Starting index for each categorical feature (obtained by min):", cat_start_indices_by_min.tolist())
print("Start index to dimension mapping cat_vars_ohe:", cat_vars_ohe)

In [6]:
# Instance to explain: row 22 of the encoded test set, kept 2-D (a single row).
X = X_test.iloc[[22]].to_numpy(dtype=float)
shape = X.shape

# Search hyper-parameters passed to CounterfactualProto in the next cell.
beta, c_init, c_steps, max_iterations = 0.5, 1, 5, 500

# Feature bounds: every feature is limited to the interval [0, 1].
rng = (0, 1.)
# One bound per column of X_train_df, i.e. the frame before transformer.transform().
# NOTE(review): confirm this pre-encoding width is what alibi expects when ohe=True.
rng_shape = (1,) + X_train_df.shape[1:]
feature_range = tuple(np.full(rng_shape, bound, dtype=np.float32) for bound in rng)

In [7]:

# Keyword configuration for the prototype-based counterfactual explainer.
proto_kwargs = dict(
    beta=beta,
    cat_vars=cat_vars_ohe,          # {start column: width} for each categorical feature
    use_kdtree=True,
    ohe=True,                       # OHE flag
    max_iterations=max_iterations,
    feature_range=feature_range,
    c_init=c_init,
    c_steps=c_steps,
)
cf = CounterfactualProto(model, shape, **proto_kwargs)
# fit() stays the last expression so the notebook displays whatever it returns.
cf.fit(X_train.to_numpy(), d_type='mvdm')



  updates=self.state_updates,
No encoder specified. Using k-d trees to represent class prototypes.


CounterfactualProto(meta={
  'name': 'CounterfactualProto',
  'type': ['blackbox', 'tensorflow', 'keras'],
  'explanations': ['local'],
  'params': {
              'kappa': 0.0,
              'beta': 0.5,
              'gamma': 0.0,
              'theta': 0.0,
              'cat_vars': {
                            5: 2,
                            7: 5,
                            12: 2,
                            14: 3,
                            17: 2,
                            19: 3,
                            22: 4,
                            26: 5}
                          ,
              'ohe': True,
              'use_kdtree': True,
              'learning_rate_init': 0.01,
              'max_iterations': 500,
              'c_init': 1,
              'c_steps': 5,
              'eps': (0.001, 0.001),
              'clip': (-1000.0, 1000.0),
              'update_num_grad': 1,
              'write_dir': None,
              'feature_range': (array([[0., 0., 0., 0., 0., 0.,

In [8]:
# Run the counterfactual search for the single instance X prepared earlier.
explanation = cf.explain(X)
# Label the encoded instance with the OHE column names so it can be decoded.
X_df = pd.DataFrame(X, columns=d.ohe_encoded_feature_names)
# Decode back to the original feature space; as the cell's last expression the
# result is displayed.
transformer.inverse_transform(X_df)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal
0,46.0,1,3,150.0,231.0,0,0,147.0,0,3.6,2,0,normal


In [9]:

# Decode the counterfactual stored under explanation.cf['X'] the same way as the
# original instance, so both can be compared in the original feature space.
# NOTE(review): if explain() found no counterfactual, explanation.cf may be None
# and the subscript below would raise — confirm against the alibi documentation.
cf_ohe_df = pd.DataFrame(explanation.cf['X'], columns=d.ohe_encoded_feature_names)
transformer.inverse_transform(cf_ohe_df)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal
0,46.0,0,2,150.0,231.0,0,2,141.0,0,6.2,2,1,1


In [10]:
# (rows, encoded columns) of the transformed test set.
X_test.shape

(61, 31)

In [11]:
import time

# One (original_df, counterfactual_df) pair per test instance, both decoded back
# to the original feature space.
cfs = []

# Loop-invariant values hoisted out of the loop.
ohe_columns = d.ohe_encoded_feature_names
test_instances = X_test.to_numpy()
n_instances = len(test_instances)

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# durations; time.time() can jump if the system clock is adjusted. The timed span
# is unchanged: it covers the search plus decoding and the progress print.
start = time.perf_counter()

for i, X_test_i in enumerate(test_instances):
    instance = X_test_i.reshape(1, -1)
    explanation = cf.explain(instance)
    X_df = pd.DataFrame(instance, columns=ohe_columns)
    # NOTE(review): if no counterfactual is found, explanation.cf may be None and
    # this subscript would abort the run; how such instances should be recorded
    # depends on what the XAI_metrics helpers expect — confirm before changing.
    cf_ohe_df = pd.DataFrame(explanation.cf['X'], columns=ohe_columns)
    orig_pd = transformer.inverse_transform(X_df)
    cf_pd = transformer.inverse_transform(cf_ohe_df)
    cfs.append((orig_pd, cf_pd))
    print(f"The Counterfactual of Index {i} had been generated")
end = time.perf_counter()

# Average wall-clock seconds per instance; NaN instead of a ZeroDivisionError
# when the test set is empty.
time_Alibi_proto = (end - start) / n_instances if n_instances else float('nan')

The Counterfactual of Index 0 had been generated
The Counterfactual of Index 1 had been generated
The Counterfactual of Index 2 had been generated
The Counterfactual of Index 3 had been generated
The Counterfactual of Index 4 had been generated
The Counterfactual of Index 5 had been generated
The Counterfactual of Index 6 had been generated
The Counterfactual of Index 7 had been generated
The Counterfactual of Index 8 had been generated
The Counterfactual of Index 9 had been generated
The Counterfactual of Index 10 had been generated
The Counterfactual of Index 11 had been generated
The Counterfactual of Index 12 had been generated
The Counterfactual of Index 13 had been generated
The Counterfactual of Index 14 had been generated
The Counterfactual of Index 15 had been generated
The Counterfactual of Index 16 had been generated
The Counterfactual of Index 17 had been generated
The Counterfactual of Index 18 had been generated
The Counterfactual of Index 19 had been generated
The Counte

In [16]:
import importlib
import XAI_metrics   # import the module once first so there is something to reload
importlib.reload(XAI_metrics)  # reload it so edits are picked up without restarting the kernel

# Re-import the metric helpers after the reload so these names are bound to the
# freshly loaded versions.
from XAI_metrics import calc_valid, calc_sparsity, calc_continuous_proximity, \
    calc_categorical_proximity, calc_manifold_distance, calc_cf_num


# Evaluate the (original, counterfactual) pairs collected in `cfs`.
# NOTE(review): df.shape[1] counts every column of df, including 'target' —
# confirm this is the value calc_valid expects.
valid_alibi_proto = calc_valid(cfs, model, transformer, df.shape[1])
sparsity_alibi_proto = calc_sparsity(cfs, categorical_features)
con_proximity_alibi_proto = calc_continuous_proximity(cfs, numeric_features)
cat_proximity_alibi_proto = calc_categorical_proximity(cfs, categorical_features)
manifold_alibi_proto = calc_manifold_distance(cfs, df, categorical_features)
cf_num_alibi_proto = calc_cf_num(cfs)

In [17]:
# Spot check on a training instance: row 84 of the encoded training set.
X = X_train.iloc[[84]].to_numpy(dtype=float)
explanation = cf.explain(X)

# Label both the instance and its counterfactual with the OHE column names.
ohe_columns = d.ohe_encoded_feature_names
X_df = pd.DataFrame(X, columns=ohe_columns)
cf_ohe_df = pd.DataFrame(explanation.cf['X'], columns=ohe_columns)

# Decode each back to the original feature space, then print original first.
orig_pd, cf_pd = (transformer.inverse_transform(frame) for frame in (X_df, cf_ohe_df))
for decoded in (orig_pd, cf_pd):
    print(decoded)

    age sex cp  trestbps   chol fbs restecg  thalach exang  oldpeak slope ca  \
0  54.0   1  4     140.0  239.0   0       0    160.0     0      1.2     1  0   

     thal  
0  normal  
    age sex cp  trestbps   chol fbs restecg  thalach exang  oldpeak slope ca  \
0  54.0   0  2     140.0  239.0   0       2    136.0     0      6.2     2  1   

  thal  
0    1  


In [18]:
# (column label, value) pairs for the single summary row, in output column order.
_metric_row = (
    ("method", "Alibi_Proto"),
    ("Avg Time(s)", time_Alibi_proto),
    ("Validity", valid_alibi_proto),
    ("Sparsity", sparsity_alibi_proto),
    ("Proximity_con", con_proximity_alibi_proto),
    ("Proximity_cat", cat_proximity_alibi_proto),
    ("Manifold", manifold_alibi_proto),
    ("Avg CF count", cf_num_alibi_proto),
)
# Each value is wrapped in a one-element list so the frame has exactly one row.
results_alibi_proto = {label: [value] for label, value in _metric_row}

# One-row summary table, rounded to two decimals.
df_results_alibi_proto = pd.DataFrame(results_alibi_proto).round(2)

In [19]:
import os

# to_csv() does not create missing parent directories, so make sure ./results
# exists before writing (a no-op when it is already there).
results_csv_path = './results/Alibi_Proto_result_heart.csv'
os.makedirs(os.path.dirname(results_csv_path), exist_ok=True)
df_results_alibi_proto.to_csv(results_csv_path, index=False)