# Counterfactuals benchmark on tabular datasets

In [1]:
import tensorflow as tf
import os
tf.compat.v1.disable_eager_execution()
# tf.compat.v1.enable_eager_execution()


BASE_PATH = "./counterfactuals"
print("Current working directory:", os.getcwd())


2025-07-01 11:03:11.113070: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-07-01 11:03:11.149115: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-07-01 11:03:11.149145: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-07-01 11:03:11.149175: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-07-01 11:03:11.157051: I tensorflow/core/platform/cpu_feature_g

Current working directory: /home/ahmed/prototype


## Imports and preprocessing

In [2]:
# Install the dev version of the Alibi package if not already installed
try:
    from alibi import __version__ as alibi_version
    print(f"Alibi version: {alibi_version}")
except ImportError:
    print("Alibi package not found, installing...")
    # Install the dev version of Alibi
    !pip install git+https://github.com/SeldonIO/alibi.git > /dev/null


import logging

alibi_logger = logging.getLogger("alibi")
alibi_logger.setLevel("CRITICAL")


print(f"Is TensorFlow running in eager execution mode? -----→ {tf.executing_eagerly()}")
!nvidia-smi -L

  from .autonotebook import tqdm as notebook_tqdm


Alibi version: 0.9.7.dev0
Is TensorFlow running in eager execution mode? -----→ False
GPU 0: NVIDIA GeForce RTX 4060 Laptop GPU (UUID: GPU-ed7340f2-1910-df12-4a83-29feeba52695)


In [3]:
from datetime import datetime

if not os.path.exists(BASE_PATH):
    os.makedirs(BASE_PATH)


date = datetime.now().strftime('%Y-%m-%d')
EXPERIMENT_PATH = f"{BASE_PATH}/diabetes_{date}"
MODELS_EXPERIMENT_PATH = f"{BASE_PATH}/diabetes_2020-09-09"
if not os.path.exists(EXPERIMENT_PATH):
    os.makedirs(EXPERIMENT_PATH)
    


## Data import and preprocessing

In [4]:
import json
# import pickle
# import time
# from matplotlib import offsetbox
# from matplotlib.colors import ListedColormap
# import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pprint import pprint
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
# from sklearn.tree import DecisionTreeClassifier
from tensorflow.keras.layers import Dense, Add, Input, ActivityRegularization, Concatenate, Multiply
from tensorflow.keras import optimizers, Model, regularizers, Input

from tensorflow.keras.models import Sequential
from tensorflow.random import set_seed

from tensorflow.keras import backend as K
# from tensorflow.keras.models import load_model
import os
import time

print("Current working directory:", os.getcwd())

INITIAL_CLASS = 0
DESIRED_CLASS = 1
N_CLASSES = 2
n_training_iterations = 10


np.set_printoptions(precision=2)
set_seed(2020)
np.random.seed(2020)

# German Credit dataset

def preprocess_data_german(df, target_column="Outcome"):
    """
    Preprocess the German Credit dataset by encoding categorical variables and splitting the data into 
    train, test, and user simulation sets.
    
    Returns a dictionary with processed train, test, and user datasets.
    """
    
    # Assign meaningful column names
    df.columns = [
        'Status', 'Month', 'Credit_History', 'Purpose', 'Credit_Amount',
        'Savings', 'Employment', 'Installment_Rate', 'Personal_Status', 'Other_Debtors',
        'Residence_Duration', 'Property', 'Age', 'Other_Installment_Plans', 'Housing',
        'Existing_Credits', 'Job', 'Num_Liable_People', 'Telephone', 'Foreign_Worker',
        'Outcome'
    ]
    
    # Mapping categorical features to more meaningful values
    status_mapping = { 'A11': '< 0 DM', 'A12': '0 <= ... < 200 DM', 'A13': '>= 200 DM / salary assignments for at least 1 year', 'A14': 'no checking account' }
    credit_history_mapping = { 'A30': 'no credits taken/ all credits paid back duly', 'A31': 'all credits at this bank paid back duly', 'A32': 'existing credits paid back duly till now', 'A33': 'delay in paying off in the past', 'A34': 'critical account/other credits existing' }
    savings_mapping = { 'A61': '< 100 DM', 'A62': '100 <= ... < 500 DM', 'A63': '500 <= ... < 1000 DM', 'A64': '>= 1000 DM', 'A65': 'unknown/no savings account' }
    employment_mapping = { 'A71': 'unemployed', 'A72': '< 1 year', 'A73': '1 <= ... < 4 years', 'A74': '4 <= ... < 7 years', 'A75': '>= 7 years' }
    personal_status_mapping = { 'A91': 'male: divorced/separated', 'A92': 'female: divorced/separated/married', 'A93': 'male: single', 'A94': 'male: married/widowed', 'A95': 'female: single' }
    other_debtors_mapping = { 'A101': 'none', 'A102': 'co-applicant', 'A103': 'guarantor' }
    property_mapping = { 'A121': 'real estate', 'A122': 'building society savings agreement/life insurance', 'A123': 'car or other, not in attribute 6', 'A124': 'unknown/no property' }
    other_installment_plans_mapping = { 'A141': 'bank', 'A142': 'stores', 'A143': 'none' }
    housing_mapping = { 'A151': 'rent', 'A152': 'own', 'A153': 'for free' }
    telephone_mapping = { 'A191': 'none', 'A192': 'yes, registered under the customer\'s name' }
    foreign_worker_mapping = { 'A201': 'yes', 'A202': 'no' }

    # Apply mappings
    df['Status'] = df['Status'].map(status_mapping)
    df['Credit_History'] = df['Credit_History'].map(credit_history_mapping)
    df['Savings'] = df['Savings'].map(savings_mapping)
    df['Employment'] = df['Employment'].map(employment_mapping)
    df['Personal_Status'] = df['Personal_Status'].map(personal_status_mapping)
    df['Other_Debtors'] = df['Other_Debtors'].map(other_debtors_mapping)
    df['Property'] = df['Property'].map(property_mapping)
    df['Other_Installment_Plans'] = df['Other_Installment_Plans'].map(other_installment_plans_mapping)
    df['Housing'] = df['Housing'].map(housing_mapping)
    df['Telephone'] = df['Telephone'].map(telephone_mapping)
    df['Foreign_Worker'] = df['Foreign_Worker'].map(foreign_worker_mapping)

    # Encode ordinal columns
    ordinal_cols = ['Status', 'Credit_History', 'Savings', 'Employment']
    le = LabelEncoder()
    for col in ordinal_cols:
        df[col] = le.fit_transform(df[col])

    # One-hot encode nominal columns
    nominal_columns = ['Purpose', 'Personal_Status', 'Other_Debtors', 'Property', 
                       'Other_Installment_Plans', 'Housing', 'Job', 'Telephone', 'Foreign_Worker']
    df = pd.get_dummies(df, columns=nominal_columns, drop_first=True)

    # Process target variable
    Y = df[target_column].replace(1, 0).replace(2, 1)
    X = df.drop(columns=[target_column])

    # Get final feature set
    # list all features
    immutable_features = set(X.columns) - set(['Status', 'Credit_History'])
    

    mutable_features = set(X.columns) - set(immutable_features)
    mutable_features = list(mutable_features)

    features = list(mutable_features) + list(immutable_features)

    return  X, Y, features, immutable_features, mutable_features
    
# =========================================================


# Make sure 'german.csv' is in your project directory
df = pd.read_csv('statlog_german_credit_data/german.data', sep=' ', skiprows=1, header=None)
x,y, features, immutable_features, mutable_features = preprocess_data_german(df)

X_train, X_test, y_train, y_test = train_test_split(x,y, test_size=0.2, random_state=2020)

standard_scaler = StandardScaler()
X_train = standard_scaler.fit_transform(X_train)
X_test = standard_scaler.transform(X_test)

y_train = to_categorical(y_train)
y_test = to_categorical(y_test)



Current working directory: /home/ahmed/prototype


In [5]:
def compute_reconstruction_error(x, autoencoder):
    """Compute the reconstruction error for a given autoencoder and data points."""
    preds = autoencoder.predict(x)
    preds_flat = preds.reshape((preds.shape[0], -1))
    x_flat = x.reshape((x.shape[0], -1))
    return np.linalg.norm(x_flat - preds_flat, axis=1)

def format_metric(metric):
    """Return a formatted version of a metric, with the confidence interval."""
    return f"{metric.mean():.3f} ± {1.96*metric.std()/np.sqrt(len(metric)):.3f}"

def compute_metrics(samples, counterfactuals, latencies, classifier, autoencoder,
                    batch_latency=None):
    """ Summarize the relevant metrics in a dictionary. """
    reconstruction_error = compute_reconstruction_error(counterfactuals, autoencoder)
    delta = np.abs(samples-counterfactuals)
    l1_distances = delta.reshape(delta.shape[0], -1).sum(axis=1)
    prediction_gain = (
        classifier.predict(counterfactuals)[:, DESIRED_CLASS] - 
        classifier.predict(samples)[:, DESIRED_CLASS]
    )

    metrics = dict()
    metrics["reconstruction_error"] = format_metric(reconstruction_error)
    metrics["prediction_gain"] = format_metric(prediction_gain)
    metrics["sparsity"] = format_metric(l1_distances)
    metrics["latency"] = format_metric(latencies)
    batch_latency = batch_latency if batch_latency else sum(latencies)
    metrics["latency_batch"] = f"{batch_latency:.3f}"

    return metrics

def save_experiment(method_name, samples, counterfactuals, latencies, 
                    batch_latency=None):
    """Create an experiment folder and save counterfactuals, latencies and metrics."""
    if not os.path.exists(f"{EXPERIMENT_PATH}/{method_name}"):
        os.makedirs(f"{EXPERIMENT_PATH}/{method_name}")   

    np.save(f"{EXPERIMENT_PATH}/{method_name}/counterfactuals.npy", counterfactuals)
    np.save(f"{EXPERIMENT_PATH}/{method_name}/latencies.npy", latencies)

    metrics = compute_metrics(samples, counterfactuals, latencies, classifier, autoencoder)
    json.dump(metrics, open(f"{EXPERIMENT_PATH}/{method_name}/metrics.json", "w"))
    pprint(metrics)

In [6]:
from tensorflow.keras.models import load_model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Add, Input, ActivityRegularization
from tensorflow.keras import Model, optimizers, regularizers
from tensorflow.keras.utils import to_categorical

tf.random.set_seed(2020)
np.random.seed(2020)

# def create_classifier(input_shape):
#     """Define and compile a neural network binary classifier.""" 
#     model = Sequential([
#         Dense(20, activation='relu', input_shape=input_shape),
#         Dense(20, activation='relu'),
#         Dense(2, activation='softmax'),
#     ], name="classifier")
#     optimizer = optimizers.Adam(learning_rate=0.0002, beta_1=0.5)
#     model.compile(optimizer, 'binary_crossentropy', ['accuracy'])
#     return model

# classifier = create_classifier((x.shape[1],))
# print(X_train.dtype, y_train.dtype)
# print(X_test.dtype, y_test.dtype)

# X_train = X_train.astype(np.float32)
# X_test = X_test.astype(np.float32)
# y_train = y_train.astype(np.float32)
# y_test = y_test.astype(np.float32)
# training = classifier.fit(X_train, y_train, batch_size=32, epochs=200, verbose=0,
#                           validation_data=(X_test, y_test),)
# print(f"Training: loss={training.history['loss'][-1]:.4f}, "
#       f"accuracy={training.history['accuracy'][-1]:.4f}")
# print(f"Validation: loss={training.history['val_loss'][-1]:.4f}, "
#       f"accuracy={training.history['val_accuracy'][-1]:.4f}")

# classifier.save(f"{EXPERIMENT_PATH}/classifier.keras")

# Load the classifier model
filename = f"{EXPERIMENT_PATH}/classifier.keras"
classifier = load_model(filename)
classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
print(f"Classifier loaded from {filename}") 



Classifier loaded from ./counterfactuals/diabetes_2025-07-01/classifier.keras


## Estimate density with the reconstruction error of a (denoising) autoencoder


In [7]:
# def add_noise(x, noise_factor=1e-6):
#     x_noisy = x + noise_factor * np.random.normal(loc=0.0, scale=1.0, size=x.shape) 
#     return x_noisy

    
# def create_autoencoder(in_shape=(x.shape[1],)):
#     input_ = Input(shape=in_shape) 

#     x = Dense(32, activation="relu")(input_)
#     encoded = Dense(8)(x)
#     x = Dense(32, activation="relu")(encoded)
#     decoded = Dense(in_shape[0], activation="tanh")(x)

#     autoencoder = Model(input_, decoded)
#     optimizer = optimizers.Nadam()
#     autoencoder.compile(optimizer, 'mse')
#     return autoencoder

# autoencoder = create_autoencoder()
# training = autoencoder.fit(
#     add_noise(X_train), X_train, epochs=100, batch_size=32, shuffle=True, 
#     validation_data=(X_test, X_test), verbose=0
# )
# print(f"Training loss: {training.history['loss'][-1]:.4f}")
# print(f"Validation loss: {training.history['val_loss'][-1]:.4f}")

# n_samples = 1000
# # Compute the reconstruction error of noise data
# samples = np.random.randn(n_samples, X_train.shape[1])
# reconstruction_error_noise = compute_reconstruction_error(samples, autoencoder)

# # Save and print the autoencoder metrics
# reconstruction_error = compute_reconstruction_error(X_test, autoencoder)
# autoencoder_metrics = {
#     "reconstruction_error": format_metric(reconstruction_error),
#     "reconstruction_error_noise": format_metric(reconstruction_error_noise),
# }
# json.dump(autoencoder_metrics, open(f"{EXPERIMENT_PATH}/autoencoder_metrics.json", "w"))
# pprint(autoencoder_metrics)

# autoencoder.save(f"{EXPERIMENT_PATH}/autoencoder.keras")

# Load the autoencoder model
filename = f"{EXPERIMENT_PATH}/autoencoder.keras" 
autoencoder = load_model(filename)
# Ensure the autoencoder is compiled with the same optimizer and loss function  
autoencoder.compile(optimizer='nadam', loss='mse')

print(f"Autoencoder loaded from {filename}")



Autoencoder loaded from ./counterfactuals/diabetes_2025-07-01/autoencoder.keras


## Regularized Gradient Descent

In [8]:

# from alibi.explainers import CounterFactual

# shape = (1,) + X_train.shape[1:]
# feature_range = (X_train.min(), X_train.max())

# cf = CounterFactual(classifier, shape=shape, target_proba=1.0, tol=0.5,
#                     target_class=DESIRED_CLASS, max_iter=100, lam_init=0.001,
#                     max_lam_steps=5, learning_rate_init=0.1,
#                     feature_range=feature_range)

# sample = X_test[1]

# t_initial = time.time()
# explanation = cf.explain(np.expand_dims(sample, axis=0))
# print(f"Produced explanation in {time.time() - t_initial:.2f} seconds ")

# y_prob = classifier.predict(np.expand_dims(sample, axis=0))[0]
# print(f'Original prediction: {y_prob.argmax()} with probability {y_prob.max():.3f}')

# pred_class = explanation.cf['class']
# proba = explanation.cf['proba'][0][pred_class]
# print(f'Counterfactual prediction: {pred_class} with probability {proba:.3f}')

# perturbations = (explanation.cf['X'] - sample)[0]
# perturbations[-len(immutable_features):] = 0.
# print(f"Suggested perturbations: {perturbations}")

In [9]:
# samples = X_test 

# latencies = np.empty(len(samples))
# counterfactuals = np.empty_like(samples)

# for i, sample in enumerate(samples):
#     if ((i % 20) == 0) or (i == (len(samples)-1)):
#         print(f"Iteration {i} at {datetime.now()}")
#     t_initial = time.time()
#     try:
#         explanation = cf.explain(np.expand_dims(sample, axis=0))
#         counterfactuals[i] = explanation.cf['X']
#     except (UnboundLocalError, TypeError):  # counterfactual search failed
#         print(f"{i}-th sampled failed")
#         counterfactuals[i] = sample
#     latencies[i] = 1000*(time.time() - t_initial)

# print("Metrics before immutable features projection:")
# pprint(compute_metrics(samples, counterfactuals, latencies, classifier, autoencoder,
#                     batch_latency=None))
# print("-"*80)

# # Set immutable features to original values
# counterfactuals[:, len(mutable_features):] = samples[:, len(mutable_features):]

# print("Metrics after immutable features projection:")
# save_experiment("rgd", samples, counterfactuals, latencies)

## Counterfactual Search Guided by Prototypes

In [10]:
# from alibi.explainers import CounterFactualProto

# shape = (1,) + X_train.shape[1:]
# feature_range = (X_train.min(), X_train.max())

# cf_proto = CounterFactualProto(
#     classifier, shape, use_kdtree=True, theta=10., feature_range=feature_range,
#     max_iterations=200, c_steps=10
# )
# cf_proto.fit(X_train, trustscore_kwargs=None);

In [11]:
# sample = X_test[1]

# t_initial = time.time()
# explanation = cf_proto.explain(
#     np.expand_dims(sample, axis=0), k=5, k_type='mean', target_class=[DESIRED_CLASS]
# )

# print(f"Produced explanation in {time.time() - t_initial:.2f} seconds ")

# y_prob = classifier.predict(np.expand_dims(sample, axis=0))[0]
# print(f'Original prediction: {y_prob.argmax()} with probability {y_prob.max():.3f}')

# if explanation.cf is not None:
#     pred_class = explanation.cf['class']
#     proba = explanation.cf['proba'][0][pred_class]
#     print(f'Counterfactual prediction: {pred_class} with probability {proba:.3f}')
#     perturbations = (explanation.cf['X'] - sample)[0]
#     perturbations[-len(immutable_features):] = 0.
#     print(f"Suggested perturbations: {perturbations}")
# else:
#     print("No counterfactual found for this sample.")
#     counterfactual = sample  # fallback to original sample
    

In [12]:
# verbose = False
# samples = X_test

# latencies = np.empty(len(samples))
# counterfactuals = np.empty_like(samples)
# for i, sample in enumerate(samples):
#     if ((i % 20) == 0) or (i == (len(samples)-1)):
#         print(f"{i+1}-th iteration at {datetime.now()}")
#     t_initial = time.time()
#     try:
#         explanation = cf_proto.explain(np.expand_dims(sample, axis=0), k=20, 
#                                        k_type='mean', target_class=[DESIRED_CLASS])
#         counterfactuals[i] = explanation.cf['X']
#     except (UnboundLocalError, TypeError) as e:  # counterfactual search failed
#         if verbose:
#             print(f"{i}-th sampled failed")
#         counterfactuals[i] = sample
#     latencies[i] = 1000*(time.time() - t_initial)

# print("Metrics before immutable features projection:")
# pprint(compute_metrics(samples, counterfactuals, latencies, classifier, autoencoder,
#                     batch_latency=None))
# print("-"*80)

# # Set immutable features to original values
# counterfactuals[:, len(mutable_features):] = samples[:, len(mutable_features):]

# print("Metrics after immutable features projection:")
# save_experiment("csgp", samples, counterfactuals, latencies)

## GAN-based counterfactual search

In [13]:
def generate_fake_samples(x, generator):
    """Use the input generator to generate samples."""
    return generator.predict(x)

def data_stream(x, y=None, batch_size=500):
    """Generate batches until exhaustion of the input data."""
    n_train = x.shape[0]
    if y is not None:
        assert n_train == len(y)
    n_complete_batches, leftover = divmod(n_train, batch_size)
    n_batches = n_complete_batches + bool(leftover)

    perm = np.random.permutation(n_train)
    for i in range(n_batches):
        batch_idx = perm[i * batch_size:(i + 1) * batch_size]
        if y is not None:
            output = (x[batch_idx], y[batch_idx])
        else:
            output = x[batch_idx]
        yield output


def infinite_data_stream(x, y=None, batch_size=500):
    """Infinite batch generator."""
    batches = data_stream(x, y, batch_size=batch_size)
    while True:
        try:
            yield next(batches)
        except StopIteration:
            batches = data_stream(x, y, batch_size=batch_size)
            yield next(batches)

def create_generator(in_shape=(X_train.shape[1],), residuals=True):
    """Define and compile the residual generator of the CounteRGAN."""
    generator_input = Input(shape=in_shape, name='generator_input')
    generator = Dense(64, activation='relu')(generator_input)
    generator = Dense(32, activation='relu')(generator)
    generator = Dense(64, activation='relu')(generator)
    generator = Dense(in_shape[0], activation='tanh')(generator)
    generator_output = ActivityRegularization(l1=0., l2=1e-6)(generator)
    
    if residuals:
        generator_output = Add(name="output")([generator_input, generator_output])

    return Model(inputs=generator_input, outputs=generator_output)


def create_discriminator(in_shape=(X_train.shape[1],)):
    """ Define a neural network binary classifier to classify real and generated 
    examples."""
    model = Sequential([
        Dense(128, activation='relu', input_shape=in_shape),
        Dropout(0.2),
        Dense(1, activation='sigmoid'),
    ], name="discriminator")
    optimizer = optimizers.legacy.Adam(learning_rate=0.0005, beta_1=0.5, decay=1e-8)
    model.compile(optimizer, 'binary_crossentropy', ['accuracy'])
    return model


def define_countergan(generator, discriminator, classifier, 
                      input_shape=(X_train.shape[1],)):
    """Combine a generator, discriminator, and fixed classifier into the CounteRGAN."""
    discriminator.trainable = False
    classifier.trainable = False

    countergan_input = Input(shape=input_shape, name='countergan_input')
  
    x_generated = generator(countergan_input)

    countergan = Model(
        inputs=countergan_input, 
        outputs=[discriminator(x_generated), classifier(x_generated)]
    )
        
    optimizer = optimizers.legacy.RMSprop(learning_rate=2e-4, decay=1e-8)
    countergan.compile(optimizer, ["binary_crossentropy", "categorical_crossentropy"])
    return countergan


def define_weighted_countergan(generator, discriminator, 
                               input_shape=(X_train.shape[1],)):
    """Combine a generator and a discriminator for the weighted version of the 
    CounteRGAN."""
    discriminator.trainable = False
    classifier.trainable = False
    countergan_input = Input(shape=input_shape, name='countergan_input')
  
    x_generated = generator(countergan_input)

    countergan = Model(inputs=countergan_input, outputs=discriminator(x_generated))
    optimizer = optimizers.legacy.RMSprop(learning_rate=5e-4, decay=1e-8)
    countergan.compile(optimizer, "binary_crossentropy")  
    return countergan



In [14]:
def train_countergan(n_discriminator_steps, n_generator_steps, n_training_iterations,
                     classifier, discriminator, generator, batches, 
                     weighted_version=False):
    """ Main function: train the CounteRGAN"""
    def check_divergence(x_generated):
        return np.all(np.isnan(x_generated))

    def print_training_information(generator, classifier, X_test, iteration):
        X_gen = generator.predict(X_test)
        clf_pred_test = classifier.predict(X_test)
        clf_pred = classifier.predict(X_gen)

        delta_clf_pred = (clf_pred - clf_pred_test)[:, DESIRED_CLASS]
        y_target = to_categorical([DESIRED_CLASS] * len(clf_pred), 
                                  num_classes=N_CLASSES)
        print('='*88)
        print(f"Training iteration {iteration} at {datetime.now()}")
        
        
        reconstruction_error = np.mean(compute_reconstruction_error(X_gen, autoencoder))
        print(f"Autoencoder reconstruction error (infinity to 0): {reconstruction_error:.3f}")
        print(f"Counterfactual prediction gain (0 to 1): {delta_clf_pred.mean():.3f}")
        print(f"Sparsity (L1, infinity to 0): {np.mean(np.abs(X_gen-X_test)):.3f}")

    if weighted_version:
        countergan = define_weighted_countergan(generator, discriminator)
    else:
        countergan = define_countergan(generator, discriminator, classifier)

    for iteration in range(n_training_iterations):
        if iteration > 0:
            x_generated = generator.predict(x_fake_input)
            if check_divergence(x_generated):
                print("Training diverged with the following loss functions:")
                print(discrim_loss_1, discrim_accuracy, gan_loss, 
                    discrim_loss, discrim_loss_2, clf_loss)
                break

        # Periodically print and plot training information 
        if (iteration % 1000 == 0) or (iteration == n_training_iterations - 1):
            print_training_information(generator, classifier, X_test, iteration)

        # Train the discriminator
        discriminator.trainable = True
        for _ in range(n_discriminator_steps):
            x_fake_input, _ = next(batches)
            x_fake = generate_fake_samples(x_fake_input, generator)
            x_real = x_fake_input

            x_batch = np.concatenate([x_real, x_fake])
            y_batch = np.concatenate([np.ones(len(x_real)), np.zeros(len(x_fake))])
            
            # Shuffle real and fake examples
            p = np.random.permutation(len(y_batch))
            x_batch, y_batch = x_batch[p], y_batch[p]

            if weighted_version:
                classifier_scores = classifier.predict(x_batch)[:, DESIRED_CLASS]
                
                # The following update to the classifier scores is needed to have the 
                # same order of magnitude between real and generated samples losses
                real_samples = np.where(y_batch == 1.)
                average_score_real_samples = np.mean(classifier_scores[real_samples])
                classifier_scores[real_samples] /= average_score_real_samples
                
                fake_samples = np.where(y_batch == 0.)
                classifier_scores[fake_samples] = 1.

                discriminator.train_on_batch(
                    x_batch, y_batch, sample_weight=classifier_scores
                )
            else:
                discriminator.train_on_batch(x_batch, y_batch)

        # Train the generator 
        discriminator.trainable = False
        for _ in range(n_generator_steps):
            x_fake_input, _ = next(batches)
            y_fake = np.ones(len(x_fake_input))
            if weighted_version:
                countergan.train_on_batch(x_fake_input, y_fake)
            else:
                y_target = to_categorical([DESIRED_CLASS] * len(x_fake_input), 
                                          num_classes=N_CLASSES)
                countergan.train_on_batch(x_fake_input, [y_fake, y_target])
    return countergan

## Counterfactual search with a regular GAN

In [15]:
# discriminator = create_discriminator()
# generator = create_generator(residuals=False)
# batches = infinite_data_stream(X_train, y_train, batch_size=256)

# method_name = "regular_gan"
# countergan = train_countergan(2, 4, 2000, classifier, discriminator, generator, batches)

# t_initial = time.time()
# counterfactuals = generator.predict(X_test)
# batch_latency = 1000*(time.time() - t_initial)

# latencies = np.zeros(len(X_test))
# for i, x in enumerate(X_test):
#     t_initial = time.time()
#     _ = generator.predict(np.expand_dims(x, axis=0))
#     latencies[i] = 1000*(time.time() - t_initial)

# print("-"*80)
# print("Metrics before immutable features projection:")
# pprint(compute_metrics(samples, counterfactuals, latencies, classifier, autoencoder,
#                     batch_latency=None))
# print("-"*80)

# # Set immutable features to original values
# # Fix immutable features
# counterfactuals[:, len(mutable_features):] = samples[:, len(mutable_features):]

# # Re-evaluate
# print("Metrics after immutable features projection:")
# # pprint(compute_metrics(samples, counterfactuals, latencies, classifier, autoencoder,
# #                        batch_latency=batch_latency))
# save_experiment(method_name, X_test, counterfactuals, latencies, batch_latency)

# generator.save(f"{EXPERIMENT_PATH}/{method_name}/generator.h5", save_format='h5')
# discriminator.save(f"{EXPERIMENT_PATH}/{method_name}/discriminator.h5", save_format='h5')
# countergan.save(f"{EXPERIMENT_PATH}/{method_name}/countergan.h5", save_format='h5')

## CounteRGAN: first formulation for differentiable classifiers

In [16]:
discriminator = create_discriminator()
generator = create_generator(residuals=True)
batches = infinite_data_stream(X_train, y_train, batch_size=256)
samples = X_test 

method_name = "countergan"
countergan = train_countergan(2, 4, 2000, classifier, discriminator, generator, batches)

t_initial = time.time()
counterfactuals = generator.predict(X_test)
batch_latency = 1000*(time.time() - t_initial)

latencies = np.zeros(len(X_test))
for i, x in enumerate(X_test):
    t_initial = time.time()
    _ = generator.predict(np.expand_dims(x, axis=0))
    latencies[i] = 1000*(time.time() - t_initial)

print("-"*80)
print("Metrics before immutable features projection:")
pprint(compute_metrics(samples, counterfactuals, latencies, classifier, autoencoder,
                    batch_latency=None))
print("-"*80)

# Set immutable features to original values
counterfactuals[:, len(mutable_features):] = samples[:, len(mutable_features):]

print("Metrics after immutable features projection:")
save_experiment(method_name, X_test, counterfactuals, latencies, batch_latency)

generator.save(f"{EXPERIMENT_PATH}/{method_name}/generator.h5", save_format='h5')
discriminator.save(f"{EXPERIMENT_PATH}/{method_name}/discriminator.h5", save_format='h5')
countergan.save(f"{EXPERIMENT_PATH}/{method_name}/countergan.h5", save_format='h5')

2025-07-01 11:03:14.794308: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-07-01 11:03:14.849013: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-07-01 11:03:14.849074: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-07-01 11:03:14.855786: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-07-01 11:03:14.855858: I tensorflow/compile

Training iteration 0 at 2025-07-01 11:03:16.515406
Autoencoder reconstruction error (infinity to 0): 6.419
Counterfactual prediction gain (0 to 1): 0.016
Sparsity (L1, infinity to 0): 0.256


2025-07-01 11:03:17.688367: W tensorflow/c/c_api.cc:305] Operation '{name:'loss_3/AddN' id:710 op device:{requested: '', assigned: ''} def:{{{node loss_3/AddN}} = AddN[N=3, T=DT_FLOAT, _has_manual_control_dependencies=true](loss_3/mul, loss_3/mul_1, model/activity_regularization/ActivityRegularizer/truediv)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.
2025-07-01 11:03:17.716441: W tensorflow/c/c_api.cc:305] Operation '{name:'training_2/RMSprop/dense_3_1/kernel/rms/Assign' id:1248 op device:{requested: '', assigned: ''} def:{{{node training_2/RMSprop/dense_3_1/kernel/rms/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](training_2/RMSprop/dense_3_1/kernel/rms, training_2/RMSprop/dense_3_1/kernel/rms/Initializer/zeros)}}' was changed by setting attribute after it was r

Training iteration 1000 at 2025-07-01 11:04:36.664400
Autoencoder reconstruction error (infinity to 0): 6.096
Counterfactual prediction gain (0 to 1): 0.049
Sparsity (L1, infinity to 0): 0.295
Training iteration 1999 at 2025-07-01 11:05:53.414342
Autoencoder reconstruction error (infinity to 0): 6.156
Counterfactual prediction gain (0 to 1): 0.039
Sparsity (L1, infinity to 0): 0.210
--------------------------------------------------------------------------------
Metrics before immutable features projection:
{'latency': '1.171 ± 0.029',
 'latency_batch': '234.203',
 'prediction_gain': '0.036 ± 0.010',
 'reconstruction_error': '6.155 ± 0.242',
 'sparsity': '7.753 ± 0.310'}
--------------------------------------------------------------------------------
Metrics after immutable features projection:
{'latency': '1.171 ± 0.029',
 'latency_batch': '234.203',
 'prediction_gain': '0.009 ± 0.003',
 'reconstruction_error': '6.137 ± 0.234',
 'sparsity': '0.683 ± 0.049'}


  saving_api.save_model(


## CounteRGAN: second formulation for any classifier

In [17]:
# discriminator = create_discriminator()
# generator = create_generator(residuals=True)
# batches = infinite_data_stream(X_train, y_train, batch_size=256)

# method_name = "countergan-wt"
# countergan = train_countergan(2, 3, 2000, classifier, discriminator, generator, 
#                               batches, weighted_version=True)

# t_initial = time.time()
# counterfactuals = generator.predict(X_test)
# batch_latency = 1000*(time.time() - t_initial)

# latencies = np.zeros(len(X_test))
# for i, x in enumerate(X_test):
#     t_initial = time.time()
#     _ = countergan.predict(np.expand_dims(x, axis=0))
#     latencies[i] = 1000*(time.time() - t_initial)

# print("-"*80)
# print("Metrics before immutable features projection:")
# pprint(compute_metrics(samples, counterfactuals, latencies, classifier, autoencoder,
#                     batch_latency=None))
# print("-"*80)

# # Set immutable features to original values
# counterfactuals[:, len(mutable_features):] = samples[:, len(mutable_features):]

# print("Metrics after immutable features projection:")
# save_experiment(method_name, X_test, counterfactuals, latencies, batch_latency)

# generator.save(f"{EXPERIMENT_PATH}/{method_name}/generator.h5", save_format='h5')
# discriminator.save(f"{EXPERIMENT_PATH}/{method_name}/discriminator.h5", save_format='h5')
# countergan.save(f"{EXPERIMENT_PATH}/{method_name}/countergan.h5", save_format='h5')

## Generate the benchmark table

In [18]:
# METHODS = ["rgd", "csgp", "regular_gan", "countergan", "countergan-wt"]
# METRIC_NAMES = [
#     "prediction_gain", "reconstruction_error", "sparsity", "latency", "latency_batch"
# ]

# metrics = dict()
# for method in METHODS:
#     method_metrics = json.load(open(f"{EXPERIMENT_PATH}/{method}/metrics.json", "r"))
#     method_metrics = {k: v for k, v in method_metrics.items() if k in METRIC_NAMES}
#     metrics[method] = method_metrics

# metrics = pd.DataFrame(metrics)
# metrics.columns =  ["RGD",  "CSGP", "GAN", "CounterGAN", "CounterRGAN-wt"] 

# metrics.index = [
#     "↓ Realism",
#     "↑ Prediction gain",
#     "↓ Sparsity",
#     "↓ Latency (ms)",
#     "↓ Batch latency (ms)",
# ]

# metrics

## Individual examples

In [19]:
# negative_idx = np.where(classifier.predict(X_test)[:, 1] < 0.5)[0]
# x_negative = X_test[negative_idx]
# original_features = standard_scaler.inverse_transform(x_negative)
# negative_df = pd.DataFrame(original_features, columns=features)
# negative_df.head(5)

In [20]:
# counterfactuals = standard_scaler.inverse_transform(
#     generator.predict(X_test[negative_idx])
# )
# residuals = (counterfactuals - 
#              standard_scaler.inverse_transform(X_test[negative_idx]))
# residuals_df = pd.DataFrame(residuals, columns=features)
# residuals_df[list(immutable_features)] = 0.
# residuals_df.head(5)

In [21]:
# sample_idx = 20
# sample = np.expand_dims(X_test[sample_idx], axis=0)

# def compute_residuals(sample, counterfactual):
#     counterfactual = standard_scaler.inverse_transform(counterfactual)
#     residuals = (counterfactual - standard_scaler.inverse_transform(sample))[0]
#     residuals[-len(immutable_features):] = 0
#     return residuals

# method_outputs = dict()

# d = negative_df.iloc[sample_idx].to_dict()
# d["Classifier Prediction"] = classifier.predict(sample)[0][1]
# method_outputs["Initial values"] = d


# explanation = cf.explain(sample)
# counterfactual = explanation.cf['X']
# scaled_counterfactual = compute_residuals(sample, counterfactual)
# d = {k: v for k, v in zip(features, list(scaled_counterfactual))}
# d["Classifier Prediction"] = classifier.predict(counterfactual)[0][1]
# method_outputs["RGD"] = d

# explanation = cf_proto.explain(sample, k=5, k_type='mean', target_class=[DESIRED_CLASS])
# counterfactual = explanation.cf['X']
# scaled_counterfactual = compute_residuals(sample, counterfactual)
# d = {k: v for k, v in zip(features, list(scaled_counterfactual))}
# d["Classifier Prediction"] = classifier.predict(counterfactual)[0][1]
# method_outputs["CSGP"] = d

# for method in ["regular_gan", "countergan", "countergan-wt"]:
#     generator = load_model(f"{EXPERIMENT_PATH}/{method}/generator.h5")
#     counterfactual = generator.predict(sample)
#     scaled_counterfactual = compute_residuals(sample, counterfactual)
#     d = {k: v for k, v in zip(features, list(scaled_counterfactual))}
#     d["Classifier Prediction"] = classifier.predict(counterfactual)[0][1]
#     method_outputs[method] = d

# df = pd.DataFrame(method_outputs)
# df