In [1]:
import os
import sys
import gc
import random
import warnings; warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
from scipy import special
from typing import Optional

# Scikit-learn and AIF360
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from imblearn.over_sampling import ADASYN
from aif360.datasets import BinaryLabelDataset
from aif360.metrics import utils, BinaryLabelDatasetMetric, ClassificationMetric

# TensorFlow and Privacy Tools
import tensorflow as tf
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import (
    advanced_mia as amia,
    membership_inference_attack as mia,
)
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackInputData
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import plotting as mia_plotting

# Fairness-related Pre-/In-processing
from aif360.algorithms.preprocessing import DisparateImpactRemover, LFR, OptimPreproc, Reweighing
from fairlearn.reductions import EqualizedOdds, ExponentiatedGradient
from aif360.sklearn.inprocessing import ExponentiatedGradientReduction
from data_utils import DatasetBuilder

# Plotting
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
###############################################
#   1. Utility Functions (loss & statistic)   #
###############################################
def log_loss(labels: np.ndarray,
             pred: np.ndarray,
             sample_weight: Optional[np.ndarray] = None,
             from_logits: bool = False,
             small_value: float = 1e-8) -> np.ndarray:
    """
    Computes the per-example cross-entropy loss.

    Args:
        labels: Class labels, one per example. A 1-D array or a column vector
            of shape (n, 1) is accepted; non-integer dtypes (e.g. float 0./1.)
            are cast to integers before being used as class indices.
        pred: Either one value per example (binary case, interpreted as the
            probability/logit of class 1) or an (n, num_classes) array.
        sample_weight: Optional per-example weights, same length as `labels`.
        from_logits: If True, `pred` holds logits and is passed through
            sigmoid (binary) or softmax (multi-class) first.
        small_value: Lower clip applied to the probability before the log.

    Returns:
        1-D array of length n with the (optionally weighted) losses.

    Raises:
        ValueError: If labels, predictions, or weights disagree in length.
    """
    labels = np.asarray(labels)
    pred = np.asarray(pred)
    if labels.shape[0] != pred.shape[0]:
        raise ValueError('Mismatch between labels and predictions.')

    n_examples = pred.shape[0]
    # Flatten so that (n, 1) labels cannot broadcast the gather below into an
    # (n, n) matrix; anything that is not one label per example is rejected.
    labels = labels.reshape(-1)
    if labels.size != n_examples:
        raise ValueError('Mismatch between labels and predictions.')

    if sample_weight is None:
        weights = 1.0
    else:
        if np.shape(sample_weight)[0] != n_examples:
            raise ValueError('Sample weights and labels must have the same length.')
        # Same reasoning as for labels: keep the weights strictly 1-D.
        weights = np.asarray(sample_weight).reshape(-1)
        if weights.size != n_examples:
            raise ValueError('Sample weights and labels must have the same length.')

    # Binary case: a single score per example.
    if pred.size == n_examples:
        prob = pred.reshape(-1)
        if from_logits:
            prob = special.expit(prob)
        # Probability assigned to the true class: p for label 1, 1 - p for label 0.
        prob_correct = np.where(labels == 0, 1 - prob, prob)
        return -np.log(np.maximum(prob_correct, small_value)) * weights

    # Multi-class case: gather the probability of the true class per row.
    if from_logits:
        pred = special.softmax(pred, axis=-1)
    class_index = labels if np.issubdtype(labels.dtype, np.integer) else labels.astype(np.intp)
    true_class_prob = pred[np.arange(n_examples), class_index]
    return -np.log(np.maximum(true_class_prob, small_value)) * weights


def calculate_statistic(probabilities: np.ndarray,
                        labels: np.ndarray,
                        sample_weight: Optional[np.ndarray] = None,
                        convert_to_prob: bool = False) -> np.ndarray:
    """
    Calculates, for each example, the probability assigned to the true class.

    Args:
        probabilities: (n, num_classes) array of class scores.
        labels: One class label per example; 1-D or (n, 1). Non-integer dtypes
            are cast to integers before being used as column indices.
        sample_weight: Optional per-example weights multiplied into the result.
        convert_to_prob: If True, `probabilities` holds logits and is passed
            through softmax first.

    Returns:
        1-D array of length n. The input array is never modified.
    """
    if convert_to_prob:
        probabilities = special.softmax(probabilities, axis=-1)

    # Flatten so (n, 1) labels cannot broadcast the gather to an (n, n) matrix.
    class_index = np.asarray(labels).reshape(-1)
    if not np.issubdtype(class_index.dtype, np.integer):
        class_index = class_index.astype(np.intp)

    stat = probabilities[np.arange(class_index.size), class_index]
    if sample_weight is not None:
        # Out-of-place multiply: an in-place `*=` fails when `stat` has an
        # integer dtype and the weights are floats.
        stat = stat * np.asarray(sample_weight).reshape(-1)
    return stat

In [3]:
# #####################################
# #   2. Data Loading & Preprocessing #
# #####################################
# def load_bank_dataset():
#     """
#     Loads the preprocessed bank dataset and returns:
#       - features X,
#       - labels y,
#       - index of protected attribute,
#       - the BinaryLabelDataset (AIF360),
#       - the original dataframe,
#       - protected attribute name,
#       - label name.
#     """
#     df = pd.read_csv('./data/bank_preprocessed.csv')
#     if 'Unnamed: 0' in df.columns:
#         df = df.drop(columns=['Unnamed: 0'])
#     protected_attribute_name = 'age'
#     label_name = 'y'
    
#     dataset_binary = BinaryLabelDataset(
#         favorable_label=0,
#         unfavorable_label=1,
#         df=df,
#         label_names=[label_name],
#         protected_attribute_names=[protected_attribute_name]
#     )
    
#     X = dataset_binary.features
#     y = dataset_binary.labels.ravel().astype(int)
#     protected_attribute_index = df.columns.get_loc(protected_attribute_name)
#     return X, y, protected_attribute_index, dataset_binary, df, protected_attribute_name, label_name

def load_dataset(dataset):
    """
    Build the requested dataset through DatasetBuilder and unpack it.

    Returns a 7-tuple:
      (features, integer labels, column position of the protected attribute,
       the dataset object returned by DatasetBuilder.load_data(), its dataframe
       view, protected attribute name, label name).
    Only the first protected attribute and the first label name are used.
    """
    builder = DatasetBuilder(dataset)
    dataset_binary = builder.load_data()

    features = dataset_binary.features
    labels = dataset_binary.labels.ravel().astype(int)

    # convert_to_dataframe() returns a sequence whose first item is the frame.
    frame_and_meta = dataset_binary.convert_to_dataframe()
    df = frame_and_meta[0]

    protected_attribute_name = dataset_binary.protected_attribute_names[0]
    label_name = dataset_binary.label_names[0]
    protected_attribute_index = df.columns.get_loc(protected_attribute_name)

    return (features, labels, protected_attribute_index, dataset_binary,
            df, protected_attribute_name, label_name)

In [4]:
#####################################
#         3. Model Builders         #
#####################################
def simple_nn_reduced(input_dim):
    """
    Build a small fully connected Keras Sequential network.

    Architecture: Input(input_dim) -> Dense(32, relu) -> Dense(16, relu) -> Dense(2).
    The final layer has no activation, so the model outputs two raw scores per
    example. The model is returned uncompiled.
    """
    keras_layers = tf.keras.layers
    stack = [
        keras_layers.Input(shape=(input_dim,)),
        keras_layers.Dense(32, activation='relu'),
        keras_layers.Dense(16, activation='relu'),
        keras_layers.Dense(2),
    ]
    return tf.keras.Sequential(stack)


def scikit_learn_model():
    """Create a fresh, unfitted DecisionTreeClassifier limited to depth 10."""
    # No random_state is passed, so the estimator uses scikit-learn's default seeding.
    classifier = DecisionTreeClassifier(max_depth=10)
    return classifier

In [5]:
##############################################
# 4. Statistics and Loss Extraction Functions
##############################################
def get_stat_and_loss_tabular(model, x, y, batch_size=256, use_proba: bool = True):
    """
    Score every example in `x` with `model` and derive the attack inputs.

    - use_proba=True: scores come from `model.predict_proba(x)`.
    - use_proba=False: scores come from `model.predict(x, batch_size=...)`;
      when that output has more than one column it is passed through softmax.

    Returns:
        (stats, losses), each with an extra axis inserted at position 1, where
        stats is the true-class probability and losses the per-example log loss.
    """
    if use_proba:
        scores = model.predict_proba(x)
    else:
        scores = model.predict(x, batch_size=batch_size)
        multi_column = scores.shape[1] > 1
        if multi_column:
            scores = special.softmax(scores, axis=-1)

    per_example_loss = log_loss(y, scores)
    per_example_stat = calculate_statistic(scores, y)
    return per_example_stat[:, np.newaxis], per_example_loss[:, np.newaxis]

In [6]:
############################################
# 5. Accuracy & Metric Calculation Methods #
############################################
def calculate_subpopulation_accuracies(X_combined, y_combined, protected_attribute_index, model):
    """
    Accuracy of `model` on the four (protected group x label) subpopulations.

    Args:
        X_combined: Feature matrix, either a pandas DataFrame or a 2-D numpy array.
        y_combined: Labels aligned positionally with the rows of `X_combined`.
        protected_attribute_index: Column position of the protected attribute;
            value 1 is treated as privileged, 0 as unprivileged.
        model: Fitted estimator exposing `predict`.

    Returns:
        dict mapping subgroup name to accuracy. A subgroup without any rows is
        reported as NaN instead of being passed to `model.predict`, which would
        fail on zero samples for scikit-learn estimators.
    """
    is_frame = isinstance(X_combined, pd.DataFrame)
    # Work positionally in both cases so a DataFrame index that differs from
    # the label index cannot misalign the masks.
    if is_frame:
        protected = X_combined.iloc[:, protected_attribute_index].to_numpy()
    else:
        protected = X_combined[:, protected_attribute_index]
    labels = np.asarray(y_combined).reshape(-1)

    subgroup_masks = {
        'Privileged Favorable': (protected == 1) & (labels == 1),
        'Unprivileged Favorable': (protected == 0) & (labels == 1),
        'Unprivileged Unfavorable': (protected == 0) & (labels == 0),
        'Privileged Unfavorable': (protected == 1) & (labels == 0),
    }

    results = {}
    for group_name, mask in subgroup_masks.items():
        rows = np.flatnonzero(mask)
        if rows.size == 0:
            results[group_name] = float('nan')
            continue
        X_subgroup = X_combined.iloc[rows] if is_frame else X_combined[rows]
        predictions = model.predict(X_subgroup)
        results[group_name] = accuracy_score(labels[rows], predictions)
    return results




def get_metrics(X_test, y_test, y_pred, protected_attribute_index):
    """
    Compute performance and group-fairness metrics with AIF360's ClassificationMetric.

    Features are wrapped as columns "feature_0", "feature_1", ...; the column at
    `protected_attribute_index` is the protected attribute (1 = privileged,
    0 = unprivileged) and label 1 is treated as favorable.

    Returns a dict with balanced accuracy, average odds difference, a
    disparate-impact score expressed as 1 - min(DI, 1/DI), statistical parity
    difference, equal opportunity difference and the Theil index.
    """
    column_names = [f'feature_{i}' for i in range(X_test.shape[1])]
    protected_column = f'feature_{protected_attribute_index}'

    def _wrap(label_values):
        # One BinaryLabelDataset per label vector, sharing the same features.
        frame = pd.DataFrame(X_test, columns=column_names)
        frame['label'] = label_values
        return BinaryLabelDataset(
            favorable_label=1,
            unfavorable_label=0,
            df=frame,
            label_names=['label'],
            protected_attribute_names=[protected_column]
        )

    dataset_true = _wrap(y_test)
    dataset_pred = _wrap(y_pred)

    cm = ClassificationMetric(
        dataset_true,
        dataset_pred,
        unprivileged_groups=[{protected_column: 0}],
        privileged_groups=[{protected_column: 1}]
    )

    balanced_accuracy = (cm.sensitivity() + cm.specificity()) / 2
    average_odds = cm.average_odds_difference()
    di = cm.disparate_impact()
    # Symmetric distance from parity: 0 when DI == 1, approaching 1 as groups diverge.
    di_gap = 1 - min(di, 1 / di)

    return {
        'balanced_accuracy': balanced_accuracy,
        'average_odds_difference': average_odds,
        'disparate_impact': di_gap,
        'statistical_parity_difference': cm.statistical_parity_difference(),
        'equal_opportunity_difference': cm.equal_opportunity_difference(),
        'theil_index': cm.theil_index()
    }

def print_mean_accuracies(accuracies_train, accuracies_test, train_subpop, test_subpop):
    """
    Print the mean overall accuracy and the mean accuracy per subpopulation.

    `accuracies_*` are sequences of per-model accuracies; `*_subpop` are
    non-empty lists of dicts (one per model) keyed by subgroup name. The keys
    of the first dict determine which subgroups are reported.
    """
    def _average_by_group(per_model):
        # Average each subgroup's accuracy over all models.
        return {name: np.mean([entry[name] for entry in per_model])
                for name in per_model[0].keys()}

    print("Mean Train Accuracy (Overall):", np.mean(accuracies_train))
    print("Mean Test Accuracy (Overall):", np.mean(accuracies_test))
    print('-------------------')

    # Both summaries are computed before any subgroup line is printed.
    train_means = _average_by_group(train_subpop)
    test_means = _average_by_group(test_subpop)

    print("Mean Train Accuracy (Subpopulations):")
    for name in train_means:
        print(f"{name}: {train_means[name]}")
    print("\nMean Test Accuracy (Subpopulations):")
    for name in test_means:
        print(f"{name}: {test_means[name]}")
    print()


In [7]:
##############################################
# 6. Membership Inference Attack Functions   #
##############################################
def perform_mia(in_indices, stats, losses, num_shadows=5):
    """
    Run the LiRA attack in a leave-one-out fashion over the trained models.

    Each of the `num_shadows + 1` models is taken as the target in turn while
    the remaining ones act as shadow models. `in_indices` and `stats` must be
    Python lists (they are sliced and concatenated with `+`) holding one
    boolean membership mask and one statistics array per model. `losses` is
    accepted for signature compatibility but is not used.

    Returns:
        (per-target AUC values rounded to 6 decimals, their rounded mean).
    """
    auc_values = []
    for target in range(num_shadows + 1):
        target_stat = stats[target]
        target_membership = in_indices[target]

        # Every model except the current target serves as a shadow model.
        shadow_stat = np.array(stats[:target] + stats[target + 1:])
        shadow_membership = np.array(in_indices[:target] + in_indices[target + 1:])

        # Per example: shadow statistics split by whether the shadow trained on it.
        stat_in, stat_out = [], []
        for example in range(len(target_stat)):
            example_stats = shadow_stat[:, example]
            trained_on = shadow_membership[:, example]
            stat_in.append(example_stats[trained_on])
            stat_out.append(example_stats[~trained_on])

        scores = amia.compute_score_lira(target_stat, stat_in, stat_out, fix_variance=True)
        attack_input = AttackInputData(
            loss_train=scores[target_membership],
            loss_test=scores[~target_membership]
        )
        first_result = mia.run_attacks(attack_input).single_attack_results[0]
        auc_values.append(first_result.get_auc())

    return np.round(auc_values, 6), np.round(np.mean(auc_values), 6)


def perform_mia_on_subgroups(X_combined, y_combined, protected_attr,
                             in_indices, stats, losses, num_shadows=5):
    """
    Run `perform_mia` separately on each (protected group x label) subgroup.

    For a numpy `X_combined`, `protected_attr` is a column position; otherwise
    `X_combined` is indexed with `protected_attr` directly (e.g. a DataFrame
    column name). Protected value 1 is privileged, 0 unprivileged; label 1 is
    favorable.

    Prints the per-model AUCs for every subgroup and returns a dict mapping
    subgroup name to the mean AUC.
    """
    if isinstance(X_combined, np.ndarray):
        protected_values = X_combined[:, protected_attr]
    else:
        # Assuming X_combined is a DataFrame
        protected_values = X_combined[protected_attr]

    privileged = protected_values == 1
    unprivileged = protected_values == 0
    favorable = y_combined == 1
    unfavorable = y_combined == 0

    subgroups = {
        'Privileged Favorable': privileged & favorable,
        'Unprivileged Favorable': unprivileged & favorable,
        'Unprivileged Unfavorable': unprivileged & unfavorable,
        'Privileged Unfavorable': privileged & unfavorable,
    }

    results_dict = {}
    print("Results for subgroup:\n")
    for group_name in subgroups:
        rows = np.where(subgroups[group_name])[0]

        # Restrict every per-model array to the rows of this subgroup.
        def _restrict(per_model_arrays):
            return [arr[rows] for arr in per_model_arrays]

        mia_results, mia_mean = perform_mia(_restrict(in_indices),
                                            _restrict(stats),
                                            _restrict(losses),
                                            num_shadows=num_shadows)
        results_dict[group_name] = mia_mean
        print(f"{group_name}: {mia_results}\nMean: {mia_mean}")
        print('---------------------')
    return results_dict

In [8]:
#####################################
# 7. Oversampling/Synthetic Methods #
#####################################
def group_indices(dataset, unprivileged_groups):
    """
    Split row positions of `dataset` by membership in `unprivileged_groups`.

    Returns:
        (positions matching the unprivileged condition, all other positions),
        both as Python lists in ascending order.
    """
    membership = utils.compute_boolean_conditioning_vector(
        dataset.features, dataset.feature_names, unprivileged_groups)

    matching, remaining = [], []
    for position, is_unprivileged in enumerate(membership):
        if is_unprivileged:
            matching.append(position)
        else:
            remaining.append(position)
    return matching, remaining


def balance(dataset, n_extra, inflate_rate, f_label, uf_label):
    """
    Oversample the `f_label` class of `dataset` with ADASYN and keep up to
    `n_extra` synthetic rows.

    The `uf_label` rows are repeated `inflate_rate` times before resampling so
    that `f_label` becomes the minority class ADASYN generates samples for.

    Args:
        dataset: Dataset exposing features, labels, instance_weights,
            protected_attributes, subset() and copy() (AIF360-style).
        n_extra: Requested number of synthetic rows. Often a float computed
            from base rates; it is truncated to an int and clamped at 0.
        inflate_rate: Repetition factor applied to the `uf_label` rows.
        f_label: Label value of the class to oversample.
        uf_label: Label value of the other class.

    Returns:
        (dataset_transf_train, dataset_extra_train): the full rebalanced
        dataset (uf rows, original f rows, synthetic f rows) and a dataset
        holding only the synthetic rows. Weights and protected attributes of
        synthetic rows are drawn at random from the original `f_label` rows,
        independently for the two returned datasets.

    Notes:
        - Assumes the resampler returns the original rows first and appends
          synthetic rows after them - TODO confirm against the imblearn version
          in use.
        - If fewer than `n_extra` synthetic rows are produced, only those are
          returned. The previous `X[-n_extra:]` slice returned *all* rows when
          `n_extra` truncated to 0 and could hand back real rows as "extra".
        - Protected attributes are flattened before sampling, which presumes a
          single protected attribute column.
    """
    n_extra = max(int(n_extra), 0)

    dataset_transf_train = dataset.copy(deepcopy=True)
    f_indices = np.where(dataset.labels == f_label)[0].tolist()
    uf_indices = np.where(dataset.labels == uf_label)[0].tolist()
    f_dataset = dataset.subset(f_indices)
    uf_dataset = dataset.subset(uf_indices)
    n_original = f_dataset.features.shape[0]

    inflated_uf_features = np.repeat(uf_dataset.features, inflate_rate, axis=0)
    sample_features = np.concatenate((f_dataset.features, inflated_uf_features))
    inflated_uf_labels = np.repeat(uf_dataset.labels, inflate_rate, axis=0)
    sample_labels = np.concatenate((f_dataset.labels, inflated_uf_labels))

    oversample = ADASYN(sampling_strategy='minority')
    X, y = oversample.fit_resample(sample_features, sample_labels)
    y = y.reshape(-1, 1)

    # Only keep samples of f_label (favorable), capped at originals + n_extra.
    keep_rows = np.where(y == f_label)[0]
    X = X[keep_rows][:n_original + n_extra, :]
    y = y[keep_rows][:n_original + n_extra].reshape(-1, 1)

    # Number of synthetic rows that were actually obtained (<= n_extra).
    inc = X.shape[0] - n_original

    # Set new instance weights and protected attributes for extra samples
    instance_weights_list = (f_dataset.instance_weights.flatten().tolist()
                             if isinstance(f_dataset.instance_weights, np.ndarray)
                             else f_dataset.instance_weights)
    protected_attributes_list = (f_dataset.protected_attributes.flatten().tolist()
                                 if isinstance(f_dataset.protected_attributes, np.ndarray)
                                 else f_dataset.protected_attributes)
    new_weights = [random.choice(instance_weights_list) for _ in range(inc)]
    new_attributes = np.array([random.choice(protected_attributes_list) for _ in range(inc)]).reshape(-1, 1)

    dataset_transf_train.features = np.concatenate((uf_dataset.features, X))
    dataset_transf_train.labels = np.concatenate((uf_dataset.labels, y))
    dataset_transf_train.instance_weights = np.concatenate((uf_dataset.instance_weights, f_dataset.instance_weights, new_weights))
    dataset_transf_train.protected_attributes = np.concatenate((uf_dataset.protected_attributes, f_dataset.protected_attributes, new_attributes))

    # Also create an extra dataset with just the new samples. Slicing from
    # n_original (not with a negative index) stays correct when inc == 0.
    dataset_extra_train = dataset.copy()
    extra_weights = [random.choice(instance_weights_list) for _ in range(inc)]
    extra_attributes = np.array([random.choice(protected_attributes_list) for _ in range(inc)]).reshape(-1, 1)
    dataset_extra_train.features = X[n_original:]
    dataset_extra_train.labels = y[n_original:].reshape(-1, 1)
    dataset_extra_train.instance_weights = np.asarray(extra_weights, dtype=float)
    dataset_extra_train.protected_attributes = extra_attributes
    return dataset_transf_train, dataset_extra_train


def synthetic_balance(dataset, unprivileged_groups, bp, bnp, f_label, uf_label, sampling_strategy=1.0):
    """
    Add synthetic unprivileged rows when that group has fewer favorable
    examples than the privileged group.

    Two rounds of `balance` are run on the unprivileged subset: first for
    favorable rows (enough to close the gap to the privileged favorable count,
    scaled by `sampling_strategy`), then for unfavorable rows (count derived
    from `bp`). Both sets of extra rows are appended to a deep copy of
    `dataset`. If the unprivileged group is not behind, the copy is returned
    unchanged. `bnp` is accepted but not used here.
    """
    result = dataset.copy(deepcopy=True)
    unpriv_rows, priv_rows = group_indices(dataset, unprivileged_groups)
    unprivileged_dataset = dataset.subset(unpriv_rows)
    privileged_dataset = dataset.subset(priv_rows)

    n_unpriv_favor = np.count_nonzero(unprivileged_dataset.labels == f_label)
    n_unpriv_unfavor = np.count_nonzero(unprivileged_dataset.labels != f_label)
    n_priv_favor = np.count_nonzero(privileged_dataset.labels == f_label)

    if not n_unpriv_favor < n_priv_favor:
        return result

    def _inflate_rate(target_size, other_size):
        # Repetition factor for the other class: truncate when the target is
        # at least as large as the other class, otherwise round.
        ratio_plus_one = (target_size / other_size) + 1
        return int(ratio_plus_one) if target_size >= other_size else round(ratio_plus_one)

    # Round 1: synthetic favorable rows.
    n_extra_sample = (n_priv_favor - n_unpriv_favor) * sampling_strategy
    favor_total = n_extra_sample + n_unpriv_favor
    _, extra_favored = balance(unprivileged_dataset, n_extra_sample,
                               _inflate_rate(favor_total, n_unpriv_unfavor),
                               f_label, uf_label)

    # Round 2: synthetic unfavorable rows, sized from the base rate bp.
    n_extra_sample = (favor_total - bp * (favor_total + n_unpriv_unfavor)) / bp
    unfavor_total = n_extra_sample + n_unpriv_unfavor
    _, extra_unfavored = balance(unprivileged_dataset, n_extra_sample,
                                 _inflate_rate(unfavor_total, n_unpriv_favor),
                                 uf_label, f_label)

    for field in ('features', 'labels', 'instance_weights', 'protected_attributes'):
        merged = np.concatenate((getattr(result, field),
                                 getattr(extra_favored, field),
                                 getattr(extra_unfavored, field)))
        setattr(result, field, merged)
    return result


def synthetic_favor_unpriv(dataset, unprivileged_groups, bp, bnp, f_label, uf_label, sampling_strategy=1.0):
    """
    Generate synthetic favorable rows for the unprivileged group.

    The number of rows requested from `balance` is
    (bp * |unprivileged| - favorable_count) / (1 - bp) * sampling_strategy.
    `bnp` is accepted but not used here.

    Returns:
        (unprivileged subset of `dataset`, dataset holding the synthetic rows).
    """
    unpriv_rows, priv_rows = group_indices(dataset, unprivileged_groups)
    unprivileged_dataset = dataset.subset(unpriv_rows)
    privileged_dataset = dataset.subset(priv_rows)  # built as in the original; not used below

    favor_count = np.count_nonzero(unprivileged_dataset.labels == f_label)
    unfavor_count = np.count_nonzero(unprivileged_dataset.labels != f_label)

    n_extra_sample = (bp * len(unpriv_rows) - favor_count) / (1 - bp) * sampling_strategy

    # Truncate when the favorable target is at least the unfavorable count, else round.
    favor_target = n_extra_sample + favor_count
    ratio_plus_one = (favor_target / unfavor_count) + 1
    inflate_rate = int(ratio_plus_one) if favor_target >= unfavor_count else round(ratio_plus_one)

    _, extra_favored_unpriv = balance(unprivileged_dataset, n_extra_sample, inflate_rate, f_label, uf_label)
    return unprivileged_dataset, extra_favored_unpriv


def synthetic_unfavor_priv(dataset, unprivileged_groups, bp, bnp, f_label, uf_label, sampling_strategy=1.0):
    """
    Generate synthetic unfavorable rows for the privileged group.

    The number of rows requested from `balance` is
    (favorable_count - bnp * |privileged|) / bnp * sampling_strategy.
    `bp` is accepted but not used here.

    Returns:
        (privileged subset of `dataset`, dataset holding the synthetic rows).
    """
    unpriv_rows, priv_rows = group_indices(dataset, unprivileged_groups)
    unprivileged_dataset = dataset.subset(unpriv_rows)  # built as in the original; not used below
    privileged_dataset = dataset.subset(priv_rows)

    favor_count = np.count_nonzero(privileged_dataset.labels == f_label)
    unfavor_count = np.count_nonzero(privileged_dataset.labels != f_label)

    n_extra_sample = (favor_count - bnp * len(priv_rows)) / bnp * sampling_strategy

    # Truncate when the unfavorable target is at least the favorable count, else round.
    unfavor_target = n_extra_sample + unfavor_count
    ratio_plus_one = (unfavor_target / favor_count) + 1
    inflate_rate = int(ratio_plus_one) if unfavor_target >= favor_count else round(ratio_plus_one)

    _, extra_unfavored_priv = balance(privileged_dataset, n_extra_sample, inflate_rate, uf_label, f_label)
    return privileged_dataset, extra_unfavored_priv


def synthetic(dataset, unprivileged_groups, bp, bnp, f_label, uf_label, os_mode=2, sampling_strategy=0.5):
    """
    Return a copy of `dataset` extended with synthetic rows.

    When bp < bnp the work is delegated to `synthetic_balance`. Otherwise
    `os_mode` selects what is oversampled:
      1: unfavorable privileged rows,
      2: favorable unprivileged rows,
      3: both (privileged-unfavorable first, then unprivileged-favorable).
    Any other value terminates via sys.exit.

    NOTE: the `sampling_strategy` argument is not forwarded; the helper
    functions are always called with sampling_strategy=1.0.
    """
    augmented = dataset.copy(deepcopy=True)
    if bp < bnp:
        return synthetic_balance(dataset, unprivileged_groups, bp, bnp, f_label, uf_label)

    if os_mode == 1:
        generators = (synthetic_unfavor_priv,)
    elif os_mode == 2:
        generators = (synthetic_favor_unpriv,)
    elif os_mode == 3:
        generators = (synthetic_unfavor_priv, synthetic_favor_unpriv)
    else:
        sys.exit("Oversampling mode is missing: 1, 2, or 3 must be specified.")

    # Every generator samples from the untouched input dataset; its rows are
    # then appended to the running copy.
    for generate in generators:
        _, extra = generate(dataset, unprivileged_groups, bp, bnp, f_label, uf_label, sampling_strategy=1.0)
        augmented.features = np.concatenate((augmented.features, extra.features))
        augmented.labels = np.concatenate((augmented.labels, extra.labels))
        augmented.instance_weights = np.concatenate((augmented.instance_weights, extra.instance_weights))
        augmented.protected_attributes = np.concatenate((augmented.protected_attributes, extra.protected_attributes))
    return augmented


In [9]:
##############################################
# 8. Training Functions for Different Scenarios
##############################################
def train_shadow_model(X, y, indices, protected_attribute_index, model_builder=scikit_learn_model):
    """
    Fit one model on the rows selected by the boolean mask `indices` and
    evaluate it on the complementary rows.

    Returns a dict with the fitted model, train/test accuracy, fairness
    metrics on the held-out rows, subpopulation accuracies for both splits,
    and the per-example statistic/loss arrays computed over ALL rows of X
    (via predict_proba).
    """
    model = model_builder()

    held_out = ~indices
    X_in, y_in = X[indices], y[indices]
    X_out, y_out = X[held_out], y[held_out]

    model.fit(X_in, y_in)
    pred_train = model.predict(X_in)
    pred_test = model.predict(X_out)

    summary = {'model': model}
    metrics = get_metrics(X_out, y_out, pred_test, protected_attribute_index)
    summary['accuracy_train'] = accuracy_score(y_in, pred_train)
    summary['accuracy_test'] = accuracy_score(y_out, pred_test)
    summary['metrics'] = metrics
    summary['subpop_train'] = calculate_subpopulation_accuracies(X_in, y_in, protected_attribute_index, model)
    summary['subpop_test'] = calculate_subpopulation_accuracies(X_out, y_out, protected_attribute_index, model)

    # Attack inputs are needed for members and non-members alike.
    stat, loss = get_stat_and_loss_tabular(model, X, y, use_proba=True)
    summary['stat'] = stat
    summary['loss'] = loss
    return summary


def train_models(X, y, protected_attribute_index, num_shadows=5, model_builder=scikit_learn_model):
    """
    Train `num_shadows + 1` models, each on an independent random ~50% subset
    of the rows of X (Bernoulli(0.5) membership per row, drawn from the global
    numpy RNG).

    Returns, in this order: membership masks, statistics, losses, train
    subpopulation accuracies, test subpopulation accuracies, train accuracies,
    test accuracies, fairness metrics - each a list with one entry per model.
    """
    n_samples = X.shape[0]
    in_indices_list = []
    collected = {key: [] for key in ('stat', 'loss', 'subpop_train', 'subpop_test',
                                     'accuracy_train', 'accuracy_test', 'metrics')}

    for _ in range(num_shadows + 1):
        membership = np.random.binomial(1, 0.5, n_samples).astype(bool)
        in_indices_list.append(membership)

        outcome = train_shadow_model(X, y, membership, protected_attribute_index, model_builder)
        for key, bucket in collected.items():
            bucket.append(outcome[key])

        # Release Keras graph state and garbage between models.
        tf.keras.backend.clear_session()
        gc.collect()

    return (in_indices_list,
            collected['stat'],
            collected['loss'],
            collected['subpop_train'],
            collected['subpop_test'],
            collected['accuracy_train'],
            collected['accuracy_test'],
            collected['metrics'])


def train_models_syn(X, y, dataset_binary, protected_attribute_index, num_shadows=5,
                     model_builder=scikit_learn_model, transform_fn=synthetic):
    """
    Train `num_shadows + 1` models on synthetically oversampled training splits.

    For each model a random ~50% membership mask is drawn, the member rows of
    `dataset_binary` are transformed with `transform_fn(..., os_mode=2)` and
    the model is fitted on the transformed data; evaluation uses the untouched
    non-member rows. Subpopulation accuracies and the attack statistics are
    computed on the original X / y, not on the transformed training set.

    NOTE: this function reads the notebook-level globals `unprivileged_groups`,
    `base_rate_privileged_private`, `base_rate_unprivileged_private`,
    `f_label` and `uf_label`; they must be defined before it is called.

    Returns the same 8-tuple layout as `train_models`.
    """
    n_samples = X.shape[0]
    in_indices_list, stats, losses = [], [], []
    accuracies_train, accuracies_test = [], []
    subpop_train_list, subpop_test_list = [], []
    all_metrics = []

    for _ in range(num_shadows + 1):
        member_mask = np.random.binomial(1, 0.5, n_samples).astype(bool)
        in_indices_list.append(member_mask)
        non_member_mask = ~member_mask

        train_split = dataset_binary.subset(member_mask)
        val_split = dataset_binary.subset(non_member_mask)

        # Apply synthetic transformation on training dataset:
        augmented_train = transform_fn(train_split,
                                       unprivileged_groups,
                                       base_rate_privileged_private,
                                       base_rate_unprivileged_private,
                                       f_label, uf_label, os_mode=2)
        X_train = augmented_train.features
        y_train = augmented_train.labels.ravel().astype(int)
        X_test = val_split.features
        y_test = val_split.labels.ravel().astype(int)

        model = model_builder()
        model.fit(X_train, y_train)
        pred_train = model.predict(X_train)
        pred_test = model.predict(X_test)

        all_metrics.append(get_metrics(X_test, y_test, pred_test, protected_attribute_index))

        acc_train = accuracy_score(y_train, pred_train)
        acc_test = accuracy_score(y_test, pred_test)
        accuracies_train.append(acc_train)
        accuracies_test.append(acc_test)

        subpop_train_list.append(
            calculate_subpopulation_accuracies(X[member_mask], y[member_mask], protected_attribute_index, model))
        subpop_test_list.append(
            calculate_subpopulation_accuracies(X[non_member_mask], y[non_member_mask], protected_attribute_index, model))

        stat, loss = get_stat_and_loss_tabular(model, X, y, use_proba=True)
        stats.append(stat)
        losses.append(loss)

        print(f"Accuracy scores: {acc_train}, {acc_test}")
        print('-----------------------------')
        tf.keras.backend.clear_session()
        gc.collect()

    return (in_indices_list, stats, losses, subpop_train_list, subpop_test_list,
            accuracies_train, accuracies_test, all_metrics)


def train_syn_target(X, y, dataset_binary, protected_attribute_index, num_shadows=5,
                     model_builder=scikit_learn_model):
    """
    Evaluate LiRA membership inference against a target trained on synthetic data.

    For every ``target_idx`` in ``range(num_shadows + 1)`` a fresh pool of
    ``num_shadows + 1`` models is trained, each on its own random in/out split of the
    rows (every row is "in" with probability 0.5). The model at position ``target_idx``
    is fitted on the output of ``synthetic(...)`` applied to its training subset; all
    other models (the shadows) are fitted on the untouched ``X``/``y`` rows. The LiRA
    attack is then scored against the target, both on all rows and on each of the four
    protected-attribute/label subgroups.

    Relies on module-level names defined elsewhere in the notebook: ``synthetic``,
    ``unprivileged_groups``, ``base_rate_privileged_private``,
    ``base_rate_unprivileged_private``, ``f_label``, ``uf_label``, ``get_metrics``,
    ``calculate_subpopulation_accuracies`` and ``get_stat_and_loss_tabular``.

    Args:
        X: 2-D NumPy feature array (indexed as ``X[:, protected_attribute_index]``).
        y: 1-D label array aligned with ``X``; subgroups are built from ``y == 1`` / ``y == 0``.
        dataset_binary: AIF360 dataset holding the same rows; used for the target's split.
        protected_attribute_index: Column of ``X`` holding the protected attribute (1 = privileged).
        num_shadows: Number of shadow models per round (``num_shadows + 1`` models in total).
        model_builder: Zero-argument factory returning an unfitted estimator.

    Returns:
        Tuple ``(overall_results, overall_mean, subgroup_results, subgroup_means,
        train_accuracies, test_accuracies, subpop_train_list, subpop_test_list,
        all_metrics)``. The list-valued entries hold one item per target round.
    """
    def _lira_auc(stats, in_indices_list, target_idx, rows=None):
        # AUC of the LiRA attack on model `target_idx`; `rows` optionally restricts
        # every per-model array to a subset of the samples (used for subgroups).
        if rows is not None:
            stats = [arr[rows] for arr in stats]
            in_indices_list = [arr[rows] for arr in in_indices_list]
        stat_target = stats[target_idx]
        in_target = in_indices_list[target_idx]
        shadow_ids = [k for k in range(len(stats)) if k != target_idx]
        stat_shadow = np.array([stats[k] for k in shadow_ids])
        in_shadow = np.array([in_indices_list[k] for k in shadow_ids])
        n_rows = len(stat_target)
        # For each sample, gather the shadow statistics from models that did / did not
        # train on it.
        stat_in = [stat_shadow[:, j][in_shadow[:, j]] for j in range(n_rows)]
        stat_out = [stat_shadow[:, j][~in_shadow[:, j]] for j in range(n_rows)]
        scores = amia.compute_score_lira(stat_target, stat_in, stat_out, fix_variance=True)
        attack_input = AttackInputData(loss_train=scores[in_target],
                                       loss_test=scores[~in_target])
        return mia.run_attacks(attack_input).single_attack_results[0].get_auc()

    n_samples = X.shape[0]
    n_models = num_shadows + 1
    overall_results = []
    subgroup_results = {}
    train_accuracies = []
    test_accuracies = []
    subpop_train_list = []
    subpop_test_list = []
    all_metrics = []

    protected = X[:, protected_attribute_index]
    subgroups = {
        'Privileged Favorable': ((protected == 1) & (y == 1)),
        'Unprivileged Favorable': ((protected == 0) & (y == 1)),
        'Unprivileged Unfavorable': ((protected == 0) & (y == 0)),
        'Privileged Unfavorable': ((protected == 1) & (y == 0)),
    }
    # Subgroup membership does not depend on the round, so resolve row positions once.
    subgroup_rows = {name: np.where(mask)[0] for name, mask in subgroups.items()}

    for target_idx in range(n_models):
        print(f"Training Model #{target_idx} as the Target")
        in_indices_list = []
        stats = []

        for i in range(n_models):
            indices = np.random.binomial(1, 0.5, n_samples).astype(bool)
            in_indices_list.append(indices)
            is_target = (i == target_idx)

            if is_target:
                dataset_train = dataset_binary.subset(indices)
                dataset_val = dataset_binary.subset(~indices)
                transformed_dataset = synthetic(dataset_train,
                                                unprivileged_groups,
                                                base_rate_privileged_private,
                                                base_rate_unprivileged_private,
                                                f_label, uf_label, os_mode=2)
                X_train = transformed_dataset.features
                # Cast to int, as done elsewhere in this file for AIF360 label arrays.
                y_train = transformed_dataset.labels.ravel().astype(int)
                X_val = dataset_val.features
                y_val = dataset_val.labels.ravel().astype(int)
            else:
                X_train, y_train = X[indices], y[indices]

            model = model_builder()
            model.fit(X_train, y_train)

            if is_target:
                # Accuracy and fairness metrics are only recorded for the target model.
                pred_train = model.predict(X_train)
                pred_test = model.predict(X_val)
                train_accuracies.append(accuracy_score(y_train, pred_train))
                test_accuracies.append(accuracy_score(y_val, pred_test))
                all_metrics.append(get_metrics(X_val, y_val, pred_test, protected_attribute_index))
                subpop_train_list.append(
                    calculate_subpopulation_accuracies(X_train, y_train, protected_attribute_index, model))
                subpop_test_list.append(
                    calculate_subpopulation_accuracies(X_val, y_val, protected_attribute_index, model))

            # The attack statistic is always computed on the full original data.
            stat, _ = get_stat_and_loss_tabular(model, X, y, use_proba=True)
            stats.append(stat)

        print(f"Performing MIA for Target Model #{target_idx}")
        overall_results.append(_lira_auc(stats, in_indices_list, target_idx))

        # MIA on subgroups
        for group_name, rows in subgroup_rows.items():
            subgroup_results.setdefault(group_name, []).append(
                _lira_auc(stats, in_indices_list, target_idx, rows))

    overall_mean = np.round(np.mean(overall_results), 6)
    subgroup_means = {group: np.round(np.mean(vals), 6) for group, vals in subgroup_results.items()}

    print("\nOverall MIA Results:")
    print(f"Results: {np.round(overall_results, 6)}\nMean: {overall_mean}")
    print("\nSubgroup MIA Results:")
    for group, results in subgroup_results.items():
        print(f"{group}: Results={np.round(results, 6)}, Mean={subgroup_means[group]}")
    print("\nAccuracy Results:")
    print(f"Mean Train Accuracy (Overall): {np.mean(train_accuracies)}")
    print(f"Mean Test Accuracy (Overall): {np.mean(test_accuracies)}")
    for group in subgroups:
        mean_train_sub = np.mean([acc[group] for acc in subpop_train_list])
        mean_test_sub = np.mean([acc[group] for acc in subpop_test_list])
        print(f"{group}: Mean Train Accuracy = {mean_train_sub}, Mean Test Accuracy = {mean_test_sub}")

    return (overall_results, overall_mean, subgroup_results, subgroup_means,
            train_accuracies, test_accuracies, subpop_train_list, subpop_test_list, all_metrics)


def train_rew_target(X, y, dataset_binary, protected_attribute_index, num_shadows=5,
                     model_builder=scikit_learn_model):
    """
    Evaluate LiRA membership inference against a target trained with AIF360 Reweighing.

    For every ``target_idx`` in ``range(num_shadows + 1)`` a fresh pool of
    ``num_shadows + 1`` models is trained, each on its own random in/out split of the
    rows (every row is "in" with probability 0.5). The model at position ``target_idx``
    is fitted on its training subset with the instance weights produced by
    ``Reweighing.fit_transform``; all other models (the shadows) are fitted unweighted
    on the original ``X``/``y`` rows. The LiRA attack is then scored against the target,
    both on all rows and on each of the four protected-attribute/label subgroups.

    Relies on module-level names defined elsewhere in the notebook:
    ``unprivileged_groups``, ``privileged_groups``, ``get_metrics``,
    ``calculate_subpopulation_accuracies`` and ``get_stat_and_loss_tabular``.

    Args:
        X: 2-D NumPy feature array (indexed as ``X[:, protected_attribute_index]``).
        y: 1-D label array aligned with ``X``; subgroups are built from ``y == 1`` / ``y == 0``.
        dataset_binary: AIF360 dataset holding the same rows; used for the target's split.
        protected_attribute_index: Column of ``X`` holding the protected attribute (1 = privileged).
        num_shadows: Number of shadow models per round (``num_shadows + 1`` models in total).
        model_builder: Zero-argument factory returning an unfitted estimator whose
            ``fit`` accepts ``sample_weight``.

    Returns:
        Tuple ``(overall_results, overall_mean, subgroup_results, subgroup_means,
        train_accuracies, test_accuracies, subpop_train_list, subpop_test_list,
        all_metrics)``. The list-valued entries hold one item per target round.
    """
    def _lira_auc(stats, in_indices_list, target_idx, rows=None):
        # AUC of the LiRA attack on model `target_idx`; `rows` optionally restricts
        # every per-model array to a subset of the samples (used for subgroups).
        if rows is not None:
            stats = [arr[rows] for arr in stats]
            in_indices_list = [arr[rows] for arr in in_indices_list]
        stat_target = stats[target_idx]
        in_target = in_indices_list[target_idx]
        shadow_ids = [k for k in range(len(stats)) if k != target_idx]
        stat_shadow = np.array([stats[k] for k in shadow_ids])
        in_shadow = np.array([in_indices_list[k] for k in shadow_ids])
        n_rows = len(stat_target)
        # For each sample, gather the shadow statistics from models that did / did not
        # train on it.
        stat_in = [stat_shadow[:, j][in_shadow[:, j]] for j in range(n_rows)]
        stat_out = [stat_shadow[:, j][~in_shadow[:, j]] for j in range(n_rows)]
        scores = amia.compute_score_lira(stat_target, stat_in, stat_out, fix_variance=True)
        attack_input = AttackInputData(loss_train=scores[in_target],
                                       loss_test=scores[~in_target])
        return mia.run_attacks(attack_input).single_attack_results[0].get_auc()

    n_samples = X.shape[0]
    n_models = num_shadows + 1
    overall_results = []
    subgroup_results = {}
    train_accuracies = []
    test_accuracies = []
    subpop_train_list = []
    subpop_test_list = []
    all_metrics = []

    RW = Reweighing(unprivileged_groups=unprivileged_groups,
                    privileged_groups=privileged_groups)

    protected = X[:, protected_attribute_index]
    subgroups = {
        'Privileged Favorable': ((protected == 1) & (y == 1)),
        'Unprivileged Favorable': ((protected == 0) & (y == 1)),
        'Unprivileged Unfavorable': ((protected == 0) & (y == 0)),
        'Privileged Unfavorable': ((protected == 1) & (y == 0)),
    }
    # Subgroup membership does not depend on the round, so resolve row positions once.
    subgroup_rows = {name: np.where(mask)[0] for name, mask in subgroups.items()}

    for target_idx in range(n_models):
        print(f"Training Model #{target_idx} as the Target")
        in_indices_list = []
        stats = []
        for i in range(n_models):
            indices = np.random.binomial(1, 0.5, n_samples).astype(bool)
            in_indices_list.append(indices)

            if i == target_idx:
                dataset_train = dataset_binary.subset(indices)
                dataset_val = dataset_binary.subset(~indices)
                reweighted_dataset = RW.fit_transform(dataset_train)
                X_train = reweighted_dataset.features
                y_train = reweighted_dataset.labels.ravel().astype(int)
                X_val = dataset_val.features
                y_val = dataset_val.labels.ravel().astype(int)

                # Only the target model is trained with the Reweighing instance weights.
                model = model_builder()
                model.fit(X_train, y_train, sample_weight=reweighted_dataset.instance_weights)

                # Accuracy and fairness metrics are only recorded for the target model.
                pred_train = model.predict(X_train)
                pred_test = model.predict(X_val)
                train_accuracies.append(accuracy_score(y_train, pred_train))
                test_accuracies.append(accuracy_score(y_val, pred_test))
                all_metrics.append(get_metrics(X_val, y_val, pred_test, protected_attribute_index))
                subpop_train_list.append(
                    calculate_subpopulation_accuracies(X_train, y_train, protected_attribute_index, model))
                subpop_test_list.append(
                    calculate_subpopulation_accuracies(X_val, y_val, protected_attribute_index, model))
            else:
                model = model_builder()
                model.fit(X[indices], y[indices])

            # The attack statistic is always computed on the full original data.
            stat, _ = get_stat_and_loss_tabular(model, X, y, use_proba=True)
            stats.append(stat)

        print(f"Performing MIA for Target Model #{target_idx}")
        overall_results.append(_lira_auc(stats, in_indices_list, target_idx))
        for group_name, rows in subgroup_rows.items():
            subgroup_results.setdefault(group_name, []).append(
                _lira_auc(stats, in_indices_list, target_idx, rows))

    overall_mean = np.round(np.mean(overall_results), 6)
    subgroup_means = {group: np.round(np.mean(vals), 6) for group, vals in subgroup_results.items()}
    print("\nOverall MIA Results:")
    print(f"Results: {np.round(overall_results, 6)}\nMean: {overall_mean}")
    print("\nSubgroup MIA Results:")
    for group, results in subgroup_results.items():
        print(f"{group}: Results={np.round(results, 6)}, Mean={subgroup_means[group]}")
    print("\nAccuracy Results:")
    print(f"Mean Train Accuracy (Overall): {np.mean(train_accuracies)}")
    print(f"Mean Test Accuracy (Overall): {np.mean(test_accuracies)}")
    for group in subgroups:
        mean_train_sub = np.mean([acc[group] for acc in subpop_train_list])
        mean_test_sub = np.mean([acc[group] for acc in subpop_test_list])
        print(f"{group}: Mean Train Accuracy = {mean_train_sub}, Mean Test Accuracy = {mean_test_sub}")
    return overall_results, overall_mean, subgroup_results, subgroup_means, train_accuracies, test_accuracies, subpop_train_list, subpop_test_list, all_metrics


def train_models_eg(X, y, dataset_binary, protected_attribute_index, num_shadows=5,
                    model_builder=scikit_learn_model):
    """
    Train ``num_shadows + 1`` models with in-processing mitigation
    (``ExponentiatedGradientReduction`` under an ``EqualizedOdds`` constraint).

    Each model gets its own random in/out split of the rows (every row is "in" with
    probability 0.5). The base estimator wrapped by the mitigator comes from
    ``model_builder``.

    Relies on module-level names defined elsewhere in the notebook:
    ``protected_attribute_name``, ``get_metrics``,
    ``calculate_subpopulation_accuracies`` and ``get_stat_and_loss_tabular``.

    Args:
        X: Feature DataFrame (sliced with ``.iloc``).
        y: NumPy label array aligned with ``X``.
        dataset_binary: Unused here; kept so the signature matches the sibling trainers.
        protected_attribute_index: Positional index of the protected attribute column.
        num_shadows: Number of shadow models (``num_shadows + 1`` models are trained).
        model_builder: Zero-argument factory returning the unfitted base estimator.

    Returns:
        Tuple ``(in_indices_list, stats, losses, subpop_train_list, subpop_test_list,
        accuracies_train, accuracies_test, all_metrics)`` with one entry per model.
    """
    n_samples = X.shape[0]
    in_indices_list = []
    stats, losses = [], []
    accuracies_train, accuracies_test = [], []
    subpop_train_list, subpop_test_list = [], []
    all_metrics = []

    for _ in range(num_shadows + 1):
        indices = np.random.binomial(1, 0.5, n_samples).astype(bool)
        in_indices_list.append(indices)
        train_X, train_y = X.iloc[indices], y[indices]
        test_X, test_y = X.iloc[~indices], y[~indices]

        # Use the caller-supplied factory rather than a hard-coded estimator.
        model = model_builder()
        constraint = EqualizedOdds(difference_bound=0.001)
        mitigator = ExponentiatedGradientReduction(prot_attr=protected_attribute_name,
                                                   estimator=model,
                                                   constraints=constraint)
        mitigator.fit(train_X, train_y)
        pred_train = mitigator.predict(train_X)
        pred_test = mitigator.predict(test_X)

        # get_metrics is given a NumPy array here, hence the explicit conversion.
        all_metrics.append(get_metrics(test_X.to_numpy(), test_y, pred_test, protected_attribute_index))

        acc_train = accuracy_score(train_y, pred_train)
        acc_test = accuracy_score(test_y, pred_test)
        accuracies_train.append(acc_train)
        accuracies_test.append(acc_test)

        subpop_train_list.append(
            calculate_subpopulation_accuracies(train_X, train_y, protected_attribute_index, mitigator))
        subpop_test_list.append(
            calculate_subpopulation_accuracies(test_X, test_y, protected_attribute_index, mitigator))

        print(f"Accuracy scores: {acc_train}, {acc_test}")
        print('-----------------------------')

        # Pass X as DataFrame instead of converting to numpy array:
        stat, loss = get_stat_and_loss_tabular(mitigator, X, y, use_proba=True)
        stats.append(stat)
        losses.append(loss)

        tf.keras.backend.clear_session()
        gc.collect()
    return in_indices_list, stats, losses, subpop_train_list, subpop_test_list, accuracies_train, accuracies_test, all_metrics


In [10]:
def average_dicts(dict_list):
    """
    Average a list of dictionaries key by key.

    The dictionaries become the rows of a DataFrame (one column per key); the
    returned Series holds the mean of every column, indexed by key.
    """
    records = pd.DataFrame(dict_list)
    per_key_mean = records.mean()
    return per_key_mean

def save_summary_tables(dataset):
    """
    Merges and saves three summary tables as CSV files:
      1. Merged Accuracies: Combines overall (mean train and test) and subpopulation accuracies.
      2. Merged MIA: Combines the overall MIA score with subpopulation MIA scores.
      3. Merged Fairness: A table of average fairness metrics per mitigator.

    Files are written below ``new_results/`` (the sub-directories are created if
    they do not exist yet):
      new_results/lira_train_test_accuracies/lira_{dataset}_train_test_accuracies.csv
      new_results/lira_mia_results/lira_{dataset}_mia.csv
      new_results/lira_fairness/lira_{dataset}_fairness.csv

    Args:
        dataset: Dataset name; only used to build the file names and messages.

    The function assumes that the following global variables exist:

      -- ACCURACIES --
      accuracies_train_orig, accuracies_test_orig,
      accuracies_train_syn, accuracies_test_syn,
      train_accuracies_syn_target, test_accuracies_syn_target,
      accuracies_train_dir, accuracies_test_dir,
      train_accuracies_rew, test_accuracies_rew,
      accuracies_train_egr, accuracies_test_egr,

      train_subpop_orig, test_subpop_orig,
      train_subpop_syn, test_subpop_syn,
      train_subpop_syn_target, test_subpop_syn_target,
      train_subpop_dir, test_subpop_dir,
      train_subpop_rew, test_subpop_rew,
      train_subpop_egr, test_subpop_egr,

      -- MIA --
      mia_orig, mia_syn, mia_syn_target, mia_dir, mia_rew, mia_egr,
      results_mia_subpop_orig, results_mia_subpop_syn, subgroup_means_syn_target,
      results_mia_subpop_dir, subgroup_means_rew, results_mia_subpop_egr,

      -- FAIRNESS --
      all_metrics_orig, all_metrics_syn, all_metrics_syn_target,
      all_metrics_dir, all_metrics_rew, all_metrics_egr.
    """
    def _write_csv(frame, path):
        # to_csv does not create missing parent directories, so make them first.
        os.makedirs(os.path.dirname(path), exist_ok=True)
        frame.to_csv(path)

    ### 1. Merged Accuracies Table ###
    # Overall accuracies (mean of each experiment)
    overall_acc = {
        "orig": [np.mean(accuracies_train_orig), np.mean(accuracies_test_orig)],
        "syn": [np.mean(accuracies_train_syn), np.mean(accuracies_test_syn)],
        "syn_target": [np.mean(train_accuracies_syn_target), np.mean(test_accuracies_syn_target)],
        "dir": [np.mean(accuracies_train_dir), np.mean(accuracies_test_dir)],
        "rew": [np.mean(train_accuracies_rew), np.mean(test_accuracies_rew)],
        "egr": [np.mean(accuracies_train_egr), np.mean(accuracies_test_egr)]
    }

    overall_acc_df = pd.DataFrame(overall_acc, index=["Overall Train Accuracy", "Overall Test Accuracy"])

    # Average train subpopulation accuracies per experiment
    train_subpop_agg = {
        "orig": average_dicts(train_subpop_orig),
        "syn": average_dicts(train_subpop_syn),
        "syn_target": average_dicts(train_subpop_syn_target),
        "dir": average_dicts(train_subpop_dir),
        "rew": average_dicts(train_subpop_rew),
        "egr": average_dicts(train_subpop_egr)
    }
    train_subpop_df = pd.DataFrame(train_subpop_agg)
    # Prefix row labels with "Train: "
    train_subpop_df.index = ["Train: " + str(idx) for idx in train_subpop_df.index]

    # Average test subpopulation accuracies per experiment
    test_subpop_agg = {
        "orig": average_dicts(test_subpop_orig),
        "syn": average_dicts(test_subpop_syn),
        "syn_target": average_dicts(test_subpop_syn_target),
        "dir": average_dicts(test_subpop_dir),
        "rew": average_dicts(test_subpop_rew),
        "egr": average_dicts(test_subpop_egr)
    }
    test_subpop_df = pd.DataFrame(test_subpop_agg)
    test_subpop_df.index = ["Test: " + str(idx) for idx in test_subpop_df.index]

    # Merge overall accuracies, train subpopulation, and test subpopulation vertically
    accuracies_df = pd.concat([overall_acc_df, train_subpop_df, test_subpop_df], axis=0)
    accuracies_df.index.name = "Accuracy Metric"
    _write_csv(accuracies_df,
               f"new_results/lira_train_test_accuracies/lira_{dataset}_train_test_accuracies.csv")
    print(f"Saved {dataset}_train_test_accuracies.csv")

    ### 2. Merged MIA Table ###
    # Overall MIA scores
    mia_overall = {
        "orig": mia_orig,
        "syn": mia_syn,
        "syn_target": mia_syn_target,
        "dir": mia_dir,
        "rew": mia_rew,
        "egr": mia_egr
    }
    mia_overall_df = pd.DataFrame(mia_overall, index=["Overall MIA"])

    # Subpopulation MIA scores (assumed to be dictionaries)
    mia_subpop_dict = {
        "orig": results_mia_subpop_orig,
        "syn": results_mia_subpop_syn,
        "syn_target": subgroup_means_syn_target,
        "dir": results_mia_subpop_dir,
        "rew": subgroup_means_rew,
        "egr": results_mia_subpop_egr
    }
    mia_subpop_df = pd.DataFrame(mia_subpop_dict)
    mia_subpop_df.index.name = "Subpopulation"

    # Merge overall and subpopulation MIA vertically
    mia_df = pd.concat([mia_overall_df, mia_subpop_df], axis=0)
    _write_csv(mia_df, f"new_results/lira_mia_results/lira_{dataset}_mia.csv")
    print(f"Saved {dataset}_mia.csv")

    ### 3. Merged Fairness Table ###
    # For each experiment, average the fairness metrics (list of dictionaries) into one Series.
    fairness_agg = {
        "orig": average_dicts(all_metrics_orig),
        "syn": average_dicts(all_metrics_syn),
        "syn_target": average_dicts(all_metrics_syn_target),
        "dir": average_dicts(all_metrics_dir),
        "rew": average_dicts(all_metrics_rew),
        "egr": average_dicts(all_metrics_egr)
    }
    fairness_df = pd.DataFrame(fairness_agg)
    fairness_df.index.name = "Fairness Metric"
    _write_csv(fairness_df, f"new_results/lira_fairness/lira_{dataset}_fairness.csv")
    # Report the fairness file (the message previously repeated the accuracies file name).
    print(f"Saved {dataset}_fairness.csv")

In [11]:
if __name__ == '__main__':
    # Shared experiment settings: one place to change the shadow-model count, and a
    # fixed seed so the random in/out splits drawn with np.random are repeatable.
    # Helpers that use other random sources are not covered by this seeding.
    NUM_SHADOWS = 5
    SEED = 42
    random.seed(SEED)
    np.random.seed(SEED)

    # Load data and define global variables
    # choices=['bank', 'compas_sex', 'compas_race', 'german_age', 'german_sex', 'meps19', 'grade', 'law_sex', 'law_race', 'law_gender_aif', 'law_race_aif']
    dataset = 'law_sex'
    X, y, protected_attribute_index, dataset_binary, df, protected_attribute_name, label_name = load_dataset(dataset)

    # Define groups for fairness (for reweighting and synthetic functions)
    privileged_groups = [{protected_attribute_name: 1}]
    unprivileged_groups = [{protected_attribute_name: 0}]

    metric_orig = BinaryLabelDatasetMetric(dataset_binary,
                                           unprivileged_groups=unprivileged_groups,
                                           privileged_groups=privileged_groups)
    f_label = dataset_binary.favorable_label
    uf_label = dataset_binary.unfavorable_label
    base_rate_privileged_private = metric_orig.base_rate(privileged=True)
    base_rate_unprivileged_private = metric_orig.base_rate(privileged=False)

    print("\n=== Training on Original Data ===")
    (in_indices_orig, stats_orig, losses_orig,
     train_subpop_orig, test_subpop_orig,
     accuracies_train_orig, accuracies_test_orig,
     all_metrics_orig) = train_models(X, y, protected_attribute_index, num_shadows=NUM_SHADOWS)
    print_mean_accuracies(accuracies_train_orig, accuracies_test_orig, train_subpop_orig, test_subpop_orig)

    mia_results_orig, mia_orig = perform_mia(in_indices_orig, stats_orig, losses_orig, num_shadows=NUM_SHADOWS)
    print(f"All MIA results on whole population: {mia_results_orig}\nMean: {mia_orig}")

    results_mia_subpop_orig = perform_mia_on_subgroups(X, y, protected_attribute_index, in_indices_orig,
                                                       stats_orig, losses_orig, num_shadows=NUM_SHADOWS)
    print("MIA accuracies on subpopulations:")
    print(results_mia_subpop_orig)

    print("\n=== Training with Synthetic Oversampling ===")
    (in_indices_syn, stats_syn, losses_syn,
     train_subpop_syn, test_subpop_syn,
     accuracies_train_syn, accuracies_test_syn,
     all_metrics_syn) = train_models_syn(X, y, dataset_binary, protected_attribute_index, num_shadows=NUM_SHADOWS)
    # Reported once; the original cell printed this same summary a second time further down.
    print_mean_accuracies(accuracies_train_syn, accuracies_test_syn, train_subpop_syn, test_subpop_syn)

    mia_results_syn, mia_syn = perform_mia(in_indices_syn, stats_syn, losses_syn, num_shadows=NUM_SHADOWS)
    print(f"All MIA results (synthetic): {mia_results_syn}\nMean: {mia_syn}")

    results_mia_subpop_syn = perform_mia_on_subgroups(X, y, protected_attribute_index, in_indices_syn,
                                                      stats_syn, losses_syn, num_shadows=NUM_SHADOWS)
    print("MIA accuracies on subpopulations (synthetic):")
    print(results_mia_subpop_syn)

    print("\n=== Training with Synthetic Target ===")
    (mia_results_syn_target, mia_syn_target, subgroup_results_syn_target, subgroup_means_syn_target,
     train_accuracies_syn_target, test_accuracies_syn_target, train_subpop_syn_target, test_subpop_syn_target,
     all_metrics_syn_target) = train_syn_target(X, y, dataset_binary, protected_attribute_index,
                                                num_shadows=NUM_SHADOWS)

    print("\n=== Training with Disparate Impact Remover (DIR) ===")
    # Apply DIR transformation to the original dataset
    DIR = DisparateImpactRemover(repair_level=0.5, sensitive_attribute=protected_attribute_name)
    dataset_dir = DIR.fit_transform(dataset_binary)
    X_dir = dataset_dir.features
    y_dir = dataset_dir.labels.ravel().astype(int)
    (in_indices_dir, stats_dir, losses_dir,
     train_subpop_dir, test_subpop_dir,
     accuracies_train_dir, accuracies_test_dir,
     all_metrics_dir) = train_models(X_dir, y_dir, protected_attribute_index, num_shadows=NUM_SHADOWS)
    print_mean_accuracies(accuracies_train_dir, accuracies_test_dir, train_subpop_dir, test_subpop_dir)
    mia_results_dir, mia_dir = perform_mia(in_indices_dir, stats_dir, losses_dir, num_shadows=NUM_SHADOWS)
    print(f"All MIA results (DIR): {mia_results_dir}\nMean: {mia_dir}")
    results_mia_subpop_dir = perform_mia_on_subgroups(X_dir, y_dir, protected_attribute_index, in_indices_dir,
                                                      stats_dir, losses_dir, num_shadows=NUM_SHADOWS)
    print("MIA accuracies on subpopulations (DIR):")
    print(results_mia_subpop_dir)

    print("\n=== Training with Reweighing ===")
    (mia_results_rew, mia_rew, subgroup_results_rew, subgroup_means_rew,
     train_accuracies_rew, test_accuracies_rew, train_subpop_rew, test_subpop_rew,
     all_metrics_rew) = train_rew_target(X, y, dataset_binary, protected_attribute_index,
                                         num_shadows=NUM_SHADOWS)

    print("\n=== Training with Inprocessing (Exponentiated Gradient) ===")
    # For the inprocessing example, use a DataFrame for X and a NumPy array for y.
    X_df = df.drop(columns=[label_name])
    y_arr = np.array(df[label_name]).astype(int)
    (in_indices_egr, stats_egr, losses_egr,
     train_subpop_egr, test_subpop_egr,
     accuracies_train_egr, accuracies_test_egr,
     all_metrics_egr) = train_models_eg(X_df, y_arr, dataset_binary, protected_attribute_index,
                                        num_shadows=NUM_SHADOWS)
    print_mean_accuracies(accuracies_train_egr, accuracies_test_egr, train_subpop_egr, test_subpop_egr)

    mia_results_egr, mia_egr = perform_mia(in_indices_egr, stats_egr, losses_egr, num_shadows=NUM_SHADOWS)
    print(f"All MIA results (EG): {mia_results_egr}\nMean: {mia_egr}")
    # NOTE(review): this call passes the protected attribute *name*, whereas every other
    # perform_mia_on_subgroups call passes the column index. Presumably intentional
    # because X_df is a DataFrame -- confirm against the helper's implementation.
    results_mia_subpop_egr = perform_mia_on_subgroups(X_df, y_arr, protected_attribute_name, in_indices_egr,
                                                      stats_egr, losses_egr, num_shadows=NUM_SHADOWS)
    print("MIA accuracies on subpopulations (EG):")
    print(results_mia_subpop_egr)



=== Training on Original Data ===
Mean Train Accuracy (Overall): 0.970526703567522
Mean Test Accuracy (Overall): 0.9377539923371326
-------------------
Mean Train Accuracy (Subpopulations):
Privileged Favorable: 0.9960177242770967
Unprivileged Favorable: 0.994895185293775
Unprivileged Unfavorable: 0.5169558224629642
Privileged Unfavorable: 0.4691864571200759

Mean Test Accuracy (Subpopulations):
Privileged Favorable: 0.9789956774873944
Unprivileged Favorable: 0.9755626750759637
Unprivileged Unfavorable: 0.20301896924920892
Privileged Unfavorable: 0.17297577112271734

All MIA results on whole population: [0.522809 0.525413 0.517595 0.52003  0.520301 0.516789]
Mean: 0.52049
Results for subgroup:

Privileged Favorable: [0.519536 0.518339 0.500272 0.510434 0.507122 0.502825]
Mean: 0.509755
---------------------
Unprivileged Favorable: [0.497821 0.507615 0.516576 0.508788 0.515612 0.508658]
Mean: 0.509178
---------------------
Unprivileged Unfavorable: [0.679282 0.694937 0.609918 0.683561 

Accuracy scores: 0.9711668273866924, 0.9396816263904871
-----------------------------
Accuracy scores: 0.9736816920279453, 0.9372886269204754
-----------------------------
Accuracy scores: 0.9725369694641828, 0.9394260400616333
-----------------------------
Accuracy scores: 0.9708212560386473, 0.9352029096477795
-----------------------------
Accuracy scores: 0.9665807314045641, 0.9405326876513317
-----------------------------
Accuracy scores: 0.9697642967542504, 0.9401684855447061
-----------------------------
Mean Train Accuracy (Overall): 0.9707586288460469
Mean Test Accuracy (Overall): 0.9387167293694022
-------------------
Mean Train Accuracy (Subpopulations):
Privileged Favorable: 0.9980203343137575
Unprivileged Favorable: 0.996846724306609
Unprivileged Unfavorable: 0.4753125548390646
Privileged Unfavorable: 0.4710432864894838

Mean Test Accuracy (Subpopulations):
Privileged Favorable: 0.9808135369429536
Unprivileged Favorable: 0.9787278928812156
Unprivileged Unfavorable: 0.174269

In [12]:
# Write the merged accuracy, MIA and fairness CSV tables for the selected dataset.
# Reads the result variables produced by the experiment cell as globals.
save_summary_tables(dataset)

Saved law_sex_train_test_accuracies.csv
Saved law_sex_mia.csv
Saved law_sex_train_test_accuracies.csv
