# SET UP

In [1]:
import pandas as pd
from aif360.datasets import BinaryLabelDataset
import argparse
import numpy as np
import optuna
from catboost import CatBoostClassifier, CatBoostRegressor
from catboost.metrics import RMSEWithUncertainty
from sklearn.metrics import roc_auc_score, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from aif360.algorithms.preprocessing import DisparateImpactRemover, LFR, OptimPreproc, Reweighing
from imblearn.over_sampling import ADASYN
import random

In [2]:
privileged_groups = [{'gender': 1}]
unprivileged_groups = [{'gender': 0}]

df = pd.read_csv("./data/law_preprocessed.csv")

# Create a BinaryLabelDataset using the binary labels (gpa_class) and relevant attributes
dataset = BinaryLabelDataset(
    favorable_label=1,  # 1 indicates "high GPA" (favorable outcome)
    unfavorable_label=0,  # 0 indicates "low GPA" (unfavorable outcome)
    df=df,
    label_names=['pass_bar'],  # The newly created binary label
    protected_attribute_names=['gender']  # The protected attribute (e.g., gender)
)

In [3]:
X = dataset.features
y = dataset.labels.ravel()

In [4]:
# feature_names = dataset.feature_names
# race_column_index = feature_names.index('race')

In [5]:
# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42, stratify=y)
X_val, X_test_us, y_val, y_test_us = train_test_split(X_test, y_test, test_size=0.2, random_state=42, stratify=y_test)
X_test = np.concatenate((X_train, X_test_us), axis=0)
y_test = np.concatenate((y_train, y_test_us), axis=0)
membership = ["in"] * X_train.shape[0] + ["out"] * X_test_us.shape[0]

In [7]:
# test_dataset = np.concatenate((X_test, y_test.reshape(-1,1)), axis=1)

In [8]:
# protected_test = test_dataset[:, race_column_index]

In [9]:
# Create a DataFrame for the test dataset to recreate the BinaryLabelDataset
df_test = pd.DataFrame(X_test, columns=dataset.feature_names)
df_test['pass_bar'] = y_test  # Add the label
# df_test['race'] = protected_test  # Add the protected attribute

# Recreate the BinaryLabelDataset for the test set
test_dataset = BinaryLabelDataset(
    favorable_label=1,
    unfavorable_label=0,
    df=df_test,
    label_names=['pass_bar'],  # Label column name
    protected_attribute_names=['gender']  # Protected attribute column name
)

# Training

In [10]:
# Optuna objective
def objective(trial):
    param = {
        "depth": trial.suggest_int("depth", 1, 10),
        "l2_leaf_reg": trial.suggest_float("l2_leaf_reg", 1, 10, log=True),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 1, log=True),
        "random_strength": trial.suggest_float("random_strength", 1, 10, log=True),
        "objective": trial.suggest_categorical("objective", ["Logloss", "CrossEntropy"]),
        "iterations": trial.suggest_int("iterations", 1, 1000, log=True),
    }

    param["thread_count"] = 4
    param["random_seed"] = 42

    _X_train, _X_valid, _y_train, _y_valid = train_test_split(
        X_train, y_train, test_size=0.2, stratify=y_train, random_state=np.random.randint(0, 1000)
    )

    clf = CatBoostClassifier(**param)
    clf.fit(_X_train, _y_train, verbose=0)
    _y_pred_test = clf.predict(_X_valid, prediction_type="Probability")[:, 1]
    score = roc_auc_score(_y_valid, _y_pred_test)

    return score

In [11]:
# Optimize using Optuna
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=15)

[I 2024-10-24 17:49:19,367] A new study created in memory with name: no-name-a43b8a1a-7b7e-4f42-a723-d1dfde5f0325
[I 2024-10-24 17:49:19,690] Trial 0 finished with value: 0.8374922142634693 and parameters: {'depth': 10, 'l2_leaf_reg': 1.0964813663024324, 'learning_rate': 0.03893810788124229, 'random_strength': 1.4094998897031572, 'objective': 'CrossEntropy', 'iterations': 2}. Best is trial 0 with value: 0.8374922142634693.
[I 2024-10-24 17:49:19,834] Trial 1 finished with value: 0.784575969324198 and parameters: {'depth': 4, 'l2_leaf_reg': 6.204742984539694, 'learning_rate': 0.030770977031366394, 'random_strength': 5.145583066235087, 'objective': 'CrossEntropy', 'iterations': 3}. Best is trial 0 with value: 0.8374922142634693.
[I 2024-10-24 17:49:19,996] Trial 2 finished with value: 0.8275872975708503 and parameters: {'depth': 5, 'l2_leaf_reg': 1.8847431443926963, 'learning_rate': 0.04025679682769234, 'random_strength': 1.9037562853120868, 'objective': 'Logloss', 'iterations': 12}. Bes

In [12]:
# Detailed objective to train model with best parameters
def detailed_objective(trial):
    param = {
        "depth": trial.suggest_int("depth", 1, 10),
        "l2_leaf_reg": trial.suggest_float("l2_leaf_reg", 1, 10, log=True),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 1, log=True),
        "random_strength": trial.suggest_float("random_strength", 1, 10, log=True),
        "objective": trial.suggest_categorical("objective", ["Logloss", "CrossEntropy"]),
        "iterations": trial.suggest_int("iterations", 1, 1000, log=True),
    }

    param["thread_count"] = 4
    param["random_seed"] = 42

    clf = CatBoostClassifier(**param)
    clf.fit(X_train, y_train, verbose=0)

    score_val = clf.predict(X_val, prediction_type="Probability")[:, 1]
    score_test = clf.predict(X_test, prediction_type="Probability")[:, 1]

    return score_val, score_test

In [13]:
# Get scores for validation and test set
y_score_val, y_score_test = detailed_objective(study.best_trial)

# Attack

In [14]:
# Apply log-odds transformation
f_scores = lambda prob, l: (np.log(prob) - np.log(1 - prob)) * (2 * l - 1)
y_score_test = f_scores(y_score_test, y_test)
y_score_val = f_scores(y_score_val, y_val)

In [15]:
# Quantile regression objective
def objective_2(trial):
    param = {
        "depth": trial.suggest_int("depth", 1, 10),
        "l2_leaf_reg": trial.suggest_float("l2_leaf_reg", 1e-4, 1e4, log=True),
        "learning_rate": trial.suggest_float("learning_rate", 1e-4, 1, log=True),
        "iterations": trial.suggest_int("iterations", 1, 1000, log=True),
    }

    param["thread_count"] = 1
    param["objective"] = "RMSEWithUncertainty"
    param["posterior_sampling"] = True
    param["random_seed"] = 42
    eval_metric = RMSEWithUncertainty()

    _X_train, _X_valid, _y_train, _y_valid = train_test_split(
        X_val, y_score_val, test_size=0.2, random_state=np.random.randint(0, 1000), stratify=y_val
    )

    clf = CatBoostRegressor(**param)
    clf.fit(_X_train, _y_train, verbose=0)
    _y_pred_valid = clf.predict(_X_valid, prediction_type="RawFormulaVal")
    score = eval_metric.eval(label=_y_valid.T, approx=_y_pred_valid.T)

    return score

In [16]:
# Optimize quantile regression
study = optuna.create_study(direction="minimize", pruner=optuna.pruners.HyperbandPruner())
study.optimize(objective_2, n_trials=15, n_jobs=30)

[I 2024-10-24 17:49:37,571] A new study created in memory with name: no-name-83eac210-8708-4ae2-b34f-c1f16cef0e29
[I 2024-10-24 17:49:38,443] Trial 4 finished with value: 2.105907469012939 and parameters: {'depth': 10, 'l2_leaf_reg': 4.841842748189172, 'learning_rate': 0.0013172457032780333, 'iterations': 1}. Best is trial 4 with value: 2.105907469012939.
[I 2024-10-24 17:49:38,604] Trial 2 finished with value: 2.1343538020581674 and parameters: {'depth': 4, 'l2_leaf_reg': 0.0005204166454603278, 'learning_rate': 0.00010512938419523253, 'iterations': 6}. Best is trial 4 with value: 2.105907469012939.
[I 2024-10-24 17:49:38,740] Trial 12 finished with value: 1.7644317314254574 and parameters: {'depth': 7, 'l2_leaf_reg': 0.00019754495646021728, 'learning_rate': 0.7136104021750086, 'iterations': 1}. Best is trial 12 with value: 1.7644317314254574.
[I 2024-10-24 17:49:38,748] Trial 6 finished with value: 1.6905593692941983 and parameters: {'depth': 8, 'l2_leaf_reg': 12.555676773181865, 'lea

In [17]:
def detailed_objective2(trial):
    param = {
        "depth": trial.suggest_int("depth", 1, 10),
        "l2_leaf_reg": trial.suggest_float("l2_leaf_reg", 1e-4, 1e4, log=True),
        "learning_rate": trial.suggest_float("learning_rate", 1e-4, 1, log=True),
        "iterations": trial.suggest_int("iterations", 1, 1000, log=True),
    }

    param["thread_count"] = 1
    param["objective"] = "RMSEWithUncertainty"
    param["posterior_sampling"] = True
    param["random_seed"] = 42

    clf = CatBoostRegressor(**param)
    clf.fit(X_val, y_score_val, verbose=0)

    conf_test = clf.predict(X_test, prediction_type="RawFormulaVal")
    return conf_test

In [18]:
# Get confidence values for test set
y_conf = detailed_objective2(study.best_trial)

In [19]:
gaussian_pred = {}

gaussian_pred["score"] = y_score_test
gaussian_pred["mu"] = y_conf[:, 0]
gaussian_pred["log_sigma"] = y_conf[:, 1]
gaussian_pred["membership"] = membership

gaussian_pred = pd.DataFrame(gaussian_pred)

In [20]:
from sklearn.metrics import accuracy_score

# Step 1: Set the threshold as `mu`, and classify based on whether the actual score exceeds `mu`
predicted_membership = ["in" if score > mu else "out" for score, mu in zip(gaussian_pred["score"], gaussian_pred["mu"])]

# Step 2: Calculate the accuracy by comparing the predicted membership with the true membership
true_membership = gaussian_pred["membership"]


true_membership_numeric = [1 if m == "in" else 0 for m in true_membership]
predicted_membership_numeric = [1 if m == "in" else 0 for m in predicted_membership]

qmia_accuracy = accuracy_score(true_membership_numeric, predicted_membership_numeric)


print(f"QMIA Privacy Accuracy: {qmia_accuracy}")

QMIA Privacy Accuracy: 0.6024521195608622


# Subpopulations

In [21]:
def calculate_mia_accuracy_for_subpopulation(group_indices, gaussian_pred):
    sub_gaussian_pred = gaussian_pred.iloc[group_indices]
    predicted_membership = ["in" if score > mu else "out" for score, mu in zip(sub_gaussian_pred["score"], sub_gaussian_pred["mu"])]
    predicted_membership_numeric = [1 if member == "in" else 0 for member in predicted_membership]
    true_membership_numeric = [1 if member == "in" else 0 for member in sub_gaussian_pred["membership"]]
    return accuracy_score(true_membership_numeric, predicted_membership_numeric)

# Calculate subpopulation indices within test set
priv_fav_indices = np.where((test_dataset.protected_attributes.ravel() == 1) & (test_dataset.labels.ravel() == 1))[0]
priv_unfav_indices = np.where((test_dataset.protected_attributes.ravel() == 1) & (test_dataset.labels.ravel() == 0))[0]
unpriv_fav_indices = np.where((test_dataset.protected_attributes.ravel() == 0) & (test_dataset.labels.ravel() == 1))[0]
unpriv_unfav_indices = np.where((test_dataset.protected_attributes.ravel() == 0) & (test_dataset.labels.ravel() == 0))[0]

# Calculate MIA accuracy for each subpopulation within the test set
priv_fav_mia_accuracy = calculate_mia_accuracy_for_subpopulation(priv_fav_indices, gaussian_pred)
priv_unfav_mia_accuracy = calculate_mia_accuracy_for_subpopulation(priv_unfav_indices, gaussian_pred)
unpriv_fav_mia_accuracy = calculate_mia_accuracy_for_subpopulation(unpriv_fav_indices, gaussian_pred)
unpriv_unfav_mia_accuracy = calculate_mia_accuracy_for_subpopulation(unpriv_unfav_indices, gaussian_pred)

# Print MIA accuracy for each subpopulation
print(f"Privileged group with favorable outcome MIA accuracy: {priv_fav_mia_accuracy}")
print(f"Privileged group with unfavorable outcome MIA accuracy: {priv_unfav_mia_accuracy}")
print(f"Unprivileged group with favorable outcome MIA accuracy: {unpriv_fav_mia_accuracy}")
print(f"Unprivileged group with unfavorable outcome MIA accuracy: {unpriv_unfav_mia_accuracy}")


Privileged group with favorable outcome MIA accuracy: 0.6579858497666717
Privileged group with unfavorable outcome MIA accuracy: 0.18543046357615894
Unprivileged group with favorable outcome MIA accuracy: 0.5836531082118189
Unprivileged group with unfavorable outcome MIA accuracy: 0.15217391304347827


# Applying syn

In [22]:
dataset_orig_train, dataset_orig_test = dataset.split([0.5], shuffle=True)
dataset_orig_val = dataset_orig_test
from aif360.metrics import BinaryLabelDatasetMetric
# check fairness on the original data
metric_orig_train = BinaryLabelDatasetMetric(dataset_orig_train, 
                                            unprivileged_groups=unprivileged_groups,
                                            privileged_groups=privileged_groups)

In [23]:
f_label = dataset_orig_train.favorable_label
uf_label = dataset_orig_train.unfavorable_label
base_rate_privileged = metric_orig_train.base_rate(privileged=True)
base_rate_unprivileged = metric_orig_train.base_rate(privileged=False)

In [24]:
from aif360.metrics import utils
# return dataset indices of unprivileged and privileaged groups
def group_indices (dataset, unprivileged_groups):
    feature_names = dataset.feature_names
    cond_vec = utils.compute_boolean_conditioning_vector(dataset.features, feature_names, unprivileged_groups)

    # indices of examples in the unprivileged and privileged groups
    indices = [i for i, x in enumerate(cond_vec) if x == True]
    priv_indices = [i for i, x in enumerate(cond_vec) if x == False]
    return indices, priv_indices

In [25]:
# oversample unprivileged favored so that the absolute number 
# of the unprivileged favored matches with the privilged favored  
def synthetic_balance (dataset, unprivileged_groups, bp, bnp, f_label, uf_label, sampling_strategy=1.00):

    # make a duplicate copy of the input data
    dataset_transf_train = dataset.copy(deepcopy=True)

    # indices of examples in the unprivileged and privileged groups
    indices, priv_indices = group_indices(dataset, unprivileged_groups)

    # subset: unprivileged--unprivileged_dataset and privileged--privileged_dataset 
    unprivileged_dataset = dataset.subset(indices) # unprivileaged
    privileged_dataset = dataset.subset(priv_indices) # privilegaed

    # number of unprivileged/privileged with favorable label
    n_unpriv_favor = np.count_nonzero(unprivileged_dataset.labels==f_label) # unprivileged with favorable label
    n_unpriv_unfavor = np.count_nonzero(unprivileged_dataset.labels!=f_label) # unprivileged with unfavorable label
    n_priv_favor = np.count_nonzero(privileged_dataset.labels==f_label) # privileged with favorable label
    n_priv_unfavor = np.count_nonzero(privileged_dataset.labels!=f_label) # privileged with unfavorable label

    # privileged group has more favored in absolute number than the unprivileged group
    if n_unpriv_favor < n_priv_favor:
        # inflate unprivileged favored class
        n_extra_sample = (n_priv_favor - n_unpriv_favor)*sampling_strategy
        if n_extra_sample + n_unpriv_favor >= n_unpriv_unfavor:
            inflate_rate = int(((n_extra_sample+n_unpriv_favor)/n_unpriv_unfavor)+1)
        else:
            inflate_rate = round(((n_extra_sample+n_unpriv_favor)/n_unpriv_unfavor)+1)
        _, extra_favored  = balance(unprivileged_dataset, n_extra_sample, inflate_rate, f_label, uf_label)

        # inflate unprivileged unfavored class to match the privileged baserate
        n_extra_sample = (n_extra_sample + n_unpriv_favor - bp * (n_extra_sample + n_unpriv_favor + n_unpriv_unfavor))/bp
        if n_extra_sample + n_unpriv_unfavor >= n_unpriv_favor:
            inflate_rate = int(((n_extra_sample+n_unpriv_unfavor)/n_unpriv_favor)+1)
        else:
            inflate_rate = round(((n_extra_sample+n_unpriv_unfavor)/n_unpriv_favor)+1)
        _, extra_unfavored  = balance(unprivileged_dataset, n_extra_sample, inflate_rate, uf_label, f_label)

        dataset_transf_train.features = np.concatenate((dataset_transf_train.features, extra_favored.features, extra_unfavored.features))
        dataset_transf_train.labels = np.concatenate((dataset_transf_train.labels, extra_favored.labels, extra_unfavored.labels))
        dataset_transf_train.instance_weights = np.concatenate((dataset_transf_train.instance_weights, extra_favored.instance_weights, extra_unfavored.instance_weights))
        dataset_transf_train.protected_attributes = np.concatenate((dataset_transf_train.protected_attributes, extra_favored.protected_attributes, extra_unfavored.protected_attributes))

    return dataset_transf_train

In [26]:
# adaptive oversampling for the unprivileged group
def synthetic(dataset, unprivileged_groups, bp, bnp, f_label, uf_label, os_mode=2, sampling_strategy=0.50):

    # make a duplicate copy of the input data
    dataset_transf_train = dataset.copy(deepcopy=True)

    # Case 1: privileged group is not relatively favored but has more favored in absolute number than the unprivileged group
    # if privileged base rate is less than unprivilegeted base rate
    if bp < bnp:
        dataset_transf_train = synthetic_balance(dataset, unprivileged_groups, bp, bnp, f_label, uf_label)
        return dataset_transf_train

    # Case 2: if privileged is favored, i.e. has a higher base rate

    # [Method 1] inflate privileged unfavored class
    if os_mode == 1:
        _, sample_unfavor_priv = synthetic_unfavor_priv (dataset, unprivileged_groups, bp, bnp, f_label, uf_label, sampling_strategy=1.00)
        dataset_transf_train.features = np.concatenate((dataset_transf_train.features, sample_unfavor_priv.features))
        dataset_transf_train.labels = np.concatenate((dataset_transf_train.labels, sample_unfavor_priv.labels))
        dataset_transf_train.instance_weights = np.concatenate((dataset_transf_train.instance_weights, sample_unfavor_priv.instance_weights))
        dataset_transf_train.protected_attributes = np.concatenate((dataset_transf_train.protected_attributes, sample_unfavor_priv.protected_attributes))
    elif os_mode == 2:
    # [Method 2] inflate unprivileged favored class
        _, sample_favor_unpriv = synthetic_favor_unpriv (dataset, unprivileged_groups, bp, bnp, f_label, uf_label, sampling_strategy=1.0)
        dataset_transf_train.features = np.concatenate((dataset_transf_train.features, sample_favor_unpriv.features))
        dataset_transf_train.labels = np.concatenate((dataset_transf_train.labels, sample_favor_unpriv.labels))
        dataset_transf_train.instance_weights = np.concatenate((dataset_transf_train.instance_weights, sample_favor_unpriv.instance_weights))
        dataset_transf_train.protected_attributes = np.concatenate((dataset_transf_train.protected_attributes, sample_favor_unpriv.protected_attributes))
    # [Method 3] combine methods 1 and 2 
    elif os_mode == 3:
        _, sample_unfavor_priv = synthetic_unfavor_priv (dataset, unprivileged_groups, bp, bnp, f_label, uf_label, sampling_strategy=1.00)
        dataset_transf_train.features = np.concatenate((dataset_transf_train.features, sample_unfavor_priv.features))
        dataset_transf_train.labels = np.concatenate((dataset_transf_train.labels, sample_unfavor_priv.labels))
        dataset_transf_train.instance_weights = np.concatenate((dataset_transf_train.instance_weights, sample_unfavor_priv.instance_weights))
        dataset_transf_train.protected_attributes = np.concatenate((dataset_transf_train.protected_attributes, sample_unfavor_priv.protected_attributes))
        _, sample_favor_unpriv = synthetic_favor_unpriv (dataset, unprivileged_groups, bp, bnp, f_label, uf_label, sampling_strategy=1.00)
        dataset_transf_train.features = np.concatenate((dataset_transf_train.features, sample_favor_unpriv.features))
        dataset_transf_train.labels = np.concatenate((dataset_transf_train.labels, sample_favor_unpriv.labels))
        dataset_transf_train.instance_weights = np.concatenate((dataset_transf_train.instance_weights, sample_favor_unpriv.instance_weights))
        dataset_transf_train.protected_attributes = np.concatenate((dataset_transf_train.protected_attributes, sample_favor_unpriv.protected_attributes))
    else:
        sys.exit("Oversampling mode is missing: 1: oversample unfavorable privileged; 2: oversample favorable unprivileged; 3. both")

    return dataset_transf_train



In [27]:
# oversample favorable in the unprivileged group
def synthetic_favor_unpriv (dataset, unprivileged_groups, bp, bnp, f_label, uf_label, sampling_strategy=1.00):

    indices, priv_indices = group_indices (dataset, unprivileged_groups)

    # subset: unprivileged--unprivileged_dataset and privileged--privileged_dataset 
    unprivileged_dataset = dataset.subset(indices) # unprivileaged
    privileged_dataset = dataset.subset(priv_indices) # privilegaed

    n_unpriv_favor = np.count_nonzero(unprivileged_dataset.labels==f_label) # unprivileged with favorable label
    n_unpriv_unfavor = np.count_nonzero(unprivileged_dataset.labels!=f_label) # unprivileged with unfavorable label

    n_extra_sample = (bp * len(indices)-n_unpriv_favor) / (1- bp) * sampling_strategy
    # unprivileged favorable > unprivileged unfavorable
    if n_extra_sample + n_unpriv_favor >= n_unpriv_unfavor:
        inflate_rate = int(((n_extra_sample+n_unpriv_favor)/n_unpriv_unfavor)+1)
    else:
        inflate_rate = round(((n_extra_sample+n_unpriv_favor)/n_unpriv_unfavor)+1)

    dataset_transf_refprivileged_train, extra_favored_unpriv  = balance(unprivileged_dataset, n_extra_sample, inflate_rate, f_label, uf_label)

    return dataset_transf_refprivileged_train, extra_favored_unpriv

In [28]:
def balance(dataset, n_extra, inflate_rate, f_label, uf_label):

    # make a duplicate copy of the input data
    dataset_transf_train = dataset.copy(deepcopy=True)

    # subsets with favorable labels and unfavorable labels
    f_dataset = dataset.subset(np.where(dataset.labels==f_label)[0].tolist())
    uf_dataset = dataset.subset(np.where(dataset.labels==uf_label)[0].tolist())

    # expand the group with uf_label for oversampling purpose
    inflated_uf_features = np.repeat(uf_dataset.features, inflate_rate, axis=0)
    sample_features = np.concatenate((f_dataset.features, inflated_uf_features))
    inflated_uf_labels = np.repeat(uf_dataset.labels, inflate_rate, axis=0)
    sample_labels = np.concatenate((f_dataset.labels, inflated_uf_labels))
    

    # oversampling favorable samples
    # X: inflated dataset with synthetic samples of f_label attached to the end

    # Now apply ADASYN oversampling
    oversample = ADASYN(sampling_strategy='minority')
    X, y = oversample.fit_resample(sample_features, sample_labels)
    y = y.reshape(-1,1)

    # take samples from dataset with only favorable labels
    X = X[np.where(y==f_label)[0].tolist()]  # data with f_label + new samples
    y = y[y==f_label]

    selected = int(f_dataset.features.shape[0]+n_extra)

    X = X[:selected, :]
    y = y[:selected]
    y = y.reshape(-1,1)
    
    # print(f"Type of instance_weights: {type(f_dataset.instance_weights)}")
    # print(f"Shape of instance_weights: {getattr(f_dataset.instance_weights, 'shape', 'N/A')}")
    # print(f"Content of instance_weights: {f_dataset.instance_weights}")

    # print(f"Type of protected_attributes: {type(f_dataset.protected_attributes)}")
    # print(f"Shape of protected_attributes: {getattr(f_dataset.protected_attributes, 'shape', 'N/A')}")
    # print(f"Content of protected_attributes: {f_dataset.protected_attributes}")

    # Convert to lists if necessary
    instance_weights_list = f_dataset.instance_weights.flatten().tolist() if isinstance(f_dataset.instance_weights, np.ndarray) else f_dataset.instance_weights
    protected_attributes_list = f_dataset.protected_attributes.flatten().tolist() if isinstance(f_dataset.protected_attributes, np.ndarray) else f_dataset.protected_attributes

    # set weights and protected_attributes for the newly generated samples
    inc = X.shape[0]-f_dataset.features.shape[0]
    new_weights = [random.choice(instance_weights_list) for _ in range(inc)]
    new_attributes = [random.choice(protected_attributes_list) for _ in range(inc)]
    
    # new_attributes is 1D, reshape it to match the shape (n, 1)
    new_attributes = np.array(new_attributes).reshape(-1, 1)

    # compose transformed dataset
    dataset_transf_train.features = np.concatenate((uf_dataset.features, X))
    dataset_transf_train.labels = np.concatenate((uf_dataset.labels, y))
    dataset_transf_train.instance_weights = np.concatenate((uf_dataset.instance_weights, f_dataset.instance_weights, new_weights))
    dataset_transf_train.protected_attributes = np.concatenate((uf_dataset.protected_attributes, f_dataset.protected_attributes, new_attributes))

    # make a duplicate copy of the input data
    dataset_extra_train = dataset.copy()

    X_ex = X[-int(n_extra):]
    y_ex = y[-int(n_extra):]
    y_ex = y_ex.reshape(-1,1)
    
    # set weights and protected_attributes for the newly generated samples
    inc = int(n_extra)
    new_weights = [random.choice(instance_weights_list) for _ in range(inc)]
    new_attributes = [random.choice(protected_attributes_list) for _ in range(inc)]
    
    # new_attributes is 1D, reshape it to match the shape (n, 1)
    new_attributes = np.array(new_attributes).reshape(-1, 1)

    # compose extra dataset
    dataset_extra_train.features = X_ex
    dataset_extra_train.labels = y_ex
    dataset_extra_train.instance_weights = new_weights
    dataset_extra_train.protected_attributes = new_attributes

    # verifying
    #print(dataset_transf_train.features.shape)
    #print(dataset_transf_train.labels.shape)
    #print(dataset_transf_train.instance_weights.shape)
    #print(dataset_transf_train.protected_attributes.shape)

    # return favor and unfavored oversampling results
    return dataset_transf_train, dataset_extra_train

In [29]:
dataset_transf_train = synthetic(dataset_orig_train, unprivileged_groups, base_rate_privileged, base_rate_unprivileged, f_label, uf_label, os_mode = 2)

In [30]:
X = dataset_transf_train.features
y = dataset_transf_train.labels.ravel()
# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42, stratify=y)
X_val, X_test_us, y_val, y_test_us = train_test_split(X_test, y_test, test_size=0.2, random_state=42, stratify=y_test)
X_test = np.concatenate((X_train, X_test_us), axis=0)
y_test = np.concatenate((y_train, y_test_us), axis=0)
membership = ["in"] * X_train.shape[0] + ["out"] * X_test_us.shape[0]
# Create a DataFrame for the test dataset to recreate the BinaryLabelDataset
df_test = pd.DataFrame(X_test, columns=dataset.feature_names)
df_test['two_year_recid'] = y_test  # Add the label
# df_test['race'] = protected_test  # Add the protected attribute

# Recreate the BinaryLabelDataset for the test set
test_dataset = BinaryLabelDataset(
    favorable_label=1,
    unfavorable_label=0,
    df=df_test,
    label_names=['two_year_recid'],  # Label column name
    protected_attribute_names=['race']  # Protected attribute column name
)

In [31]:
# Optimize using Optuna
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=15)

[I 2024-10-24 17:51:00,038] A new study created in memory with name: no-name-8620ec78-9263-421f-9785-02e90b18a9fd
[I 2024-10-24 17:51:02,400] Trial 0 finished with value: 0.8671918767507003 and parameters: {'depth': 2, 'l2_leaf_reg': 6.812345192740989, 'learning_rate': 0.01732099821758437, 'random_strength': 2.610958873921719, 'objective': 'Logloss', 'iterations': 780}. Best is trial 0 with value: 0.8671918767507003.
[I 2024-10-24 17:51:02,496] Trial 1 finished with value: 0.7905112044817928 and parameters: {'depth': 4, 'l2_leaf_reg': 4.985691686101141, 'learning_rate': 0.012561033331743273, 'random_strength': 1.1578588348274919, 'objective': 'Logloss', 'iterations': 1}. Best is trial 0 with value: 0.8671918767507003.
[I 2024-10-24 17:51:07,449] Trial 2 finished with value: 0.8072303921568628 and parameters: {'depth': 9, 'l2_leaf_reg': 1.0681159828568825, 'learning_rate': 0.37487084890893435, 'random_strength': 1.0497411719630407, 'objective': 'Logloss', 'iterations': 259}. Best is tri

In [32]:
# Get scores for validation and test set
y_score_val, y_score_test = detailed_objective(study.best_trial)

In [33]:
# Apply log-odds transformation
f_scores = lambda prob, l: (np.log(prob) - np.log(1 - prob)) * (2 * l - 1)
y_score_test = f_scores(y_score_test, y_test)
y_score_val = f_scores(y_score_val, y_val)

In [34]:
# Optimize quantile regression
study = optuna.create_study(direction="minimize", pruner=optuna.pruners.HyperbandPruner())
study.optimize(objective_2, n_trials=15, n_jobs=30)

[I 2024-10-24 17:51:58,659] A new study created in memory with name: no-name-cc6b9a8e-0d8a-43bd-9863-a570186826a1
[I 2024-10-24 17:51:59,262] Trial 6 finished with value: 1.9649004264810583 and parameters: {'depth': 1, 'l2_leaf_reg': 508.18547254244606, 'learning_rate': 0.00011328445525377983, 'iterations': 5}. Best is trial 6 with value: 1.9649004264810583.
[I 2024-10-24 17:52:00,666] Trial 10 finished with value: 1.7114137418036632 and parameters: {'depth': 6, 'l2_leaf_reg': 273.570791123805, 'learning_rate': 0.3166716725298157, 'iterations': 3}. Best is trial 10 with value: 1.7114137418036632.
[I 2024-10-24 17:52:01,125] Trial 9 finished with value: 1.9468043988274464 and parameters: {'depth': 2, 'l2_leaf_reg': 0.00028840034535457335, 'learning_rate': 0.00019280627405769953, 'iterations': 15}. Best is trial 10 with value: 1.7114137418036632.
[I 2024-10-24 17:52:01,131] Trial 5 finished with value: 1.9362733934175058 and parameters: {'depth': 6, 'l2_leaf_reg': 0.0016978988644780078, 

In [35]:
# Get confidence values for test set
y_conf = detailed_objective2(study.best_trial)

In [36]:
gaussian_pred = {}

gaussian_pred["score"] = y_score_test
gaussian_pred["mu"] = y_conf[:, 0]
gaussian_pred["log_sigma"] = y_conf[:, 1]
gaussian_pred["membership"] = membership

gaussian_pred = pd.DataFrame(gaussian_pred)

In [37]:
from sklearn.metrics import accuracy_score

# Step 1: Set the threshold as `mu`, and classify based on whether the actual score exceeds `mu`
predicted_membership = ["in" if score > mu else "out" for score, mu in zip(gaussian_pred["score"], gaussian_pred["mu"])]

# Step 2: Calculate the accuracy by comparing the predicted membership with the true membership
true_membership = gaussian_pred["membership"]


true_membership_numeric = [1 if m == "in" else 0 for m in true_membership]
predicted_membership_numeric = [1 if m == "in" else 0 for m in predicted_membership]

qmia_accuracy = accuracy_score(true_membership_numeric, predicted_membership_numeric)


print(f"QMIA Privacy Accuracy: {qmia_accuracy}")

QMIA Privacy Accuracy: 0.5884363429222443


In [38]:
def calculate_mia_accuracy_for_subpopulation(group_indices, gaussian_pred):
    sub_gaussian_pred = gaussian_pred.iloc[group_indices]
    predicted_membership = ["in" if score > mu else "out" for score, mu in zip(sub_gaussian_pred["score"], sub_gaussian_pred["mu"])]
    predicted_membership_numeric = [1 if member == "in" else 0 for member in predicted_membership]
    true_membership_numeric = [1 if member == "in" else 0 for member in sub_gaussian_pred["membership"]]
    return accuracy_score(true_membership_numeric, predicted_membership_numeric)

# Calculate subpopulation indices within test set
priv_fav_indices = np.where((test_dataset.protected_attributes.ravel() == 1) & (test_dataset.labels.ravel() == 1))[0]
priv_unfav_indices = np.where((test_dataset.protected_attributes.ravel() == 1) & (test_dataset.labels.ravel() == 0))[0]
unpriv_fav_indices = np.where((test_dataset.protected_attributes.ravel() == 0) & (test_dataset.labels.ravel() == 1))[0]
unpriv_unfav_indices = np.where((test_dataset.protected_attributes.ravel() == 0) & (test_dataset.labels.ravel() == 0))[0]

# Calculate MIA accuracy for each subpopulation within the test set
priv_fav_mia_accuracy = calculate_mia_accuracy_for_subpopulation(priv_fav_indices, gaussian_pred)
priv_unfav_mia_accuracy = calculate_mia_accuracy_for_subpopulation(priv_unfav_indices, gaussian_pred)
unpriv_fav_mia_accuracy = calculate_mia_accuracy_for_subpopulation(unpriv_fav_indices, gaussian_pred)
unpriv_unfav_mia_accuracy = calculate_mia_accuracy_for_subpopulation(unpriv_unfav_indices, gaussian_pred)

# Print MIA accuracy for each subpopulation
print(f"Privileged group with favorable outcome MIA accuracy: {priv_fav_mia_accuracy}")
print(f"Privileged group with unfavorable outcome MIA accuracy: {priv_unfav_mia_accuracy}")
print(f"Unprivileged group with favorable outcome MIA accuracy: {unpriv_fav_mia_accuracy}")
print(f"Unprivileged group with unfavorable outcome MIA accuracy: {unpriv_unfav_mia_accuracy}")


Privileged group with favorable outcome MIA accuracy: 0.4329501915708812
Privileged group with unfavorable outcome MIA accuracy: 0.17142857142857143
Unprivileged group with favorable outcome MIA accuracy: 0.6186507312470514
Unprivileged group with unfavorable outcome MIA accuracy: 0.1678832116788321


# Applying DIR

In [39]:
DIR = DisparateImpactRemover(sensitive_attribute='gender')

In [40]:
dataset_dir_train = DIR.fit_transform(dataset_orig_train)
dataset = dataset_dir_train

In [41]:
# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42, stratify=y)
X_val, X_test_us, y_val, y_test_us = train_test_split(X_test, y_test, test_size=0.2, random_state=42, stratify=y_test)
X_test = np.concatenate((X_train, X_test_us), axis=0)
y_test = np.concatenate((y_train, y_test_us), axis=0)
membership = ["in"] * X_train.shape[0] + ["out"] * X_test_us.shape[0]

In [42]:
# Create a DataFrame for the test dataset to recreate the BinaryLabelDataset
df_test = pd.DataFrame(X_test, columns=dataset.feature_names)
df_test['pass_bar'] = y_test  # Add the label
# df_test['race'] = protected_test  # Add the protected attribute

# Recreate the BinaryLabelDataset for the test set
test_dataset = BinaryLabelDataset(
    favorable_label=1,
    unfavorable_label=0,
    df=df_test,
    label_names=['pass_bar'],  # Label column name
    protected_attribute_names=['gender']  # Protected attribute column name
)

In [43]:
# Optimize using Optuna
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=15)

[I 2024-10-24 17:53:28,186] A new study created in memory with name: no-name-07dfdcd0-13ff-46cf-806d-93440d5e2650
[I 2024-10-24 17:53:28,306] Trial 0 finished with value: 0.8589898459383754 and parameters: {'depth': 5, 'l2_leaf_reg': 1.2596145424272962, 'learning_rate': 0.7587504038166499, 'random_strength': 1.5986430301818266, 'objective': 'Logloss', 'iterations': 4}. Best is trial 0 with value: 0.8589898459383754.
[I 2024-10-24 17:53:30,264] Trial 1 finished with value: 0.860451680672269 and parameters: {'depth': 2, 'l2_leaf_reg': 1.1229511724692205, 'learning_rate': 0.530282382973134, 'random_strength': 6.455817200262652, 'objective': 'Logloss', 'iterations': 762}. Best is trial 1 with value: 0.860451680672269.
[I 2024-10-24 17:53:30,547] Trial 2 finished with value: 0.7574404761904762 and parameters: {'depth': 8, 'l2_leaf_reg': 7.316577729033343, 'learning_rate': 0.06825674029869885, 'random_strength': 6.333307587207756, 'objective': 'Logloss', 'iterations': 22}. Best is trial 1 wi

In [44]:
# Get scores for validation and test set
y_score_val, y_score_test = detailed_objective(study.best_trial)

In [45]:
# Apply log-odds transformation
f_scores = lambda prob, l: (np.log(prob) - np.log(1 - prob)) * (2 * l - 1)
y_score_test = f_scores(y_score_test, y_test)
y_score_val = f_scores(y_score_val, y_val)

In [46]:
# Optimize quantile regression
study = optuna.create_study(direction="minimize", pruner=optuna.pruners.HyperbandPruner())
study.optimize(objective_2, n_trials=15, n_jobs=30)

[I 2024-10-24 17:53:42,931] A new study created in memory with name: no-name-17389ecc-811d-401a-93a1-be622a7faf69
[I 2024-10-24 17:53:43,226] Trial 4 finished with value: 1.6260695056013044 and parameters: {'depth': 6, 'l2_leaf_reg': 4.928317947053785, 'learning_rate': 0.26924147988478336, 'iterations': 1}. Best is trial 4 with value: 1.6260695056013044.
[I 2024-10-24 17:53:43,404] Trial 1 finished with value: 695508497750.2505 and parameters: {'depth': 6, 'l2_leaf_reg': 0.06849041839425724, 'learning_rate': 0.8714204319646773, 'iterations': 3}. Best is trial 4 with value: 1.6260695056013044.
[I 2024-10-24 17:53:43,461] Trial 9 finished with value: 1.7223527093228501 and parameters: {'depth': 3, 'l2_leaf_reg': 0.007047905745244669, 'learning_rate': 0.00048788186963738555, 'iterations': 1}. Best is trial 4 with value: 1.6260695056013044.
[I 2024-10-24 17:53:44,120] Trial 6 finished with value: 1.6791248916646007 and parameters: {'depth': 5, 'l2_leaf_reg': 0.000842606810533249, 'learning

In [47]:
# Get confidence values for test set
y_conf = detailed_objective2(study.best_trial)

In [48]:
gaussian_pred = {}

gaussian_pred["score"] = y_score_test
gaussian_pred["mu"] = y_conf[:, 0]
gaussian_pred["log_sigma"] = y_conf[:, 1]
gaussian_pred["membership"] = membership

gaussian_pred = pd.DataFrame(gaussian_pred)

In [49]:
# Step 1: Set the threshold as `mu`, and classify based on whether the actual score exceeds `mu`
predicted_membership = ["in" if score > mu else "out" for score, mu in zip(gaussian_pred["score"], gaussian_pred["mu"])]

# Step 2: Calculate the accuracy by comparing the predicted membership with the true membership
true_membership = gaussian_pred["membership"]


true_membership_numeric = [1 if m == "in" else 0 for m in true_membership]
predicted_membership_numeric = [1 if m == "in" else 0 for m in predicted_membership]

qmia_accuracy = accuracy_score(true_membership_numeric, predicted_membership_numeric)


print(f"QMIA Privacy Accuracy: {qmia_accuracy}")

QMIA Privacy Accuracy: 0.6200512674451724


In [50]:
def calculate_mia_accuracy_for_subpopulation(group_indices, gaussian_pred):
    sub_gaussian_pred = gaussian_pred.iloc[group_indices]
    predicted_membership = ["in" if score > mu else "out" for score, mu in zip(sub_gaussian_pred["score"], sub_gaussian_pred["mu"])]
    predicted_membership_numeric = [1 if member == "in" else 0 for member in predicted_membership]
    true_membership_numeric = [1 if member == "in" else 0 for member in sub_gaussian_pred["membership"]]
    return accuracy_score(true_membership_numeric, predicted_membership_numeric)

# Calculate subpopulation indices within test set
priv_fav_indices = np.where((test_dataset.protected_attributes.ravel() == 1) & (test_dataset.labels.ravel() == 1))[0]
priv_unfav_indices = np.where((test_dataset.protected_attributes.ravel() == 1) & (test_dataset.labels.ravel() == 0))[0]
unpriv_fav_indices = np.where((test_dataset.protected_attributes.ravel() == 0) & (test_dataset.labels.ravel() == 1))[0]
unpriv_unfav_indices = np.where((test_dataset.protected_attributes.ravel() == 0) & (test_dataset.labels.ravel() == 0))[0]

# Calculate MIA accuracy for each subpopulation within the test set
priv_fav_mia_accuracy = calculate_mia_accuracy_for_subpopulation(priv_fav_indices, gaussian_pred)
priv_unfav_mia_accuracy = calculate_mia_accuracy_for_subpopulation(priv_unfav_indices, gaussian_pred)
unpriv_fav_mia_accuracy = calculate_mia_accuracy_for_subpopulation(unpriv_fav_indices, gaussian_pred)
unpriv_unfav_mia_accuracy = calculate_mia_accuracy_for_subpopulation(unpriv_unfav_indices, gaussian_pred)

# Print MIA accuracy for each subpopulation
print(f"Privileged group with favorable outcome MIA accuracy: {priv_fav_mia_accuracy}")
print(f"Privileged group with unfavorable outcome MIA accuracy: {priv_unfav_mia_accuracy}")
print(f"Unprivileged group with favorable outcome MIA accuracy: {unpriv_fav_mia_accuracy}")
print(f"Unprivileged group with unfavorable outcome MIA accuracy: {unpriv_unfav_mia_accuracy}")


Privileged group with favorable outcome MIA accuracy: 0.6246290801186943
Privileged group with unfavorable outcome MIA accuracy: 0.18543046357615894
Unprivileged group with favorable outcome MIA accuracy: 0.6548010768770566
Unprivileged group with unfavorable outcome MIA accuracy: 0.20253164556962025


# Applying Reweighing

In [51]:
RW = Reweighing(unprivileged_groups=unprivileged_groups,
                        privileged_groups=privileged_groups)

dataset_reweigh_train = RW.fit_transform(dataset_orig_train)
dataset = dataset_reweigh_train

In [52]:
# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42, stratify=y)
X_val, X_test_us, y_val, y_test_us = train_test_split(X_test, y_test, test_size=0.2, random_state=42, stratify=y_test)
X_test = np.concatenate((X_train, X_test_us), axis=0)
y_test = np.concatenate((y_train, y_test_us), axis=0)
membership = ["in"] * X_train.shape[0] + ["out"] * X_test_us.shape[0]

In [53]:
# Create a DataFrame for the test dataset to recreate the BinaryLabelDataset
df_test = pd.DataFrame(X_test, columns=dataset.feature_names)
df_test['pass_bar'] = y_test  # Add the label
# df_test['race'] = protected_test  # Add the protected attribute

# Recreate the BinaryLabelDataset for the test set
test_dataset = BinaryLabelDataset(
    favorable_label=1,
    unfavorable_label=0,
    df=df_test,
    label_names=['pass_bar'],  # Label column name
    protected_attribute_names=['gender']  # Protected attribute column name
)

In [54]:
# Optimize using Optuna
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=15)

[I 2024-10-24 17:55:09,999] A new study created in memory with name: no-name-1a54a793-25f8-4c93-9cd3-b6cc991a82ef
[I 2024-10-24 17:55:10,119] Trial 0 finished with value: 0.8554796918767507 and parameters: {'depth': 4, 'l2_leaf_reg': 6.490379319662036, 'learning_rate': 0.4480671897016578, 'random_strength': 1.586130635772114, 'objective': 'Logloss', 'iterations': 6}. Best is trial 0 with value: 0.8554796918767507.
[I 2024-10-24 17:55:10,505] Trial 1 finished with value: 0.7733630952380953 and parameters: {'depth': 10, 'l2_leaf_reg': 2.086861066683219, 'learning_rate': 0.26886050121442395, 'random_strength': 6.70966404877289, 'objective': 'CrossEntropy', 'iterations': 8}. Best is trial 0 with value: 0.8554796918767507.
[I 2024-10-24 17:55:10,617] Trial 2 finished with value: 0.790765056022409 and parameters: {'depth': 1, 'l2_leaf_reg': 2.7934175635840566, 'learning_rate': 0.10680198456588656, 'random_strength': 6.683896428266283, 'objective': 'Logloss', 'iterations': 6}. Best is trial 0

In [55]:
# Get scores for validation and test set
y_score_val, y_score_test = detailed_objective(study.best_trial)

In [56]:
# Apply log-odds transformation
f_scores = lambda prob, l: (np.log(prob) - np.log(1 - prob)) * (2 * l - 1)
y_score_test = f_scores(y_score_test, y_test)
y_score_val = f_scores(y_score_val, y_val)

In [None]:
# Optimize quantile regression
study = optuna.create_study(direction="minimize", pruner=optuna.pruners.HyperbandPruner())
study.optimize(objective_2, n_trials=15, n_jobs=30)

[I 2024-10-24 17:55:27,661] A new study created in memory with name: no-name-a08ea94c-a931-4205-868e-344ff397f2e9
[I 2024-10-24 17:55:28,205] Trial 1 finished with value: 2.0328573534836782 and parameters: {'depth': 4, 'l2_leaf_reg': 1346.5976601682673, 'learning_rate': 0.00024337163417264274, 'iterations': 3}. Best is trial 1 with value: 2.0328573534836782.
[I 2024-10-24 17:55:29,249] Trial 7 finished with value: 2.0445144871980627 and parameters: {'depth': 4, 'l2_leaf_reg': 0.002672616240208124, 'learning_rate': 0.002148936323338968, 'iterations': 2}. Best is trial 1 with value: 2.0328573534836782.
[I 2024-10-24 17:55:29,256] Trial 14 finished with value: 2.0384238034747657 and parameters: {'depth': 7, 'l2_leaf_reg': 7.099771821360429, 'learning_rate': 0.00021729931719790475, 'iterations': 8}. Best is trial 1 with value: 2.0328573534836782.
[I 2024-10-24 17:55:29,272] Trial 13 finished with value: 2.0355251686160125 and parameters: {'depth': 2, 'l2_leaf_reg': 0.0002533573494158136, '

In [None]:
# Get confidence values for test set
y_conf = detailed_objective2(study.best_trial)

In [None]:
gaussian_pred = {}

gaussian_pred["score"] = y_score_test
gaussian_pred["mu"] = y_conf[:, 0]
gaussian_pred["log_sigma"] = y_conf[:, 1]
gaussian_pred["membership"] = membership

gaussian_pred = pd.DataFrame(gaussian_pred)

In [None]:
# Step 1: Set the threshold as `mu`, and classify based on whether the actual score exceeds `mu`
predicted_membership = ["in" if score > mu else "out" for score, mu in zip(gaussian_pred["score"], gaussian_pred["mu"])]

# Step 2: Calculate the accuracy by comparing the predicted membership with the true membership
true_membership = gaussian_pred["membership"]


true_membership_numeric = [1 if m == "in" else 0 for m in true_membership]
predicted_membership_numeric = [1 if m == "in" else 0 for m in predicted_membership]

qmia_accuracy = accuracy_score(true_membership_numeric, predicted_membership_numeric)


print(f"QMIA Privacy Accuracy: {qmia_accuracy}")

In [None]:
def calculate_mia_accuracy_for_subpopulation(group_indices, gaussian_pred):
    sub_gaussian_pred = gaussian_pred.iloc[group_indices]
    predicted_membership = ["in" if score > mu else "out" for score, mu in zip(sub_gaussian_pred["score"], sub_gaussian_pred["mu"])]
    predicted_membership_numeric = [1 if member == "in" else 0 for member in predicted_membership]
    true_membership_numeric = [1 if member == "in" else 0 for member in sub_gaussian_pred["membership"]]
    return accuracy_score(true_membership_numeric, predicted_membership_numeric)

# Calculate subpopulation indices within test set
priv_fav_indices = np.where((test_dataset.protected_attributes.ravel() == 1) & (test_dataset.labels.ravel() == 1))[0]
priv_unfav_indices = np.where((test_dataset.protected_attributes.ravel() == 1) & (test_dataset.labels.ravel() == 0))[0]
unpriv_fav_indices = np.where((test_dataset.protected_attributes.ravel() == 0) & (test_dataset.labels.ravel() == 1))[0]
unpriv_unfav_indices = np.where((test_dataset.protected_attributes.ravel() == 0) & (test_dataset.labels.ravel() == 0))[0]

# Calculate MIA accuracy for each subpopulation within the test set
priv_fav_mia_accuracy = calculate_mia_accuracy_for_subpopulation(priv_fav_indices, gaussian_pred)
priv_unfav_mia_accuracy = calculate_mia_accuracy_for_subpopulation(priv_unfav_indices, gaussian_pred)
unpriv_fav_mia_accuracy = calculate_mia_accuracy_for_subpopulation(unpriv_fav_indices, gaussian_pred)
unpriv_unfav_mia_accuracy = calculate_mia_accuracy_for_subpopulation(unpriv_unfav_indices, gaussian_pred)

# Print MIA accuracy for each subpopulation
print(f"Privileged group with favorable outcome MIA accuracy: {priv_fav_mia_accuracy}")
print(f"Privileged group with unfavorable outcome MIA accuracy: {priv_unfav_mia_accuracy}")
print(f"Unprivileged group with favorable outcome MIA accuracy: {unpriv_fav_mia_accuracy}")
print(f"Unprivileged group with unfavorable outcome MIA accuracy: {unpriv_unfav_mia_accuracy}")
