In [1]:
import random
import time
import copy
from collections import Counter
import csv
import scipy


from utils import *
from datasets import *
from mdav import *
from train import *
from models import *
from attacks import *

import numpy as np
import pandas as pd
from pandas.api.types import CategoricalDtype

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
from torch.utils.data import DataLoader, TensorDataset, Subset



from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.pipeline import FeatureUnion
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.metrics import accuracy_score

import os
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

import xgboost as xgb
from xgboost import XGBClassifier
from sklearn import metrics

%matplotlib inline

In [2]:
import warnings
from sklearn.exceptions import ConvergenceWarning, FitFailedWarning

# Silence the warning categories that would otherwise flood the cell output
# during the repeated fits below. The filters are registered in the same order
# as before (ConvergenceWarning, FitFailedWarning, UserWarning).
for ignored_category in (ConvergenceWarning, FitFailedWarning, UserWarning):
    warnings.filterwarnings("ignore", category=ignored_category)

# Shared dense one-hot encoder; later cells feed it column vectors of class
# indices (y_test / y_forget reshaped to (-1, 1)).
encoder = OneHotEncoder(categories="auto", sparse_output=False)


In [3]:
def seed_everything(seed=7):
    """Seed every random number generator this notebook draws from.

    The train/test mask uses ``np.random``, the retain/forget shuffle uses the
    stdlib ``random`` module, and torch is seeded for any torch-based code in
    the notebook. Without this call a fresh "Restart & Run All" produces a
    different split (and therefore different numbers) on every run.

    Parameters
    ----------
    seed : int
        Seed value applied to all generators.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Documented as a silent no-op when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True


seed_everything(seed=7)

In [4]:
# Step 1: Get dataset

# Load the ';'-separated cardio CSV, then drop the row identifier and any
# incomplete rows. Non-inplace calls keep the cell idempotent on re-run.
df = pd.read_csv('data/heart/cardio_train.csv', sep=';')
df = df.drop(columns=['id']).dropna()

split_ratio = 0.8  # 80% for the first DataFrame, 20% for the second
# Random row-wise split: each row lands in the training set with probability
# `split_ratio`, so the realised sizes only approximate 80/20.
mask = np.random.rand(len(df)) < split_ratio
# reset_index returns fresh frames, which avoids mutating a boolean-mask slice
# of `df` in place (chained-assignment hazard).
trainset = df[mask].reset_index(drop=True)
testset = df[~mask].reset_index(drop=True)

# The last column is the label; every other column is a feature.
X_train = trainset.iloc[:,:-1].values
y_train = trainset.iloc[:,-1].values
X_test = testset.iloc[:,:-1].values
y_test = testset.iloc[:,-1].values

# Standardise features with statistics estimated on the training split only.
SC = StandardScaler()
X_train = SC.fit_transform(X_train)
X_test = SC.transform(X_test)

# Report the class balance of the training labels.
counter = Counter(y_train)
for k,v in counter.items():
    per = v / len(y_train) * 100
    print('Class=%s, Count=%d, Percentage=%.2f%%' % (k, v, per))

num_features = X_train.shape[-1]
num_classes = len(set(y_train))


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Template estimator; every experiment deep-copies it before fitting.
# * `num_classes` is not an XGBClassifier parameter (the sklearn wrapper infers
#   the classes from `y`), so it is no longer forwarded as a stray booster kwarg.
# * XGBoost documents `device` as a string ("cpu" / "cuda"), so pass the
#   torch device's type rather than the torch.device object itself.
initial_model = XGBClassifier(reg_lambda=5, learning_rate=0.5, max_depth=7,
                              n_estimators=200, device=device.type)
n_repeat = 3

Class=0, Count=28071, Percentage=50.06%
Class=1, Count=28003, Percentage=49.94%


In [5]:
# Unlearning experiment on the heart dataset with XGBoost.
#
# For each forget ratio the cell
#   * Step 2: trains M on the full training set D,
#   * Step 3: retrains M_ret from scratch on the retain set D_ret,
#   * then, for each k: anonymizes D with `mdav` into D_k, trains M_k on D_k and
#     continues training M_k on D and on D_ret,
# and reports accuracy, membership-inference (MIA) metrics and wall-clock time
# as mean/std over `n_repeat` repeats, both on stdout and as CSV files.
#
# The helpers below replace five copy-pasted copies of the same
# fit / accuracy / MIA / summarise / save sequence.


def _timed_fit(clf, X, y, **fit_kwargs):
    """Fit ``clf`` on ``(X, y)`` and return the elapsed wall-clock seconds.

    The CUDA cache flush stays inside the timed region so reported runtimes
    remain comparable with earlier runs of this notebook.
    """
    start = time.time()
    torch.cuda.empty_cache()
    clf.fit(X, y, **fit_kwargs)
    return time.time() - start


def _accuracy_pct(clf, X, y):
    """Return the accuracy of ``clf`` on ``(X, y)`` as a percentage."""
    return 100.0 * metrics.accuracy_score(y, clf.predict(X))


def _per_sample_log_loss(one_hot_labels, probs):
    """Return one ``metrics.log_loss`` value per row of ``probs``.

    NOTE(review): each call receives a single one-hot row and a single
    probability row. For the two-class data used here this appears to equal
    -log p(true class); confirm before reusing with more than two classes.
    """
    return np.array([metrics.log_loss(one_hot_labels[i], probs[i])
                     for i in range(len(one_hot_labels))])


def _run_mia(clf, X_forget, y_forget, y_forget_one_hot, X_test, y_test, y_test_one_hot):
    """Run ``tf_attack`` with the forget set as members and the test set as non-members.

    Returns
    -------
    (auc, adv) : the maximum AUC and the maximum attacker advantage reported by
        the attack result, as returned by its accessors (not scaled to percent).
    """
    test_preds = clf.predict_proba(X_test)
    forget_preds = clf.predict_proba(X_forget)
    loss_test = _per_sample_log_loss(y_test_one_hot, test_preds)
    loss_forget = _per_sample_log_loss(y_forget_one_hot, forget_preds)

    attack_result = tf_attack(logits_train = forget_preds, logits_test = test_preds,
                              loss_train = loss_forget, loss_test = loss_test,
                              train_labels = y_forget, test_labels = y_test)

    auc = attack_result.get_result_with_max_auc().get_auc()
    adv = attack_result.get_result_with_max_attacker_advantage().get_attacker_advantage()
    return auc, adv


def _mean_std(values):
    """Return ``(np.mean(values), np.std(values))``."""
    return np.mean(values), np.std(values)


def _save_metrics(csv_path, rows):
    """Write ``rows`` of ``[metric, mean, std]`` to ``csv_path`` under a header row.

    The parent directory is created first so a fresh checkout without a
    ``results/`` tree does not fail with FileNotFoundError.
    """
    out_dir = os.path.dirname(csv_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(csv_path, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['Metric', 'Mean', 'Standard Deviation'])
        writer.writerows(rows)


# Randomly sample retain and forget sets
forget_ratios = [0.05]
for forget_ratio in forget_ratios:
    idxs = np.arange(len(y_train))
    random.shuffle(idxs)
    m = int(len(y_train)*forget_ratio)
    retain_idxs = idxs[m:]
    forget_idxs = idxs[:m]
    X_retain = X_train[retain_idxs]
    y_retain = y_train[retain_idxs]
    X_forget = X_train[forget_idxs]
    y_forget = y_train[forget_idxs]

    # Convert class indices to one-hot encoding. These depend only on the
    # current split, so they are computed once here instead of on every repeat
    # (and Step 3 no longer relies on values leaked from Step 2's loop).
    y_test_one_hot = encoder.fit_transform(y_test.reshape(-1, 1))
    y_forget_one_hot = encoder.transform(y_forget.reshape(-1, 1))
    mia_inputs = dict(X_forget=X_forget, y_forget=y_forget, y_forget_one_hot=y_forget_one_hot,
                      X_test=X_test, y_test=y_test, y_test_one_hot=y_test_one_hot)

    # Step 2: Define and train M on D
    train_accs = []
    test_accs = []
    mia_aucs = []
    mia_advs = []
    runtimes = []

    for r in range(n_repeat):
        model = copy.deepcopy(initial_model)
        runtimes.append(_timed_fit(model, X_train, y_train))

        # Evaluate the model accuracy, and MIA
        train_accs.append(_accuracy_pct(model, X_train, y_train))
        test_accs.append(_accuracy_pct(model, X_test, y_test))
        auc, adv = _run_mia(model, **mia_inputs)
        mia_aucs.append(100.0*auc)
        mia_advs.append(100.0*adv)

    mean_runtime, std_runtime = _mean_std(runtimes)
    mean_train_acc, std_train_acc = _mean_std(train_accs)
    mean_test_acc, std_test_acc = _mean_std(test_accs)
    mean_mia_auc, std_mia_auc = _mean_std(mia_aucs)
    mean_mia_adv, std_mia_adv = _mean_std(mia_advs)

    # Print the results
    print('Training M on D time:{:0.2f}(±{:0.2f}) seconds'.format(mean_runtime, std_runtime))
    print('Train accuracy:{:0.2f}(±{:0.2f})%'.format(mean_train_acc, std_train_acc))
    print('Test accuracy:{:0.2f}(±{:0.2f})%'.format(mean_test_acc, std_test_acc))
    print('MIA AUC:{:0.2f}(±{:0.2f})%'.format(mean_mia_auc, std_mia_auc))
    print('MIA Advantage:{:0.2f}(±{:0.2f})%'.format(mean_mia_adv, std_mia_adv))

    # Save to CSV
    csv_file_path = 'results/heart/xgb_m_d_fr={}.csv'.format(forget_ratio)
    _save_metrics(csv_file_path, [
        ['Training Time', mean_runtime, std_runtime],
        ['Train Accuracy', mean_train_acc, std_train_acc],
        ['Test Accuracy', mean_test_acc, std_test_acc],
        ['MIA AUC', mean_mia_auc, std_mia_auc],
        ['MIA Advantage', mean_mia_adv, std_mia_adv],
    ])

    ######################################################################################################
    # Step 3: Train M_retain on D_retain
    retain_accs = []
    forget_accs = []
    test_accs = []
    mia_aucs = []
    mia_advs = []
    runtimes = []
    for r in range(n_repeat):
        model_ret = copy.deepcopy(initial_model)
        runtimes.append(_timed_fit(model_ret, X_retain, y_retain))

        # Evaluate the model accuracy, and MIA
        retain_accs.append(_accuracy_pct(model_ret, X_retain, y_retain))
        forget_accs.append(_accuracy_pct(model_ret, X_forget, y_forget))
        test_accs.append(_accuracy_pct(model_ret, X_test, y_test))
        auc, adv = _run_mia(model_ret, **mia_inputs)
        mia_aucs.append(100.0*auc)
        mia_advs.append(100.0*adv)

    mean_retrain_runtime, std_retrain_runtime = _mean_std(runtimes)
    mean_retain_acc, std_retain_acc = _mean_std(retain_accs)
    mean_forget_acc, std_forget_acc = _mean_std(forget_accs)
    mean_retrain_test_acc, std_retrain_test_acc = _mean_std(test_accs)
    mean_retrain_mia_auc, std_retrain_mia_auc = _mean_std(mia_aucs)
    mean_retrain_mia_adv, std_retrain_mia_adv = _mean_std(mia_advs)

    # Print the results
    print('Retraining M on D_ret time:{:0.2f}(±{:0.2f}) seconds'.format(mean_retrain_runtime, std_retrain_runtime))
    print('Retain accuracy:{:0.2f}(±{:0.2f})%'.format(mean_retain_acc, std_retain_acc))
    print('Forget accuracy:{:0.2f}(±{:0.2f})%'.format(mean_forget_acc, std_forget_acc))
    print('Test accuracy:{:0.2f}(±{:0.2f})%'.format(mean_retrain_test_acc, std_retrain_test_acc))
    print('MIA AUC:{:0.2f}(±{:0.2f})%'.format(mean_retrain_mia_auc, std_retrain_mia_auc))
    print('MIA Advantage:{:0.2f}(±{:0.2f})%'.format(mean_retrain_mia_adv, std_retrain_mia_adv))

    # Save to CSV
    csv_retrain_file_path = 'results/heart/xgb_mret_dret_fr={}.csv'.format(forget_ratio)
    _save_metrics(csv_retrain_file_path, [
        ['Retraining Time', mean_retrain_runtime, std_retrain_runtime],
        ['Retain Accuracy', mean_retain_acc, std_retain_acc],
        ['Forget Accuracy', mean_forget_acc, std_forget_acc],
        ['Test Accuracy', mean_retrain_test_acc, std_retrain_test_acc],
        ['MIA AUC', mean_retrain_mia_auc, std_retrain_mia_auc],
        ['MIA Advantage', mean_retrain_mia_adv, std_retrain_mia_adv],
    ])

    ######################################################################################################
    # Step 1: k-anonymize and prepare D_k
    ft_epochs_list = [5]
    for ft_epochs in ft_epochs_list:
        K = [3, 5, 10, 20, 80, 160, 320, 640]
        for k in K:
            # Anonymization is measured once per k, so the reported standard
            # deviation of this timing is always 0. The timed region also
            # covers the statistics printout, as before.
            runtimes_k = []
            t0 = time.time()
            centroids, clusters, labels, X_train_k, y_train_k = mdav(copy.deepcopy(X_train), copy.deepcopy(y_train), k)
            print_stats(clusters, centroids)
            print('Shape of X_train_k:{}, y_train_k:{}'.format(X_train_k.shape, y_train_k.shape))
            t1 = time.time()
            rt_k = t1- t0
            runtimes_k.append(rt_k)

            train_accs_k = []
            test_accs_k = []
            mia_aucs_k = []
            mia_advs_k = []
            runtimes_train_k = []

            train_accs_k_D = []
            test_accs_k_D = []
            mia_aucs_k_D = []
            mia_advs_k_D = []
            runtimes_train_k_D = []

            retain_accs_k_ret = []
            forget_accs_k_ret = []
            test_accs_k_ret = []
            mia_aucs_k_ret = []
            mia_advs_k_ret = []
            runtimes_train_k_ret = []

            for r in range(n_repeat):
                # Train M_k on the anonymized data D_k.
                model_k = copy.deepcopy(initial_model)
                runtimes_train_k.append(_timed_fit(model_k, X_train_k, y_train_k))
                train_accs_k.append(_accuracy_pct(model_k, X_train_k, y_train_k))
                test_accs_k.append(_accuracy_pct(model_k, X_test, y_test))
                auc, adv = _run_mia(model_k, **mia_inputs)
                mia_aucs_k.append(100.0*auc)
                mia_advs_k.append(100.0*adv)

                # Continue training from M_k on the full data D; `ft_epochs` is
                # passed as n_estimators for the continued fit.
                model_k_D = copy.deepcopy(initial_model)
                model_k_D.set_params(learning_rate = 0.5, n_estimators=ft_epochs)
                runtimes_train_k_D.append(_timed_fit(model_k_D, X_train, y_train, xgb_model=model_k))
                train_accs_k_D.append(_accuracy_pct(model_k_D, X_train, y_train))
                test_accs_k_D.append(_accuracy_pct(model_k_D, X_test, y_test))
                auc, adv = _run_mia(model_k_D, **mia_inputs)
                mia_aucs_k_D.append(100.0*auc)
                mia_advs_k_D.append(100.0*adv)

                # Continue training from M_k on the retain set D_ret only.
                model_k_ret = copy.deepcopy(initial_model)
                model_k_ret.set_params(learning_rate = 0.5, n_estimators=ft_epochs)
                runtimes_train_k_ret.append(_timed_fit(model_k_ret, X_retain, y_retain, xgb_model=model_k))
                retain_accs_k_ret.append(_accuracy_pct(model_k_ret, X_retain, y_retain))
                forget_accs_k_ret.append(_accuracy_pct(model_k_ret, X_forget, y_forget))
                test_accs_k_ret.append(_accuracy_pct(model_k_ret, X_test, y_test))
                auc, adv = _run_mia(model_k_ret, **mia_inputs)
                mia_aucs_k_ret.append(100.0*auc)
                mia_advs_k_ret.append(100.0*adv)

            # Anonymizing D and training M_k on D_k
            mean_anonymize_time, std_anonymize_time = _mean_std(runtimes_k)
            mean_train_k_time, std_train_k_time = _mean_std(runtimes_train_k)
            mean_train_k_acc, std_train_k_acc = _mean_std(train_accs_k)
            mean_test_k_acc, std_test_k_acc = _mean_std(test_accs_k)
            mean_mia_k_auc, std_mia_k_auc = _mean_std(mia_aucs_k)
            mean_mia_k_adv, std_mia_k_adv = _mean_std(mia_advs_k)

            # Finetuning M_k on D
            mean_finetune_D_time, std_finetune_D_time = _mean_std(runtimes_train_k_D)
            mean_finetune_D_train_acc, std_finetune_D_train_acc = _mean_std(train_accs_k_D)
            mean_finetune_D_test_acc, std_finetune_D_test_acc = _mean_std(test_accs_k_D)
            mean_finetune_D_mia_auc, std_finetune_D_mia_auc = _mean_std(mia_aucs_k_D)
            mean_finetune_D_mia_adv, std_finetune_D_mia_adv = _mean_std(mia_advs_k_D)

            # Finetuning M_k on D_ret
            mean_finetune_D_ret_time, std_finetune_D_ret_time = _mean_std(runtimes_train_k_ret)
            mean_finetune_D_ret_train_acc, std_finetune_D_ret_train_acc = _mean_std(retain_accs_k_ret)
            mean_finetune_D_ret_forget_acc, std_finetune_D_ret_forget_acc = _mean_std(forget_accs_k_ret)
            mean_finetune_D_ret_test_acc, std_finetune_D_ret_test_acc = _mean_std(test_accs_k_ret)
            mean_finetune_D_ret_mia_auc, std_finetune_D_ret_mia_auc = _mean_std(mia_aucs_k_ret)
            mean_finetune_D_ret_mia_adv, std_finetune_D_ret_mia_adv = _mean_std(mia_advs_k_ret)

            # Print the results
            print('----------------------------------------')
            print('k=', k, 'Fine-tuning epochs=', ft_epochs)
            print('----------------------------------------')
            print('-----Anonymizing D and training M_k on D_k-----')
            print('Anonymizing D time:{:0.2f}(±{:0.2f})'.format(mean_anonymize_time, std_anonymize_time))
            print('Training M_k on D_k time:{:0.2f}(±{:0.2f})'.format(mean_train_k_time, std_train_k_time))
            print('Train accuracy:{:0.2f}(±{:0.2f})%'.format(mean_train_k_acc, std_train_k_acc))
            print('Test accuracy:{:0.2f}(±{:0.2f})%'.format(mean_test_k_acc, std_test_k_acc))
            print('MIA AUC:{:0.2f}(±{:0.2f})%'.format(mean_mia_k_auc, std_mia_k_auc))
            print('MIA Advantage:{:0.2f}(±{:0.2f})%'.format(mean_mia_k_adv, std_mia_k_adv))

            print('-----Finetuning M_k on D-----')
            print('Training M_k on D time:{:0.2f}(±{:0.2f})'.format(mean_finetune_D_time, std_finetune_D_time))
            print('Train accuracy:{:0.2f}(±{:0.2f})%'.format(mean_finetune_D_train_acc, std_finetune_D_train_acc))
            print('Test accuracy:{:0.2f}(±{:0.2f})%'.format(mean_finetune_D_test_acc, std_finetune_D_test_acc))
            print('MIA AUC:{:0.2f}(±{:0.2f})%'.format(mean_finetune_D_mia_auc, std_finetune_D_mia_auc))
            print('MIA Advantage:{:0.2f}(±{:0.2f})%'.format(mean_finetune_D_mia_adv, std_finetune_D_mia_adv))

            print('-----Finetuning M_k on D_ret-----')
            print('Finetuning M_k on D_retain time:{:0.2f}(±{:0.2f}) seconds'.format(mean_finetune_D_ret_time, std_finetune_D_ret_time))
            print('Retain accuracy:{:0.2f}(±{:0.2f})%'.format(mean_finetune_D_ret_train_acc, std_finetune_D_ret_train_acc))
            print('Forget accuracy:{:0.2f}(±{:0.2f})%'.format(mean_finetune_D_ret_forget_acc, std_finetune_D_ret_forget_acc))
            print('Test accuracy:{:0.2f}(±{:0.2f})%'.format(mean_finetune_D_ret_test_acc, std_finetune_D_ret_test_acc))
            print('MIA AUC:{:0.2f}(±{:0.2f})%'.format(mean_finetune_D_ret_mia_auc, std_finetune_D_ret_mia_auc))
            print('MIA Advantage:{:0.2f}(±{:0.2f})%'.format(mean_finetune_D_ret_mia_adv, std_finetune_D_ret_mia_adv))
            print('----------------------------------------')

            # Save to CSV
            csv_anonymize_file_path = 'results/heart/xgb_mk={}_dk_fr={}.csv'.format(k, forget_ratio)
            csv_finetune_D_file_path = 'results/heart/xgb_mk={}_d_fr={}_epochs={}.csv'.format(k, forget_ratio, ft_epochs)
            csv_finetune_D_ret_file_path = 'results/heart/xgb_mk={}_dret_fr={}_epochs={}.csv'.format(k, forget_ratio, ft_epochs)

            # Writing to CSV for anonymizing, finetuning on D, and finetuning on D_ret
            _save_metrics(csv_anonymize_file_path, [
                ['Anonymizing Time', mean_anonymize_time, std_anonymize_time],
                ['Training M_k on D_k Time', mean_train_k_time, std_train_k_time],
                ['Train Accuracy', mean_train_k_acc, std_train_k_acc],
                ['Test Accuracy', mean_test_k_acc, std_test_k_acc],
                ['MIA AUC', mean_mia_k_auc, std_mia_k_auc],
                ['MIA Advantage', mean_mia_k_adv, std_mia_k_adv],
            ])

            _save_metrics(csv_finetune_D_file_path, [
                ['Training M_k on D Time', mean_finetune_D_time, std_finetune_D_time],
                ['Train Accuracy', mean_finetune_D_train_acc, std_finetune_D_train_acc],
                ['Test Accuracy', mean_finetune_D_test_acc, std_finetune_D_test_acc],
                ['MIA AUC', mean_finetune_D_mia_auc, std_finetune_D_mia_auc],
                ['MIA Advantage', mean_finetune_D_mia_adv, std_finetune_D_mia_adv],
            ])

            _save_metrics(csv_finetune_D_ret_file_path, [
                ['Finetuning M_k on D_retain Time', mean_finetune_D_ret_time, std_finetune_D_ret_time],
                ['Retain Accuracy', mean_finetune_D_ret_train_acc, std_finetune_D_ret_train_acc],
                ['Forget Accuracy', mean_finetune_D_ret_forget_acc, std_finetune_D_ret_forget_acc],
                ['Test Accuracy', mean_finetune_D_ret_test_acc, std_finetune_D_ret_test_acc],
                ['MIA AUC', mean_finetune_D_ret_mia_auc, std_finetune_D_ret_mia_auc],
                ['MIA Advantage', mean_finetune_D_ret_mia_adv, std_finetune_D_ret_mia_adv],
            ])


Training M on D time:1.95(±0.47) seconds
Train accuracy:81.88(±0.00)%
Test accuracy:72.78(±0.00)%
MIA AUC:58.20(±2.56)%
MIA Advantage:20.75(±5.28)%
Retraining M on D_ret time:2.23(±0.05) seconds
Retain accuracy:82.25(±0.00)%
Forget accuracy:71.46(±0.00)%
Test accuracy:72.30(±0.00)%
MIA AUC:52.24(±3.04)%
MIA Advantage:18.34(±3.28)%


  data_k = np.vstack(np.repeat(c.mean(0).reshape(1, -1), len(c), axis = 0) for c in clusters)
100%|███████████████████████████████████████████████████████████████████████████| 56074/56074 [01:31<00:00, 615.99it/s]


Number of clusters: 18691
Mean of mean distances to centroids: 3.9142376999754136
Shape of X_train_k:(56074, 11), y_train_k:(56074,)
----------------------------------------
k= 3 Fine-tuning epochs= 5
----------------------------------------
-----Anonymizing D and training M_k on D_k-----
Anonymizing D time:91.45(±0.00)
Training M_k on D_k time:2.33(±0.08)
Train accuracy:77.31(±0.00)%
Test accuracy:71.76(±0.00)%
MIA AUC:51.41(±0.31)%
MIA Advantage:15.88(±6.88)%
-----Finetuning M_k on D-----
Training M_k on D time:0.23(±0.00)
Train accuracy:74.23(±0.00)%
Test accuracy:72.30(±0.00)%
MIA AUC:54.18(±2.48)%
MIA Advantage:19.61(±4.45)%
-----Finetuning M_k on D_ret-----
Finetuning M_k on D_retain time:0.23(±0.00) seconds
Retain accuracy:74.19(±0.00)%
Forget accuracy:73.85(±0.00)%
Test accuracy:72.20(±0.00)%
MIA AUC:52.41(±1.84)%
MIA Advantage:16.56(±5.95)%
----------------------------------------


  data_k = np.vstack(np.repeat(c.mean(0).reshape(1, -1), len(c), axis = 0) for c in clusters)
100%|███████████████████████████████████████████████████████████████████████████| 56074/56074 [00:57<00:00, 976.24it/s]


Number of clusters: 11214
Mean of mean distances to centroids: 3.905743138894533
Shape of X_train_k:(56074, 11), y_train_k:(56074,)
----------------------------------------
k= 5 Fine-tuning epochs= 5
----------------------------------------
-----Anonymizing D and training M_k on D_k-----
Anonymizing D time:57.70(±0.00)
Training M_k on D_k time:2.40(±0.04)
Train accuracy:74.71(±0.00)%
Test accuracy:71.92(±0.00)%
MIA AUC:54.08(±0.73)%
MIA Advantage:19.96(±4.08)%
-----Finetuning M_k on D-----
Training M_k on D time:0.24(±0.01)
Train accuracy:73.64(±0.00)%
Test accuracy:72.70(±0.00)%
MIA AUC:52.72(±2.53)%
MIA Advantage:27.98(±9.17)%
-----Finetuning M_k on D_ret-----
Finetuning M_k on D_retain time:0.24(±0.00) seconds
Retain accuracy:73.61(±0.00)%
Forget accuracy:72.81(±0.00)%
Test accuracy:72.58(±0.00)%
MIA AUC:50.48(±0.41)%
MIA Advantage:23.97(±2.99)%
----------------------------------------


  data_k = np.vstack(np.repeat(c.mean(0).reshape(1, -1), len(c), axis = 0) for c in clusters)
100%|██████████████████████████████████████████████████████████████████████████| 56074/56074 [00:29<00:00, 1903.48it/s]


Number of clusters: 5607
Mean of mean distances to centroids: 3.891002645095943
Shape of X_train_k:(56074, 11), y_train_k:(56074,)
----------------------------------------
k= 10 Fine-tuning epochs= 5
----------------------------------------
-----Anonymizing D and training M_k on D_k-----
Anonymizing D time:29.60(±0.00)
Training M_k on D_k time:2.33(±0.02)
Train accuracy:70.82(±0.00)%
Test accuracy:72.27(±0.00)%
MIA AUC:52.65(±1.09)%
MIA Advantage:21.36(±5.43)%
-----Finetuning M_k on D-----
Training M_k on D time:0.24(±0.00)
Train accuracy:73.59(±0.00)%
Test accuracy:72.85(±0.00)%
MIA AUC:52.18(±2.17)%
MIA Advantage:24.50(±1.65)%
-----Finetuning M_k on D_ret-----
Finetuning M_k on D_retain time:0.24(±0.00) seconds
Retain accuracy:73.70(±0.00)%
Forget accuracy:72.46(±0.00)%
Test accuracy:72.99(±0.00)%
MIA AUC:56.03(±4.42)%
MIA Advantage:24.50(±5.47)%
----------------------------------------


  data_k = np.vstack(np.repeat(c.mean(0).reshape(1, -1), len(c), axis = 0) for c in clusters)
100%|██████████████████████████████████████████████████████████████████████████| 56074/56074 [00:14<00:00, 3769.81it/s]


Number of clusters: 2803
Mean of mean distances to centroids: 3.8704355821712815
Shape of X_train_k:(56074, 11), y_train_k:(56074,)
----------------------------------------
k= 20 Fine-tuning epochs= 5
----------------------------------------
-----Anonymizing D and training M_k on D_k-----
Anonymizing D time:14.95(±0.00)
Training M_k on D_k time:2.24(±0.02)
Train accuracy:68.25(±0.00)%
Test accuracy:72.29(±0.00)%
MIA AUC:50.90(±0.35)%
MIA Advantage:17.68(±2.96)%
-----Finetuning M_k on D-----
Training M_k on D time:0.25(±0.00)
Train accuracy:73.93(±0.00)%
Test accuracy:73.22(±0.00)%
MIA AUC:53.51(±2.36)%
MIA Advantage:17.77(±4.96)%
-----Finetuning M_k on D_ret-----
Finetuning M_k on D_retain time:0.24(±0.00) seconds
Retain accuracy:73.99(±0.00)%
Forget accuracy:71.99(±0.00)%
Test accuracy:73.06(±0.00)%
MIA AUC:52.54(±1.49)%
MIA Advantage:24.20(±6.67)%
----------------------------------------


  data_k = np.vstack(np.repeat(c.mean(0).reshape(1, -1), len(c), axis = 0) for c in clusters)
100%|█████████████████████████████████████████████████████████████████████████| 56074/56074 [00:03<00:00, 15223.11it/s]


Number of clusters: 700
Mean of mean distances to centroids: 3.814035685806171
Shape of X_train_k:(56074, 11), y_train_k:(56074,)
----------------------------------------
k= 80 Fine-tuning epochs= 5
----------------------------------------
-----Anonymizing D and training M_k on D_k-----
Anonymizing D time:3.71(±0.00)
Training M_k on D_k time:1.26(±0.02)
Train accuracy:65.29(±0.00)%
Test accuracy:72.03(±0.00)%
MIA AUC:52.86(±2.50)%
MIA Advantage:19.27(±5.86)%
-----Finetuning M_k on D-----
Training M_k on D time:0.19(±0.00)
Train accuracy:73.99(±0.00)%
Test accuracy:73.45(±0.00)%
MIA AUC:52.58(±1.43)%
MIA Advantage:22.99(±2.71)%
-----Finetuning M_k on D_ret-----
Finetuning M_k on D_retain time:0.20(±0.01) seconds
Retain accuracy:74.05(±0.00)%
Forget accuracy:72.99(±0.00)%
Test accuracy:73.62(±0.00)%
MIA AUC:53.82(±2.99)%
MIA Advantage:23.04(±0.57)%
----------------------------------------


  data_k = np.vstack(np.repeat(c.mean(0).reshape(1, -1), len(c), axis = 0) for c in clusters)
100%|█████████████████████████████████████████████████████████████████████████| 56074/56074 [00:01<00:00, 30264.33it/s]


Number of clusters: 350
Mean of mean distances to centroids: 3.760483100624267
Shape of X_train_k:(56074, 11), y_train_k:(56074,)
----------------------------------------
k= 160 Fine-tuning epochs= 5
----------------------------------------
-----Anonymizing D and training M_k on D_k-----
Anonymizing D time:1.87(±0.00)
Training M_k on D_k time:0.97(±0.02)
Train accuracy:64.15(±0.00)%
Test accuracy:72.97(±0.00)%
MIA AUC:53.80(±2.89)%
MIA Advantage:25.41(±1.60)%
-----Finetuning M_k on D-----
Training M_k on D time:0.18(±0.00)
Train accuracy:74.11(±0.00)%
Test accuracy:73.78(±0.00)%
MIA AUC:50.49(±0.28)%
MIA Advantage:13.87(±2.98)%
-----Finetuning M_k on D_ret-----
Finetuning M_k on D_retain time:0.18(±0.00) seconds
Retain accuracy:74.10(±0.00)%
Forget accuracy:73.17(±0.00)%
Test accuracy:73.82(±0.00)%
MIA AUC:59.37(±8.31)%
MIA Advantage:25.68(±11.69)%
----------------------------------------


  data_k = np.vstack(np.repeat(c.mean(0).reshape(1, -1), len(c), axis = 0) for c in clusters)
100%|█████████████████████████████████████████████████████████████████████████| 56074/56074 [00:00<00:00, 58684.12it/s]


Number of clusters: 175
Mean of mean distances to centroids: 3.713880157518731
Shape of X_train_k:(56074, 11), y_train_k:(56074,)
----------------------------------------
k= 320 Fine-tuning epochs= 5
----------------------------------------
-----Anonymizing D and training M_k on D_k-----
Anonymizing D time:0.97(±0.00)
Training M_k on D_k time:0.79(±0.01)
Train accuracy:63.41(±0.00)%
Test accuracy:71.99(±0.00)%
MIA AUC:53.11(±0.38)%
MIA Advantage:15.90(±4.27)%
-----Finetuning M_k on D-----
Training M_k on D time:0.17(±0.00)
Train accuracy:73.97(±0.00)%
Test accuracy:73.80(±0.00)%
MIA AUC:50.46(±0.63)%
MIA Advantage:19.10(±6.55)%
-----Finetuning M_k on D_ret-----
Finetuning M_k on D_retain time:0.17(±0.00) seconds
Retain accuracy:74.24(±0.00)%
Forget accuracy:72.89(±0.00)%
Test accuracy:73.72(±0.00)%
MIA AUC:54.03(±2.80)%
MIA Advantage:26.68(±1.77)%
----------------------------------------


  data_k = np.vstack(np.repeat(c.mean(0).reshape(1, -1), len(c), axis = 0) for c in clusters)
100%|████████████████████████████████████████████████████████████████████████| 56074/56074 [00:00<00:00, 111633.26it/s]


Number of clusters: 87
Mean of mean distances to centroids: 3.638771284768852
Shape of X_train_k:(56074, 11), y_train_k:(56074,)
----------------------------------------
k= 640 Fine-tuning epochs= 5
----------------------------------------
-----Anonymizing D and training M_k on D_k-----
Anonymizing D time:0.52(±0.00)
Training M_k on D_k time:0.70(±0.01)
Train accuracy:62.28(±0.00)%
Test accuracy:71.69(±0.00)%
MIA AUC:51.66(±1.08)%
MIA Advantage:17.06(±4.35)%
-----Finetuning M_k on D-----
Training M_k on D time:0.17(±0.00)
Train accuracy:74.14(±0.00)%
Test accuracy:73.79(±0.00)%
MIA AUC:52.00(±2.26)%
MIA Advantage:21.45(±6.92)%
-----Finetuning M_k on D_ret-----
Finetuning M_k on D_retain time:0.17(±0.00) seconds
Retain accuracy:74.10(±0.00)%
Forget accuracy:72.81(±0.00)%
Test accuracy:73.79(±0.00)%
MIA AUC:50.31(±0.42)%
MIA Advantage:27.44(±2.11)%
----------------------------------------


# Differential privacy

In [6]:
# Differential-privacy (DP) baseline experiment.
#
# For every (forget ratio, fine-tuning budget, epsilon) combination:
#   1. train M_k from scratch on the data loaded from
#      dp_data/heart/dp_heart_eps=<eps>.csv,
#   2. continue training M_k on the full training set D,
#   3. continue training M_k on the retain set D_ret only,
# repeating n_repeat times, then print mean/std of runtime, accuracy and
# membership-inference (MIA) metrics and save them to results/heart/*.csv.


def _timed_fit(model, X, y, **fit_kwargs):
    """Fit `model` on (X, y) and return the elapsed wall-clock time in seconds.

    The CUDA cache flush stays inside the timed region so the reported
    runtimes remain comparable with the ones measured before this refactor.
    """
    t0 = time.time()
    torch.cuda.empty_cache()
    model.fit(X, y, **fit_kwargs)
    return time.time() - t0


def _per_sample_log_loss(y_one_hot, probs):
    """Return an array with one `metrics.log_loss` value per sample."""
    return np.array([metrics.log_loss(y_one_hot[i], probs[i]) for i in range(len(y_one_hot))])


def _mia_auc_and_advantage(model, X_forget, y_forget, y_forget_one_hot,
                           X_test, y_test, y_test_one_hot):
    """Run `tf_attack` with the forget set as members and the test set as non-members.

    Returns:
        (auc, advantage): the maximum attack AUC and the maximum attacker
        advantage reported by the attack, both scaled to percentages.
    """
    test_preds = model.predict_proba(X_test)
    forget_preds = model.predict_proba(X_forget)

    loss_test = _per_sample_log_loss(y_test_one_hot, test_preds)
    loss_forget = _per_sample_log_loss(y_forget_one_hot, forget_preds)

    attack_result = tf_attack(logits_train = forget_preds, logits_test = test_preds,
                              loss_train = loss_forget, loss_test = loss_test,
                              train_labels = y_forget, test_labels = y_test)

    auc = attack_result.get_result_with_max_auc().get_auc()
    adv = attack_result.get_result_with_max_attacker_advantage().get_attacker_advantage()
    return 100.0*auc, 100.0*adv


def _summarize(values):
    """Return (mean, standard deviation) of the per-repetition measurements."""
    return np.mean(values), np.std(values)


def _write_summary_csv(path, rows):
    """Write a 'Metric, Mean, Standard Deviation' table to `path`."""
    with open(path, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['Metric', 'Mean', 'Standard Deviation'])
        writer.writerows(rows)


forget_ratios = [0.05]
ft_epochs_list = [5]
EPS = [0.5, 2.5, 5., 25., 50., 100., 250, 500, 1000]

# Make sure the output directory exists before any result file is opened.
os.makedirs('results/heart', exist_ok=True)

# The test labels never change, so one-hot encode them once up front.
y_test_one_hot = encoder.fit_transform(y_test.reshape(-1, 1))

for forget_ratio in forget_ratios:
    # Randomly sample retain and forget sets
    idxs = np.arange(len(y_train))
    random.shuffle(idxs)
    m = int(len(y_train)*forget_ratio)
    retain_idxs = idxs[m:]
    forget_idxs = idxs[:m]
    X_retain = X_train[retain_idxs]
    y_retain = y_train[retain_idxs]
    X_forget = X_train[forget_idxs]
    y_forget = y_train[forget_idxs]
    y_forget_one_hot = encoder.transform(y_forget.reshape(-1, 1))

    for ft_epochs in ft_epochs_list:
        for eps in EPS:
            # Load the DP data for this epsilon (a single read; an earlier
            # regex-separated read of the same file was immediately discarded).
            dp_train_data = pd.read_csv('dp_data/heart/dp_heart_eps={}.csv'.format(eps), sep=',')
            # Drop rows with missing values
            dp_train_data.dropna(inplace=True)
            # Feature matrix: every column except the 'cardio' target
            X_train_dp = dp_train_data.drop('cardio', axis =1)
            # target values
            y_train_dp = dp_train_data['cardio'].values
            # NOTE(review): fit_transform refits the shared scaler SC on the DP
            # data; if SC was fitted on the real training data earlier, any
            # later SC.transform call will use DP statistics -- confirm intended.
            X_train_dp = SC.fit_transform(X_train_dp)
            print(forget_ratio, ft_epochs, eps, X_train_dp.shape, y_train_dp.shape)

            # Per-repetition measurements: M_k trained on the DP data
            train_accs_k = []
            test_accs_k = []
            mia_aucs_k = []
            mia_advs_k = []
            runtimes_train_k = []

            # Per-repetition measurements: M_k fine-tuned on D
            train_accs_k_D = []
            test_accs_k_D = []
            mia_aucs_k_D = []
            mia_advs_k_D = []
            runtimes_train_k_D = []

            # Per-repetition measurements: M_k fine-tuned on D_ret
            retain_accs_k_ret = []
            forget_accs_k_ret = []
            test_accs_k_ret = []
            mia_aucs_k_ret = []
            mia_advs_k_ret = []
            runtimes_train_k_ret = []

            # A single repetition loop: the experiment used to be wrapped in a
            # second, identical loop whose variable was shadowed, which only
            # re-ran everything and overwrote the same result files.
            for r in range(n_repeat):
                # --- Train M_k from scratch on the DP data ---
                model_k = copy.deepcopy(initial_model)
                runtimes_train_k.append(_timed_fit(model_k, X_train_dp, y_train_dp))

                train_acc = metrics.accuracy_score(y_train_dp, model_k.predict(X_train_dp))
                test_acc = metrics.accuracy_score(y_test, model_k.predict(X_test))
                train_accs_k.append(100.0*train_acc)
                test_accs_k.append(100.0*test_acc)

                auc, adv = _mia_auc_and_advantage(model_k, X_forget, y_forget, y_forget_one_hot,
                                                  X_test, y_test, y_test_one_hot)
                mia_aucs_k.append(auc)
                mia_advs_k.append(adv)

                # --- Fine-tune M_k on D ---
                # Passing xgb_model=model_k continues boosting from M_k for
                # ft_epochs additional rounds.
                model_k_D = copy.deepcopy(initial_model)
                model_k_D.set_params(learning_rate = 0.5, n_estimators=ft_epochs)
                runtimes_train_k_D.append(_timed_fit(model_k_D, X_train, y_train, xgb_model=model_k))

                train_acc = metrics.accuracy_score(y_train, model_k_D.predict(X_train))
                test_acc = metrics.accuracy_score(y_test, model_k_D.predict(X_test))
                train_accs_k_D.append(100.0*train_acc)
                test_accs_k_D.append(100.0*test_acc)

                auc, adv = _mia_auc_and_advantage(model_k_D, X_forget, y_forget, y_forget_one_hot,
                                                  X_test, y_test, y_test_one_hot)
                mia_aucs_k_D.append(auc)
                mia_advs_k_D.append(adv)

                # --- Fine-tune M_k on D_ret ---
                model_k_ret = copy.deepcopy(initial_model)
                model_k_ret.set_params(learning_rate = 0.5, n_estimators=ft_epochs)
                runtimes_train_k_ret.append(_timed_fit(model_k_ret, X_retain, y_retain, xgb_model=model_k))

                retain_acc = metrics.accuracy_score(y_retain, model_k_ret.predict(X_retain))
                forget_acc = metrics.accuracy_score(y_forget, model_k_ret.predict(X_forget))
                test_acc = metrics.accuracy_score(y_test, model_k_ret.predict(X_test))
                retain_accs_k_ret.append(100.0*retain_acc)
                forget_accs_k_ret.append(100.0*forget_acc)
                test_accs_k_ret.append(100.0*test_acc)

                auc, adv = _mia_auc_and_advantage(model_k_ret, X_forget, y_forget, y_forget_one_hot,
                                                  X_test, y_test, y_test_one_hot)
                mia_aucs_k_ret.append(auc)
                mia_advs_k_ret.append(adv)

            # Anonymizing D and training M_k on D_k
            mean_train_k_time, std_train_k_time = _summarize(runtimes_train_k)
            mean_train_k_acc, std_train_k_acc = _summarize(train_accs_k)
            mean_test_k_acc, std_test_k_acc = _summarize(test_accs_k)
            mean_mia_k_auc, std_mia_k_auc = _summarize(mia_aucs_k)
            mean_mia_k_adv, std_mia_k_adv = _summarize(mia_advs_k)

            # Finetuning M_k on D
            mean_finetune_D_time, std_finetune_D_time = _summarize(runtimes_train_k_D)
            mean_finetune_D_train_acc, std_finetune_D_train_acc = _summarize(train_accs_k_D)
            mean_finetune_D_test_acc, std_finetune_D_test_acc = _summarize(test_accs_k_D)
            mean_finetune_D_mia_auc, std_finetune_D_mia_auc = _summarize(mia_aucs_k_D)
            mean_finetune_D_mia_adv, std_finetune_D_mia_adv = _summarize(mia_advs_k_D)

            # Finetuning M_k on D_ret
            mean_finetune_D_ret_time, std_finetune_D_ret_time = _summarize(runtimes_train_k_ret)
            mean_finetune_D_ret_train_acc, std_finetune_D_ret_train_acc = _summarize(retain_accs_k_ret)
            mean_finetune_D_ret_forget_acc, std_finetune_D_ret_forget_acc = _summarize(forget_accs_k_ret)
            mean_finetune_D_ret_test_acc, std_finetune_D_ret_test_acc = _summarize(test_accs_k_ret)
            mean_finetune_D_ret_mia_auc, std_finetune_D_ret_mia_auc = _summarize(mia_aucs_k_ret)
            mean_finetune_D_ret_mia_adv, std_finetune_D_ret_mia_adv = _summarize(mia_advs_k_ret)

            # Print the results
            print('----------------------------------------')
            print('Epsilon=', eps, 'Fine-tuning epochs=', ft_epochs)
            print('----------------------------------------')
            print('-----Anonymizing D and training M_k on D_k-----')
            print('Training M_k on D_k time:{:0.2f}(±{:0.2f})'.format(mean_train_k_time, std_train_k_time))
            print('Train accuracy:{:0.2f}(±{:0.2f})%'.format(mean_train_k_acc, std_train_k_acc))
            print('Test accuracy:{:0.2f}(±{:0.2f})%'.format(mean_test_k_acc, std_test_k_acc))
            print('MIA AUC:{:0.2f}(±{:0.2f})%'.format(mean_mia_k_auc, std_mia_k_auc))
            print('MIA Advantage:{:0.2f}(±{:0.2f})%'.format(mean_mia_k_adv, std_mia_k_adv))

            print('-----Finetuning M_k on D-----')
            print('Training M_k on D time:{:0.2f}(±{:0.2f})'.format(mean_finetune_D_time, std_finetune_D_time))
            print('Train accuracy:{:0.2f}(±{:0.2f})%'.format(mean_finetune_D_train_acc, std_finetune_D_train_acc))
            print('Test accuracy:{:0.2f}(±{:0.2f})%'.format(mean_finetune_D_test_acc, std_finetune_D_test_acc))
            print('MIA AUC:{:0.2f}(±{:0.2f})%'.format(mean_finetune_D_mia_auc, std_finetune_D_mia_auc))
            print('MIA Advantage:{:0.2f}(±{:0.2f})%'.format(mean_finetune_D_mia_adv, std_finetune_D_mia_adv))

            print('-----Finetuning M_k on D_ret-----')
            print('Finetuning M_k on D_retain time:{:0.2f}(±{:0.2f}) seconds'.format(mean_finetune_D_ret_time, std_finetune_D_ret_time))
            print('Retain accuracy:{:0.2f}(±{:0.2f})%'.format(mean_finetune_D_ret_train_acc, std_finetune_D_ret_train_acc))
            print('Forget accuracy:{:0.2f}(±{:0.2f})%'.format(mean_finetune_D_ret_forget_acc, std_finetune_D_ret_forget_acc))
            print('Test accuracy:{:0.2f}(±{:0.2f})%'.format(mean_finetune_D_ret_test_acc, std_finetune_D_ret_test_acc))
            print('MIA AUC:{:0.2f}(±{:0.2f})%'.format(mean_finetune_D_ret_mia_auc, std_finetune_D_ret_mia_auc))
            print('MIA Advantage:{:0.2f}(±{:0.2f})%'.format(mean_finetune_D_ret_mia_adv, std_finetune_D_ret_mia_adv))
            print('----------------------------------------')

            # Save to CSV
            csv_anonymize_file_path = 'results/heart/xgb_mdp_eps={}_fr={}.csv'.format(eps, forget_ratio)
            csv_finetune_D_file_path = 'results/heart/xgb_mdpd_eps={}_fr={}_epochs={}.csv'.format(eps, forget_ratio, ft_epochs)
            csv_finetune_D_ret_file_path = 'results/heart/xgb_mdpret_eps={}_fr={}_epochs={}.csv'.format(eps, forget_ratio, ft_epochs)

            # Writing to CSV for anonymizing, finetuning on D, and finetuning on D_ret
            _write_summary_csv(csv_anonymize_file_path, [
                ['Training M_k on D_k Time', mean_train_k_time, std_train_k_time],
                ['Train Accuracy', mean_train_k_acc, std_train_k_acc],
                ['Test Accuracy', mean_test_k_acc, std_test_k_acc],
                ['MIA AUC', mean_mia_k_auc, std_mia_k_auc],
                ['MIA Advantage', mean_mia_k_adv, std_mia_k_adv],
            ])

            _write_summary_csv(csv_finetune_D_file_path, [
                ['Training M_k on D Time', mean_finetune_D_time, std_finetune_D_time],
                ['Train Accuracy', mean_finetune_D_train_acc, std_finetune_D_train_acc],
                ['Test Accuracy', mean_finetune_D_test_acc, std_finetune_D_test_acc],
                ['MIA AUC', mean_finetune_D_mia_auc, std_finetune_D_mia_auc],
                ['MIA Advantage', mean_finetune_D_mia_adv, std_finetune_D_mia_adv],
            ])

            _write_summary_csv(csv_finetune_D_ret_file_path, [
                ['Finetuning M_k on D_retain Time', mean_finetune_D_ret_time, std_finetune_D_ret_time],
                ['Retain Accuracy', mean_finetune_D_ret_train_acc, std_finetune_D_ret_train_acc],
                ['Forget Accuracy', mean_finetune_D_ret_forget_acc, std_finetune_D_ret_forget_acc],
                ['Test Accuracy', mean_finetune_D_ret_test_acc, std_finetune_D_ret_test_acc],
                ['MIA AUC', mean_finetune_D_ret_mia_auc, std_finetune_D_ret_mia_auc],
                ['MIA Advantage', mean_finetune_D_ret_mia_adv, std_finetune_D_ret_mia_adv],
            ])

0.05 5 0.5 (70000, 11) (70000,)
----------------------------------------
Epsilon= 0.5 Fine-tuning epochs= 5
----------------------------------------
-----Anonymizing D and training M_k on D_k-----
Training M_k on D_k time:2.14(±0.05)
Train accuracy:59.31(±0.00)%
Test accuracy:49.08(±0.00)%
MIA AUC:50.97(±1.10)%
MIA Advantage:17.61(±3.43)%
-----Finetuning M_k on D-----
Training M_k on D time:0.24(±0.01)
Train accuracy:69.40(±0.00)%
Test accuracy:68.92(±0.00)%
MIA AUC:52.29(±1.77)%
MIA Advantage:18.34(±4.22)%
-----Finetuning M_k on D_ret-----
Finetuning M_k on D_retain time:0.25(±0.00) seconds
Retain accuracy:69.31(±0.00)%
Forget accuracy:68.36(±0.00)%
Test accuracy:69.03(±0.00)%
MIA AUC:51.28(±1.17)%
MIA Advantage:22.40(±4.94)%
----------------------------------------
0.05 5 2.5 (70000, 11) (70000,)
----------------------------------------
Epsilon= 2.5 Fine-tuning epochs= 5
----------------------------------------
-----Anonymizing D and training M_k on D_k-----
Training M_k on D_k time:

0.05 5 5.0 (70000, 11) (70000,)
----------------------------------------
Epsilon= 5.0 Fine-tuning epochs= 5
----------------------------------------
-----Anonymizing D and training M_k on D_k-----
Training M_k on D_k time:2.49(±0.04)
Train accuracy:72.73(±0.00)%
Test accuracy:49.22(±0.00)%
MIA AUC:50.95(±0.45)%
MIA Advantage:19.40(±2.63)%
-----Finetuning M_k on D-----
Training M_k on D time:0.26(±0.00)
Train accuracy:71.46(±0.00)%
Test accuracy:71.00(±0.00)%
MIA AUC:52.97(±2.32)%
MIA Advantage:27.25(±9.14)%
-----Finetuning M_k on D_ret-----
Finetuning M_k on D_retain time:0.26(±0.00) seconds
Retain accuracy:71.60(±0.00)%
Forget accuracy:70.46(±0.00)%
Test accuracy:71.08(±0.00)%
MIA AUC:60.20(±7.48)%
MIA Advantage:30.33(±4.90)%
----------------------------------------
0.05 5 25.0 (70000, 11) (70000,)
----------------------------------------
Epsilon= 25.0 Fine-tuning epochs= 5
----------------------------------------
-----Anonymizing D and training M_k on D_k-----
Training M_k on D_k tim

0.05 5 50.0 (70000, 11) (70000,)
----------------------------------------
Epsilon= 50.0 Fine-tuning epochs= 5
----------------------------------------
-----Anonymizing D and training M_k on D_k-----
Training M_k on D_k time:2.35(±0.34)
Train accuracy:82.60(±0.00)%
Test accuracy:56.87(±0.00)%
MIA AUC:57.79(±8.55)%
MIA Advantage:27.35(±9.23)%
-----Finetuning M_k on D-----
Training M_k on D time:0.25(±0.01)
Train accuracy:72.57(±0.00)%
Test accuracy:72.30(±0.00)%
MIA AUC:50.83(±0.87)%
MIA Advantage:19.97(±11.97)%
-----Finetuning M_k on D_ret-----
Finetuning M_k on D_retain time:0.23(±0.03) seconds
Retain accuracy:72.64(±0.00)%
Forget accuracy:71.10(±0.00)%
Test accuracy:72.15(±0.00)%
MIA AUC:54.02(±0.53)%
MIA Advantage:15.44(±6.80)%
----------------------------------------
0.05 5 100.0 (70000, 11) (70000,)
----------------------------------------
Epsilon= 100.0 Fine-tuning epochs= 5
----------------------------------------
-----Anonymizing D and training M_k on D_k-----
Training M_k on D_