# Reproduction of Adult dataset experiments

In this notebook we reproduce the results from Table 2 of the DECAF paper. We compare various methods for generating debiased data using the DECAF model against synthetic data generated using benchmark models GAN, WGAN-GP and FairGAN. As described in the paper we run all experiments (as implemented in this notebook) 10 times and avarage the results.

In [1]:
from sklearn.metrics import precision_score, recall_score, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier

from data import load_adult, preprocess_adult
from metrics import DP, FTU

# from train import train_decaf, train_fairgan, train_vanilla_gan, train_wgan_gp

import os
import warnings

import numpy as np
import pandas as pd
import torch

import matplotlib.pyplot as plt
import pytorch_lightning as pl

from data import DataModule, inject_synth_bias, load_credit, preprocess_credit
from metrics import eval_model
from models.DECAF import DECAF

warnings.filterwarnings("ignore")

  from .autonotebook import tqdm as notebook_tqdm
  warn(


In [2]:
# goggle evaluation
from synthcity.plugins.core.dataloader import GenericDataLoader
from synthcity.metrics import eval_detection, eval_performance, eval_statistical



## Loading data

In [3]:
dataset = load_adult()
dataset.head()

Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,income
0,39,State-gov,77516.0,Bachelors,13.0,Never-married,Adm-clerical,Not-in-family,White,Male,2174.0,0.0,40.0,United-States,<=50K
1,50,Self-emp-not-inc,83311.0,Bachelors,13.0,Married-civ-spouse,Exec-managerial,Husband,White,Male,0.0,0.0,13.0,United-States,<=50K
2,38,Private,215646.0,HS-grad,9.0,Divorced,Handlers-cleaners,Not-in-family,White,Male,0.0,0.0,40.0,United-States,<=50K
3,53,Private,234721.0,11th,7.0,Married-civ-spouse,Handlers-cleaners,Husband,Black,Male,0.0,0.0,40.0,United-States,<=50K
4,28,Private,338409.0,Bachelors,13.0,Married-civ-spouse,Prof-specialty,Wife,Black,Female,0.0,0.0,40.0,Cuba,<=50K


Preprocess the data next in order to make it suitable for training models on.

In [4]:
dataset = preprocess_adult(dataset)
dataset.head()

Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,income
0,0.30137,0.833333,0.04335,0.0,0.8,0.333333,0.615385,0.6,0.0,1.0,0.02174,0.0,0.397959,0.0,1.0
1,0.452055,0.166667,0.047274,0.0,0.8,0.0,0.307692,0.4,0.0,1.0,0.0,0.0,0.122449,0.0,1.0
2,0.287671,0.0,0.136877,0.2,0.533333,0.166667,0.461538,0.6,0.0,1.0,0.0,0.0,0.397959,0.0,1.0
3,0.493151,0.0,0.149792,0.133333,0.4,0.0,0.461538,0.4,1.0,1.0,0.0,0.0,0.397959,0.0,1.0
4,0.150685,0.0,0.219998,0.0,0.8,0.0,0.384615,0.0,1.0,0.0,0.0,0.0,0.397959,0.3,1.0


Split the dataset into train and test folds. Test fold size is 2000.

In [5]:
# Split data into train and testing sets
dataset_train, dataset_test = train_test_split(dataset, test_size=2000,
                                               stratify=dataset['income'])

print('Size of train set:', len(dataset_train))
print('Size of test set:', len(dataset_test))

Size of train set: 43222
Size of test set: 2000


### Defining the DAG

We need to define a DAG which captures the biases of the dataset. As described in the DECAF paper normally a causal discovery algorithm is used. In this notebook we simply copy the DAG which as described in the Zhang et al. paper which is the one also used in the DECAF paper.

In [6]:
# Define DAG for Adult dataset
dag = [
    # Edges from race
    ['race', 'occupation'],
    ['race', 'income'],
    ['race', 'hours-per-week'],
    ['race', 'education'],
    ['race', 'marital-status'],

    # Edges from age
    ['age', 'occupation'],
    ['age', 'hours-per-week'],
    ['age', 'income'],
    ['age', 'workclass'],
    ['age', 'marital-status'],
    ['age', 'education'],
    ['age', 'relationship'],
    
    # Edges from sex
    ['sex', 'occupation'],
    ['sex', 'marital-status'],
    ['sex', 'income'],
    ['sex', 'workclass'],
    ['sex', 'education'],
    ['sex', 'relationship'],
    
    # Edges from native country
    ['native-country', 'marital-status'],
    ['native-country', 'hours-per-week'],
    ['native-country', 'education'],
    ['native-country', 'workclass'],
    ['native-country', 'income'],
    ['native-country', 'relationship'],
    
    # Edges from marital status
    ['marital-status', 'occupation'],
    ['marital-status', 'hours-per-week'],
    ['marital-status', 'income'],
    ['marital-status', 'workclass'],
    ['marital-status', 'relationship'],
    ['marital-status', 'education'],
    
    # Edges from education
    ['education', 'occupation'],
    ['education', 'hours-per-week'],
    ['education', 'income'],
    ['education', 'workclass'],
    ['education', 'relationship'],
    
    # All remaining edges
    ['occupation', 'income'],
    ['hours-per-week', 'income'],
    ['workclass', 'income'],
    ['relationship', 'income'],
]

def dag_to_idx(df, dag):
    """Convert columns in a DAG to the corresponding indices."""

    dag_idx = []
    for edge in dag:
        dag_idx.append([df.columns.get_loc(edge[0]), df.columns.get_loc(edge[1])])

    return dag_idx

# Convert the DAG to one that can be provided to the DECAF model
dag_seed = dag_to_idx(dataset, dag)
print(dag_seed)

[[8, 6], [8, 14], [8, 12], [8, 3], [8, 5], [0, 6], [0, 12], [0, 14], [0, 1], [0, 5], [0, 3], [0, 7], [9, 6], [9, 5], [9, 14], [9, 1], [9, 3], [9, 7], [13, 5], [13, 12], [13, 3], [13, 1], [13, 14], [13, 7], [5, 6], [5, 12], [5, 14], [5, 1], [5, 7], [5, 3], [3, 6], [3, 12], [3, 14], [3, 1], [3, 7], [6, 14], [12, 14], [1, 14], [7, 14]]


It's also necessary to define edges we want to remove from the DAG in order to meet the various fairness criteria described in the paper.

In [7]:
def create_bias_dict(df, edge_map):
    """
    Convert the given edge tuples to a bias dict used for generating
    debiased synthetic data.
    """
    bias_dict = {}
    for key, val in edge_map.items():
        bias_dict[df.columns.get_loc(key)] = [df.columns.get_loc(f) for f in val]
    
    return bias_dict

# Bias dictionary to satisfy FTU
bias_dict_ftu = create_bias_dict(dataset, {'income': ['sex']})
print('Bias dict FTU:', bias_dict_ftu)

# Bias dictionary to satisfy DP
bias_dict_dp = create_bias_dict(dataset, {'income': [
    'occupation', 'hours-per-week', 'marital-status', 'education', 'sex',
    'workclass', 'relationship']})
print('Bias dict DP:', bias_dict_dp)

# Bias dictionary to satisfy CF
bias_dict_cf = create_bias_dict(dataset, {'income': [
    'marital-status', 'sex']})
print('Bias dict CF:', bias_dict_cf)

Bias dict FTU: {14: [9]}
Bias dict DP: {14: [6, 12, 5, 3, 9, 1, 7]}
Bias dict CF: {14: [5, 9]}


## Experiments

We have loaded and preprocessed the data and we are ready to run the experiments. For each experiment we train a generative model, sample synthetic data from the trained model and then obtain metrics by training and evaluating a downstream multi-layer perceptron using the test fold we generated in the previous section. We use the MLP model from `sklearn` with default parameters which matches the settings described in Appendix D of the paper.

In [8]:
models_dir = './cache/'

def train_decaf(train_dataset, dag_seed, test_dataset, biased_edges={}, h_dim=200, lr=0.5e-3,
                batch_size=64, lambda_privacy=0, lambda_gp=10, d_updates=10,
                alpha=2, rho=2, weight_decay=1e-2, grad_dag_loss=False, l1_g=0,
                l1_W=1e-4, p_gen=-1, use_mask=True, epochs=50, model_name='decaf', generate_test=False):
    
    model_filename = os.path.join(models_dir, f'{model_name}.pkl')

    dm = DataModule(train_dataset.values)
    dm_test = DataModule(test_dataset.values)
     
    model = DECAF(
        dm.dims[0],
        dag_seed=dag_seed,
        h_dim=h_dim,
        lr=lr,
        batch_size=batch_size,
        lambda_privacy=lambda_privacy,
        lambda_gp=lambda_gp,
        d_updates=d_updates,
        alpha=alpha,
        rho=rho,
        weight_decay=weight_decay,
        grad_dag_loss=grad_dag_loss,
        l1_g=l1_g,
        l1_W=l1_W,
        p_gen=p_gen,
        use_mask=use_mask,
    )

    if os.path.exists(model_filename):
        model = torch.load(model_filename)
    else:
        trainer = pl.Trainer(max_epochs=epochs, logger=False)
        trainer.fit(model, dm)
        torch.save(model, model_filename)

    # Generate synthetic data
    synth_dataset = (
        model.gen_synthetic(
            dm.dataset.x,
            gen_order=model.get_gen_order(),
            biased_edges=biased_edges,
        )
        .detach()
        .numpy()
    )
    synth_dataset[:, -1] = synth_dataset[:, -1].astype(np.int8)

    synth_dataset = pd.DataFrame(synth_dataset,
                                 index=train_dataset.index,
                                 columns=train_dataset.columns)
    if generate_test:
        synth_dataset_x_test = (
            model.gen_synthetic(
                dm_test.dataset.x,
                gen_order=model.get_gen_order(),
                biased_edges=biased_edges,
            )
            .detach()
            .numpy()
        )
        synth_dataset_x_test[:, -1] = synth_dataset_x_test[:, -1].astype(np.int8)

        synth_dataset_x_test = pd.DataFrame(synth_dataset_x_test,
                                    index=test_dataset.index,
                                    columns=test_dataset.columns)

    if 'approved' in synth_dataset.columns:
        # Binarise columns for credit dataset
        synth_dataset['ethnicity'] = np.round(synth_dataset['ethnicity'])
        synth_dataset['approved'] = np.round(synth_dataset['approved'])
    else:
        # Binarise columns for adult dataset
        synth_dataset['sex'] = np.round(synth_dataset['sex'])
        synth_dataset['income'] = np.round(synth_dataset['income'])

        synth_dataset_x_test['sex'] = np.round(synth_dataset_x_test['sex'])
        synth_dataset_x_test['income'] = np.round(synth_dataset_x_test['income'])

    return synth_dataset, synth_dataset_x_test

In [8]:
def eval_model(dataset_train, dataset_test):
    """Helper function that prints evaluation metrics."""

    X_train, y_train = dataset_train.drop(columns=['income']), dataset_train['income']
    X_test, y_test = dataset_test.drop(columns=['income']), dataset_test['income']

    clf = MLPClassifier()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    auroc = roc_auc_score(y_test, y_pred)
    dp = DP(clf, X_test)
    ftu = FTU(clf, X_test)

    return {'precision': precision, 'recall': recall, 'auroc': auroc,
            'dp': dp, 'ftu': ftu}

In [9]:
def eval_goggle(X_test, X_synth):
    quality_evaluator = eval_statistical.AlphaPrecision()
    qual_res = quality_evaluator.evaluate(X_test, X_synth)
    qual_res = {
        k: v for (k, v) in qual_res.items() if "naive" in k
    }  # use the naive implementation of AlphaPrecision
    qual_score = np.mean(list(qual_res.values()))

    xgb_evaluator = eval_performance.PerformanceEvaluatorXGB()
    linear_evaluator = eval_performance.PerformanceEvaluatorLinear()
    mlp_evaluator = eval_performance.PerformanceEvaluatorMLP()
    xgb_score = xgb_evaluator.evaluate(X_test, X_synth)
    linear_score = linear_evaluator.evaluate(X_test, X_synth)
    mlp_score = mlp_evaluator.evaluate(X_test, X_synth)
    gt_perf = (xgb_score["gt"] + linear_score["gt"] + mlp_score["gt"]) / 3
    synth_perf = (
        xgb_score["syn_ood"] + linear_score["syn_ood"] + mlp_score["syn_ood"]
    ) / 3

    xgb_detector = eval_detection.SyntheticDetectionXGB()
    mlp_detector = eval_detection.SyntheticDetectionMLP()
    gmm_detector = eval_detection.SyntheticDetectionGMM()
    xgb_det = xgb_detector.evaluate(X_test, X_synth)
    mlp_det = mlp_detector.evaluate(X_test, X_synth)
    gmm_det = gmm_detector.evaluate(X_test, X_synth)
    det_score = (xgb_det["mean"] + mlp_det["mean"] + gmm_det["mean"]) / 3

    return qual_score, (gt_perf, synth_perf), det_score

### Original dataset

As a benchmark we want to first train the downstream model on the original dataset.

In [11]:
eval_model(dataset_train, dataset_test)

{'precision': 0.8834394904458599,
 'recall': 0.9222074468085106,
 'auroc': 0.7766279169526424,
 'dp': 0.19207586973309976,
 'ftu': 0.02300000000000002}

In the following sections we train various models in order to reproduce the results from Table 2 of the DECAF paper.

### GAN

In [31]:
synth_data = train_vanilla_gan(dataset_train)
synth_data.head()

2023-07-24 13:43:38.290828: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2023-07-24 13:43:38.557989: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2023-07-24 13:43:38.564195: W tensorflow/stream_executor/cuda/cuda_driver.cc:326] failed call to cuInit: UNKNOWN ERROR (303)
2023-07-24 13:43:38.573554: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (LAPTOP-V7B5NBEP): /proc/driver/nvidia/version does not exist
2023-07-24 13:43:40.115972: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow 

Epoch 17:  40%|████      | 271/676 [116:04:05<173:27:34, 1541.86s/it, loss=-0.173]


  0%|          | 0/50 [00:00<?, ?it/s]2023-07-24 13:43:47.621733: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2023-07-24 13:43:47.655306: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 2419205000 Hz


0 [D loss: 0.000159, acc.: 100.00%] [G loss: 40.029396]


  2%|▏         | 1/50 [00:27<22:31, 27.58s/it]

generated_data


  4%|▍         | 2/50 [00:49<19:33, 24.45s/it]

1 [D loss: 0.000004, acc.: 100.00%] [G loss: 69.241051]


  6%|▌         | 3/50 [01:13<18:56, 24.19s/it]

2 [D loss: 0.000001, acc.: 100.00%] [G loss: 89.717522]


  8%|▊         | 4/50 [01:36<18:15, 23.82s/it]

3 [D loss: 0.000001, acc.: 100.00%] [G loss: 112.376114]


 10%|█         | 5/50 [02:01<18:04, 24.09s/it]

4 [D loss: 0.000001, acc.: 100.00%] [G loss: 126.050262]


 12%|█▏        | 6/50 [02:22<16:57, 23.12s/it]

5 [D loss: 0.000000, acc.: 100.00%] [G loss: 128.806671]


 14%|█▍        | 7/50 [02:41<15:38, 21.83s/it]

6 [D loss: 0.000003, acc.: 100.00%] [G loss: 138.328171]


 16%|█▌        | 8/50 [03:01<14:47, 21.12s/it]

7 [D loss: 0.000000, acc.: 100.00%] [G loss: 151.230377]


 18%|█▊        | 9/50 [03:23<14:33, 21.31s/it]

8 [D loss: 0.000000, acc.: 100.00%] [G loss: 164.476807]


 20%|██        | 10/50 [03:44<14:10, 21.26s/it]

9 [D loss: 0.000000, acc.: 100.00%] [G loss: 184.275070]


 22%|██▏       | 11/50 [04:07<14:15, 21.93s/it]

10 [D loss: 0.000000, acc.: 100.00%] [G loss: 171.705780]


 24%|██▍       | 12/50 [04:29<13:49, 21.84s/it]

11 [D loss: 0.000000, acc.: 100.00%] [G loss: 196.511871]


 26%|██▌       | 13/50 [04:48<13:00, 21.10s/it]

12 [D loss: 0.000000, acc.: 100.00%] [G loss: 198.392395]


 28%|██▊       | 14/50 [05:08<12:20, 20.57s/it]

13 [D loss: 0.000001, acc.: 100.00%] [G loss: 201.853607]


 30%|███       | 15/50 [05:29<12:07, 20.78s/it]

14 [D loss: 0.000000, acc.: 100.00%] [G loss: 223.310440]


 32%|███▏      | 16/50 [05:52<12:11, 21.51s/it]

15 [D loss: 0.000004, acc.: 100.00%] [G loss: 228.240662]


 34%|███▍      | 17/50 [06:14<11:51, 21.55s/it]

16 [D loss: 0.000000, acc.: 100.00%] [G loss: 221.520203]


 36%|███▌      | 18/50 [06:37<11:40, 21.88s/it]

17 [D loss: 0.000000, acc.: 100.00%] [G loss: 224.175293]


 38%|███▊      | 19/50 [07:04<12:08, 23.49s/it]

18 [D loss: 0.000001, acc.: 100.00%] [G loss: 230.776550]


 40%|████      | 20/50 [07:29<12:04, 24.15s/it]

19 [D loss: 0.000000, acc.: 100.00%] [G loss: 250.125916]


 42%|████▏     | 21/50 [07:50<11:12, 23.20s/it]

20 [D loss: 0.000000, acc.: 100.00%] [G loss: 249.634354]


 44%|████▍     | 22/50 [08:09<10:09, 21.75s/it]

21 [D loss: 0.000000, acc.: 100.00%] [G loss: 258.159851]


 46%|████▌     | 23/50 [08:27<09:19, 20.74s/it]

22 [D loss: 0.000000, acc.: 100.00%] [G loss: 269.017944]


 48%|████▊     | 24/50 [08:49<09:07, 21.05s/it]

23 [D loss: 0.000000, acc.: 100.00%] [G loss: 257.279541]


 50%|█████     | 25/50 [09:13<09:10, 22.01s/it]

24 [D loss: 0.000003, acc.: 100.00%] [G loss: 265.822815]


 52%|█████▏    | 26/50 [09:31<08:21, 20.89s/it]

25 [D loss: 0.000000, acc.: 100.00%] [G loss: 303.679993]


 54%|█████▍    | 27/50 [09:47<07:22, 19.25s/it]

26 [D loss: 0.000000, acc.: 100.00%] [G loss: 277.542603]


 56%|█████▌    | 28/50 [10:02<06:38, 18.13s/it]

27 [D loss: 0.000000, acc.: 100.00%] [G loss: 286.166229]


 58%|█████▊    | 29/50 [10:18<06:04, 17.35s/it]

28 [D loss: 0.000000, acc.: 100.00%] [G loss: 260.385101]


 60%|██████    | 30/50 [10:34<05:39, 16.95s/it]

29 [D loss: 0.000000, acc.: 100.00%] [G loss: 270.920013]


 62%|██████▏   | 31/50 [10:49<05:10, 16.35s/it]

30 [D loss: 0.000000, acc.: 100.00%] [G loss: 316.056274]


 64%|██████▍   | 32/50 [11:04<04:46, 15.89s/it]

31 [D loss: 0.000000, acc.: 100.00%] [G loss: 296.471771]


 66%|██████▌   | 33/50 [11:19<04:26, 15.67s/it]

32 [D loss: 0.000000, acc.: 100.00%] [G loss: 287.187866]


 68%|██████▊   | 34/50 [11:35<04:11, 15.75s/it]

33 [D loss: 0.000000, acc.: 100.00%] [G loss: 307.802979]


 70%|███████   | 35/50 [11:50<03:54, 15.64s/it]

34 [D loss: 0.000001, acc.: 100.00%] [G loss: 298.326385]


 72%|███████▏  | 36/50 [12:09<03:52, 16.60s/it]

35 [D loss: 0.000000, acc.: 100.00%] [G loss: 339.156494]


 74%|███████▍  | 37/50 [12:25<03:31, 16.29s/it]

36 [D loss: 0.052112, acc.: 99.61%] [G loss: 330.582794]


 76%|███████▌  | 38/50 [12:45<03:28, 17.39s/it]

37 [D loss: 0.000000, acc.: 100.00%] [G loss: 316.654327]


 78%|███████▊  | 39/50 [13:05<03:21, 18.28s/it]

38 [D loss: 0.000000, acc.: 100.00%] [G loss: 347.652893]


 80%|████████  | 40/50 [13:23<03:02, 18.25s/it]

39 [D loss: 0.000000, acc.: 100.00%] [G loss: 324.904053]


 82%|████████▏ | 41/50 [13:41<02:42, 18.11s/it]

40 [D loss: 0.000000, acc.: 100.00%] [G loss: 366.127411]


 84%|████████▍ | 42/50 [13:59<02:24, 18.06s/it]

41 [D loss: 0.000000, acc.: 100.00%] [G loss: 324.657715]


 86%|████████▌ | 43/50 [14:18<02:08, 18.29s/it]

42 [D loss: 0.000001, acc.: 100.00%] [G loss: 356.669373]


 88%|████████▊ | 44/50 [14:34<01:45, 17.59s/it]

43 [D loss: 0.000000, acc.: 100.00%] [G loss: 347.197571]


 90%|█████████ | 45/50 [14:48<01:23, 16.68s/it]

44 [D loss: 0.000000, acc.: 100.00%] [G loss: 354.199738]


 92%|█████████▏| 46/50 [15:05<01:06, 16.67s/it]

45 [D loss: 0.000000, acc.: 100.00%] [G loss: 363.670044]


 94%|█████████▍| 47/50 [15:30<00:57, 19.26s/it]

46 [D loss: 0.000000, acc.: 100.00%] [G loss: 329.782135]


 96%|█████████▌| 48/50 [15:54<00:41, 20.62s/it]

47 [D loss: 0.000000, acc.: 100.00%] [G loss: 361.226196]


 98%|█████████▊| 49/50 [16:15<00:20, 20.86s/it]

48 [D loss: 0.000000, acc.: 100.00%] [G loss: 356.410614]


100%|██████████| 50/50 [16:36<00:00, 19.94s/it]


49 [D loss: 0.000000, acc.: 100.00%] [G loss: 381.815826]


Synthetic data generation: 100%|██████████| 338/338 [00:06<00:00, 53.68it/s]


Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,income
0,-0.000553,0.666667,0.014988,0.333333,0.133333,0.333333,0.846154,0.6,0.0,1.0,0.0261,-0.034176,-0.017084,0.425,0.0
1,-0.000553,0.333333,0.014988,0.666667,0.533333,0.333333,0.846154,1.0,0.75,1.0,0.0261,-0.034176,-0.017084,0.275,1.0
2,-0.000553,0.833333,0.014988,0.066667,0.933333,0.666667,0.0,0.2,0.25,1.0,0.0261,-0.034176,-0.017084,0.15,1.0
3,-0.000553,0.166667,0.014988,0.466667,0.8,0.166667,0.230769,0.4,0.0,0.0,0.0261,-0.034176,-0.017084,0.35,1.0
4,-0.000553,0.833333,0.014988,0.4,0.533333,0.5,0.923077,0.0,0.5,0.0,0.0261,-0.034176,-0.017084,0.475,1.0


In [10]:
eval_model(synth_data, dataset_test)

{'precision': 0.6971736204576043,
 'recall': 0.6888297872340425,
 'auroc': 0.39078586135895677,
 'dp': 0.7117996939976097,
 'ftu': 0.47}

### WGAN-GP

In [11]:
synth_data = train_wgan_gp(dataset_train)
synth_data.head()

  2%|▏         | 1/50 [00:05<04:48,  5.88s/it]

Epoch: 0 | disc_loss: 0.15245389938354492 | gen_loss: -0.06989498436450958


  4%|▍         | 2/50 [00:10<03:54,  4.89s/it]

Epoch: 1 | disc_loss: 0.0009180586785078049 | gen_loss: -0.0033318146597594023


  6%|▌         | 3/50 [00:14<03:36,  4.60s/it]

Epoch: 2 | disc_loss: -0.004699191078543663 | gen_loss: 0.013895398937165737


  8%|▊         | 4/50 [00:18<03:23,  4.42s/it]

Epoch: 3 | disc_loss: 0.4139225482940674 | gen_loss: 0.014603140763938427


 10%|█         | 5/50 [00:22<03:14,  4.33s/it]

Epoch: 4 | disc_loss: 1.4078881740570068 | gen_loss: 0.008926557376980782


 12%|█▏        | 6/50 [00:27<03:11,  4.34s/it]

Epoch: 5 | disc_loss: -0.012374036014080048 | gen_loss: 0.04716317355632782


 14%|█▍        | 7/50 [00:32<03:16,  4.57s/it]

Epoch: 6 | disc_loss: 1.080539345741272 | gen_loss: -0.0200481116771698


 16%|█▌        | 8/50 [00:36<03:09,  4.50s/it]

Epoch: 7 | disc_loss: 0.46246033906936646 | gen_loss: 0.014931818470358849


 18%|█▊        | 9/50 [00:40<03:02,  4.46s/it]

Epoch: 8 | disc_loss: 1.791371464729309 | gen_loss: 0.03856757655739784


 20%|██        | 10/50 [00:45<03:01,  4.54s/it]

Epoch: 9 | disc_loss: 0.133454367518425 | gen_loss: 0.07898350059986115


 22%|██▏       | 11/50 [00:49<02:54,  4.47s/it]

Epoch: 10 | disc_loss: -0.056419532746076584 | gen_loss: 0.029874488711357117


 24%|██▍       | 12/50 [00:54<02:48,  4.43s/it]

Epoch: 11 | disc_loss: 3.5388360023498535 | gen_loss: 0.047961827367544174


 26%|██▌       | 13/50 [00:59<02:49,  4.58s/it]

Epoch: 12 | disc_loss: -0.036514509469270706 | gen_loss: 0.0739879384636879


 28%|██▊       | 14/50 [01:03<02:44,  4.57s/it]

Epoch: 13 | disc_loss: -0.043887440115213394 | gen_loss: 0.05410666763782501


 30%|███       | 15/50 [01:10<02:59,  5.14s/it]

Epoch: 14 | disc_loss: -0.060908351093530655 | gen_loss: 0.06259451061487198


 32%|███▏      | 16/50 [01:16<03:09,  5.57s/it]

Epoch: 15 | disc_loss: -0.008532661944627762 | gen_loss: 0.06303340196609497


 34%|███▍      | 17/50 [01:21<02:53,  5.24s/it]

Epoch: 16 | disc_loss: -0.05893365666270256 | gen_loss: 0.05481185391545296


 36%|███▌      | 18/50 [01:25<02:37,  4.91s/it]

Epoch: 17 | disc_loss: -0.06201693415641785 | gen_loss: -0.008117406629025936


 38%|███▊      | 19/50 [01:29<02:25,  4.70s/it]

Epoch: 18 | disc_loss: -0.07819495350122452 | gen_loss: 0.014502013102173805


 40%|████      | 20/50 [01:33<02:15,  4.52s/it]

Epoch: 19 | disc_loss: -0.06925803422927856 | gen_loss: 0.02424549125134945


 42%|████▏     | 21/50 [01:38<02:12,  4.59s/it]

Epoch: 20 | disc_loss: 0.05352470278739929 | gen_loss: 0.04082176834344864


 44%|████▍     | 22/50 [01:42<02:08,  4.58s/it]

Epoch: 21 | disc_loss: 0.19452141225337982 | gen_loss: 0.017852380871772766


 46%|████▌     | 23/50 [01:47<02:04,  4.62s/it]

Epoch: 22 | disc_loss: 0.7697275876998901 | gen_loss: -0.018885279074311256


 48%|████▊     | 24/50 [01:52<02:01,  4.67s/it]

Epoch: 23 | disc_loss: -0.07032063603401184 | gen_loss: 0.07426878809928894


 50%|█████     | 25/50 [01:56<01:54,  4.57s/it]

Epoch: 24 | disc_loss: -0.07268654555082321 | gen_loss: 0.036055222153663635


 52%|█████▏    | 26/50 [02:00<01:47,  4.47s/it]

Epoch: 25 | disc_loss: 4.07672643661499 | gen_loss: 0.02717735432088375


 54%|█████▍    | 27/50 [02:05<01:42,  4.47s/it]

Epoch: 26 | disc_loss: -0.08391407877206802 | gen_loss: 0.046227939426898956


 56%|█████▌    | 28/50 [02:10<01:39,  4.53s/it]

Epoch: 27 | disc_loss: -0.02380479872226715 | gen_loss: 0.03740321844816208


 58%|█████▊    | 29/50 [02:15<01:40,  4.81s/it]

Epoch: 28 | disc_loss: 0.025824055075645447 | gen_loss: -0.0008147454354912043


 60%|██████    | 30/50 [02:21<01:40,  5.01s/it]

Epoch: 29 | disc_loss: 0.019960414618253708 | gen_loss: 0.0661730095744133


 62%|██████▏   | 31/50 [02:26<01:36,  5.05s/it]

Epoch: 30 | disc_loss: -0.06524480134248734 | gen_loss: -0.016044048592448235


 64%|██████▍   | 32/50 [02:31<01:29,  5.00s/it]

Epoch: 31 | disc_loss: -0.08846060186624527 | gen_loss: 0.03986186906695366


 66%|██████▌   | 33/50 [02:35<01:23,  4.92s/it]

Epoch: 32 | disc_loss: -0.08835487812757492 | gen_loss: 0.03278104215860367


 68%|██████▊   | 34/50 [02:40<01:17,  4.82s/it]

Epoch: 33 | disc_loss: -0.060576777905225754 | gen_loss: 0.03870237246155739


 70%|███████   | 35/50 [02:44<01:10,  4.69s/it]

Epoch: 34 | disc_loss: 0.2938733696937561 | gen_loss: 0.04040674865245819


 72%|███████▏  | 36/50 [02:49<01:07,  4.80s/it]

Epoch: 35 | disc_loss: -0.07837940007448196 | gen_loss: 0.04813133180141449


 74%|███████▍  | 37/50 [02:54<01:02,  4.80s/it]

Epoch: 36 | disc_loss: 0.9505723714828491 | gen_loss: -0.008810448460280895


 76%|███████▌  | 38/50 [02:59<00:57,  4.82s/it]

Epoch: 37 | disc_loss: -0.0900762602686882 | gen_loss: 0.04091276228427887


 78%|███████▊  | 39/50 [03:03<00:51,  4.64s/it]

Epoch: 38 | disc_loss: 0.31794506311416626 | gen_loss: 0.0437326580286026


 80%|████████  | 40/50 [03:07<00:44,  4.47s/it]

Epoch: 39 | disc_loss: -0.09149685502052307 | gen_loss: 0.02371656522154808


 82%|████████▏ | 41/50 [03:11<00:39,  4.37s/it]

Epoch: 40 | disc_loss: 0.10043340921401978 | gen_loss: 0.1226792186498642


 84%|████████▍ | 42/50 [03:15<00:34,  4.25s/it]

Epoch: 41 | disc_loss: -0.05715509504079819 | gen_loss: 0.04945666715502739


 86%|████████▌ | 43/50 [03:20<00:29,  4.22s/it]

Epoch: 42 | disc_loss: -0.08194859325885773 | gen_loss: 0.015959346666932106


 88%|████████▊ | 44/50 [03:24<00:25,  4.30s/it]

Epoch: 43 | disc_loss: -0.05691428855061531 | gen_loss: 0.04214879125356674


 90%|█████████ | 45/50 [03:29<00:22,  4.57s/it]

Epoch: 44 | disc_loss: -0.05352303385734558 | gen_loss: 0.04443015158176422


 92%|█████████▏| 46/50 [03:34<00:17,  4.49s/it]

Epoch: 45 | disc_loss: -0.07117447257041931 | gen_loss: 0.08183664828538895


 94%|█████████▍| 47/50 [03:38<00:13,  4.39s/it]

Epoch: 46 | disc_loss: -0.0995122417807579 | gen_loss: 0.05079025402665138


 96%|█████████▌| 48/50 [03:42<00:08,  4.33s/it]

Epoch: 47 | disc_loss: 0.03659405559301376 | gen_loss: 0.04000601917505264


 98%|█████████▊| 49/50 [03:46<00:04,  4.28s/it]

Epoch: 48 | disc_loss: -0.08710119128227234 | gen_loss: 0.041870132088661194


100%|██████████| 50/50 [03:51<00:00,  4.62s/it]


Epoch: 49 | disc_loss: -0.10412894934415817 | gen_loss: 0.05354977026581764


Synthetic data generation: 100%|██████████| 87/87 [00:01<00:00, 69.38it/s]


Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,income
0,0.41516,0.333333,0.1362,0.333333,0.6,0.166667,0.307692,1.0,1.0,1.0,2.1e-05,0.04194,0.407321,0.375,0.0
1,0.452994,0.666667,0.128935,0.4,0.8,0.5,0.846154,0.2,0.75,0.0,-0.026776,0.029268,0.403623,0.1,0.0
2,0.500516,0.166667,0.204948,0.066667,0.933333,0.666667,1.0,0.0,0.0,1.0,-0.028636,0.001469,0.480637,0.4,0.0
3,0.208567,0.5,0.118594,0.6,0.8,0.833333,0.307692,0.0,1.0,0.0,-0.027901,0.015777,0.280495,0.475,0.0
4,0.430072,0.166667,0.126853,0.666667,0.466667,0.0,0.384615,0.0,0.0,0.0,-0.030149,0.020552,0.380429,0.65,1.0


In [12]:
eval_model(synth_data, dataset_test)

{'precision': 0.7207890743550834,
 'recall': 0.3158244680851064,
 'auroc': 0.47242836307481123,
 'dp': 0.10060642419000054,
 'ftu': 0.14}

### FairGAN

In [13]:
synth_data = train_fairgan(dataset_train)
synth_data.head()

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
cache/adult.npy


2022-02-04 17:11:43.219249: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:196] None of the MLIR optimization passes are enabled (registered 0 passes)


Pretrain_Epoch:0, trainLoss:0.018377, validLoss:0.007400, validReverseLoss:0.000000
Pretrain_Epoch:1, trainLoss:0.007770, validLoss:0.007489, validReverseLoss:0.000000
Pretrain_Epoch:2, trainLoss:0.007857, validLoss:0.007542, validReverseLoss:0.000000
Pretrain_Epoch:3, trainLoss:0.007895, validLoss:0.007547, validReverseLoss:0.000000
Pretrain_Epoch:4, trainLoss:0.007891, validLoss:0.006571, validReverseLoss:0.000000
Pretrain_Epoch:5, trainLoss:0.002940, validLoss:0.002490, validReverseLoss:0.000000
Pretrain_Epoch:6, trainLoss:0.002588, validLoss:0.002484, validReverseLoss:0.000000
Pretrain_Epoch:7, trainLoss:0.002579, validLoss:0.002462, validReverseLoss:0.000000
Pretrain_Epoch:8, trainLoss:0.002581, validLoss:0.002461, validReverseLoss:0.000000
Pretrain_Epoch:9, trainLoss:0.002567, validLoss:0.002443, validReverseLoss:0.000000
Pretrain_Epoch:10, trainLoss:0.002573, validLoss:0.002476, validReverseLoss:0.000000
Pretrain_Epoch:11, trainLoss:0.002570, validLoss:0.002426, validReverseLoss



Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,income
0,0.0,0.313098,0.0,0.447853,0.307014,0.633629,0.370685,0.0,0.414991,0.0,0.0,0.369372,0.526167,1.0481,1.0
1,0.162797,0.153582,0.032848,0.780304,0.193229,0.46195,0.398713,0.0,0.813397,0.0,0.0,0.385241,0.0,1.102639,1.0
2,0.0,0.208462,0.239274,0.839121,0.0,0.545892,0.40626,0.0,1.180958,0.0,0.0,0.410443,0.0,0.0,1.0
3,0.14955,0.366827,0.022638,0.792689,0.08744,0.2444,0.490716,0.0,0.860048,0.0,0.0,0.332419,0.0,0.363599,1.0
4,0.06353,0.319478,0.718697,0.598897,0.166194,0.573069,0.62833,0.0,1.016383,0.0,0.070529,0.514503,0.0,0.772208,1.0


In [14]:
eval_model(synth_data, dataset_test)

{'precision': 0.773604590505999,
 'recall': 0.9860372340425532,
 'auroc': 0.5555186170212766,
 'dp': 0.01400030693248877,
 'ftu': 0.030999999999999917}

### DECAF

#### DECAF-ND

In [13]:
_, synth_data= train_decaf(dataset_train, dag_seed, dataset_test, generate_test=True)
synth_data.head()

# save synthetic sample
dataset_test.to_csv('x_test_adult.csv')
synth_data.to_csv('x_synth_adult.csv')


Initialised adjacency matrix as parsed:
 Parameter containing:
tensor([[0., 1., 0., 1., 0., 1., 1., 1., 0., 0., 0., 0., 1., 0., 1.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 1., 0., 1.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 1., 0., 1., 0., 0., 1., 1., 0., 0., 0., 0., 1., 0., 1.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 1., 0., 1., 1., 0., 0., 0., 0., 0., 1., 0., 1.],
        [0., 1., 0., 1., 0., 1., 1., 1., 0., 0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
        [0., 1., 0., 1., 0.

In [21]:
eval_model(synth_data, dataset_test)

{'precision': 0.8819599109131403,
 'recall': 0.7898936170212766,
 'auroc': 0.7346645504461221,
 'dp': 0.32534373,
 'ftu': 0.099999964}

In [10]:
# load synthetic and test
X_test = pd.read_csv('x_test_adult.csv', index_col=0)
X_synth_test = pd.read_csv('x_synth_adult.csv', index_col=0)

In [11]:
label = "income"

X_synth_loader = GenericDataLoader(
    X_synth_test,
    target_column=label,
)
X_test_loader = GenericDataLoader(
    X_test,
    target_column=label,
)

res = eval_goggle(X_synth_loader, X_test_loader)

print(f"Quality: {res[0]:.3f}")
print(f"Detection: {res[2]:.3f}")
print(
f"Performance on real: {res[1][0]:.3f}, on synth: {res[1][1]:.3f}, diff: {(res[1][0] - res[1][1]):.3f}")

Quality: 0.572
Detection: 0.698
Performance on real: 0.718, on synth: 0.640, diff: 0.078


#### DECAF-FTU

In [12]:
_, synth_data = train_decaf(dataset_train, dag_seed, dataset_test, biased_edges=bias_dict_ftu, generate_test=True)
synth_data.head()

Initialised adjacency matrix as parsed:
 Parameter containing:
tensor([[0., 1., 0., 1., 0., 1., 1., 1., 0., 0., 0., 0., 1., 0., 1.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 1., 0., 1.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 1., 0., 1., 0., 0., 1., 1., 0., 0., 0., 0., 1., 0., 1.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 1., 0., 1., 1., 0., 0., 0., 0., 0., 1., 0., 1.],
        [0., 1., 0., 1., 0., 1., 1., 1., 0., 0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
        [0., 1., 0., 1., 0.

Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,income
11086,0.495013,1.072265e-07,0.035992,0.269277,0.502386,0.001513,0.066416,0.366714,1.881836e-30,1.0,6e-06,1.258836e-13,0.566528,1.836281e-06,0.0
13040,0.494979,9.606715e-06,0.06779,0.170352,0.508076,0.426382,0.460906,0.980978,0.9967096,0.0,0.010387,0.04444226,0.3388,2.679395e-05,0.0
12915,0.148479,3.297513e-07,0.03157,0.274266,0.811098,0.406156,0.095585,0.312192,0.9700151,1.0,0.000444,7.830165e-11,0.468205,1.945665e-11,1.0
8828,0.132333,0.5430275,0.041796,0.60231,0.509497,0.387588,0.148159,0.82886,4.58491e-29,1.0,0.000146,3.218246e-07,0.306507,0.002295997,1.0
12594,0.170732,0.008479881,0.157833,0.208199,0.650439,0.162866,0.227585,0.257204,1.04455e-05,1.0,0.000869,6.504069e-13,0.297163,2.7777880000000004e-17,1.0


In [13]:
# save synthetic sample
dataset_test.to_csv('x_test_adult_ftu.csv')
synth_data.to_csv('x_synth_adult_ftu.csv')

In [23]:
eval_model(synth_data, dataset_test)

{'precision': 0.8622412562455389,
 'recall': 0.8031914893617021,
 'auroc': 0.7070392930679478,
 'dp': 0.2167862,
 'ftu': 0.0015000105}

In [12]:
# load synthetic and test
X_test = pd.read_csv('x_test_adult_ftu.csv', index_col=0)
X_synth_test = pd.read_csv('x_synth_adult_ftu.csv', index_col=0)

In [13]:
label = "income"

X_synth_loader = GenericDataLoader(
    X_synth_test,
    target_column=label,
)
X_test_loader = GenericDataLoader(
    X_test,
    target_column=label,
)

res = eval_goggle(X_synth_loader, X_test_loader)

print(f"Quality: {res[0]:.3f}")
print(f"Detection: {res[2]:.3f}")
print(
f"Performance on real: {res[1][0]:.3f}, on synth: {res[1][1]:.3f}, diff: {(res[1][0] - res[1][1]):.3f}")

Quality: 0.567
Detection: 0.698
Performance on real: 0.677, on synth: 0.671, diff: 0.006


#### DECAF-CF

In [14]:
_, synth_data = train_decaf(dataset_train, dag_seed, dataset_test, biased_edges=bias_dict_cf, generate_test=True)
synth_data.head()

Initialised adjacency matrix as parsed:
 Parameter containing:
tensor([[0., 1., 0., 1., 0., 1., 1., 1., 0., 0., 0., 0., 1., 0., 1.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 1., 0., 1.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 1., 0., 1., 0., 0., 1., 1., 0., 0., 0., 0., 1., 0., 1.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 1., 0., 1., 1., 0., 0., 0., 0., 0., 1., 0., 1.],
        [0., 1., 0., 1., 0., 1., 1., 1., 0., 0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
        [0., 1., 0., 1., 0.

Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,income
11086,0.494885,1.496933e-05,0.03708,0.068646,0.509228,0.000149,0.419497,0.39327,3.492381e-15,1.0,1.011338e-07,5.85165e-15,0.566989,2.633086e-19,0.0
13040,0.494936,4.529162e-05,0.03578,0.131511,0.692952,0.309115,0.464737,0.440604,0.0002211955,1.0,7.582552e-07,1.849083e-06,0.260473,1.530963e-12,1.0
12915,0.070165,0.001562485,0.230961,0.169594,0.580689,0.194124,0.039334,0.278739,5.385244e-37,1.0,1.844534e-05,1.88866e-12,0.233035,4.324961e-05,1.0
8828,0.494861,5.945465e-07,0.028139,0.09503,0.499025,0.003266,0.117451,0.449909,5.955176e-28,1.0,0.00120484,3.345086e-16,0.57202,1.9139829999999998e-20,0.0
12594,0.494926,0.7365664,0.135249,0.711014,0.771396,6e-06,0.343281,0.350653,8.121139e-31,1.0,2.099754e-07,2.198741e-06,0.420037,5.05975e-11,1.0


In [15]:
# save synthetic sample
dataset_test.to_csv('x_test_adult_cf.csv')
synth_data.to_csv('x_synth_adult_cf.csv')

In [20]:
eval_model(synth_data, dataset_test)

{'precision': 0.7692307692307693,
 'recall': 0.9441489361702128,
 'auroc': 0.5426389842141386,
 'dp': 0.0018555522,
 'ftu': 0.07699996}

In [14]:
# load synthetic and test
X_test = pd.read_csv('x_test_adult_cf.csv', index_col=0)
X_synth_test = pd.read_csv('x_synth_adult_cf.csv', index_col=0)

In [15]:
label = "income"

X_synth_loader = GenericDataLoader(
    X_synth_test,
    target_column=label,
)
X_test_loader = GenericDataLoader(
    X_test,
    target_column=label,
)

res = eval_goggle(X_synth_loader, X_test_loader)

print(f"Quality: {res[0]:.3f}")
print(f"Detection: {res[2]:.3f}")
print(
f"Performance on real: {res[1][0]:.3f}, on synth: {res[1][1]:.3f}, diff: {(res[1][0] - res[1][1]):.3f}")

Quality: 0.571
Detection: 0.708
Performance on real: 0.549, on synth: 0.551, diff: -0.002


#### DECAF-DP

In [9]:
_, synth_data = train_decaf(dataset_train, dag_seed, dataset_test, biased_edges=bias_dict_dp, generate_test=True)
synth_data.head()

Initialised adjacency matrix as parsed:
 Parameter containing:
tensor([[0., 1., 0., 1., 0., 1., 1., 1., 0., 0., 0., 0., 1., 0., 1.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 1., 0., 1.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 1., 0., 1., 0., 0., 1., 1., 0., 0., 0., 0., 1., 0., 1.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 1., 0., 1., 1., 0., 0., 0., 0., 0., 1., 0., 1.],
        [0., 1., 0., 1., 0., 1., 1., 1., 0., 0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
        [0., 1., 0., 1., 0.

Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,income
10434,0.165051,0.004011,0.233374,0.084554,0.509286,0.000177,0.219202,0.338384,1.7636350000000002e-27,1.0,6.160838e-06,0.03714814,0.403032,2.8434000000000002e-18,1.0
7471,0.442097,0.301965,0.038136,0.148809,0.509486,0.009816,0.103833,0.387248,1.8137529999999999e-37,1.0,1.659088e-10,8.341237e-16,0.294574,3.544383e-18,0.0
4377,0.493849,7.6e-05,0.039821,0.130236,0.508173,0.004529,0.700785,0.40584,6.14648e-23,1.0,1.918529e-06,1.647496e-10,0.32796,8.76126e-11,0.0
30452,0.241193,0.016223,0.174061,0.886941,0.699391,0.37164,0.11659,0.771863,0.8329799,1.0,8.071691e-06,4.995499e-12,0.536204,0.0002946659,1.0
9045,0.494885,0.741972,0.077021,0.199371,0.660638,8.6e-05,0.832221,0.372516,0.6276626,1.0,7.832788e-07,0.003580257,0.26929,2.492106e-15,1.0


In [10]:
# save synthetic sample
dataset_test.to_csv('x_test_adult_dp.csv')
synth_data.to_csv('x_synth_adult_dp.csv')

In [22]:
# decaf evaluation
eval_model(synth_data, dataset_test)

{'precision': 0.7696105320899616,
 'recall': 0.932845744680851,
 'auroc': 0.543035775566232,
 'dp': 0.045112073,
 'ftu': 0.021500051}

In [16]:
# load synthetic and test
X_test = pd.read_csv('x_test_adult_dp.csv', index_col=0)
X_synth_test = pd.read_csv('x_synth_adult_dp.csv', index_col=0)

In [17]:
label = "income"

X_synth_loader = GenericDataLoader(
    X_synth_test,
    target_column=label,
)
X_test_loader = GenericDataLoader(
    X_test,
    target_column=label,
)

res = eval_goggle(X_synth_loader, X_test_loader)

print(f"Quality: {res[0]:.3f}")
print(f"Detection: {res[2]:.3f}")
print(
f"Performance on real: {res[1][0]:.3f}, on synth: {res[1][1]:.3f}, diff: {(res[1][0] - res[1][1]):.3f}")

Quality: 0.560
Detection: 0.696
Performance on real: 0.555, on synth: 0.508, diff: 0.047
