<a href="https://colab.research.google.com/github/humayraR/fact-ai_Repl/blob/main/FACT_GAN2_colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Optional (disabled): mount Google Drive into the Colab runtime.
# Uncomment both lines to enable it.
# from google.colab import drive
# drive.mount('/content/drive')

In [None]:
# Dependency installation (disabled). table_evaluator, ctgan, synthcity and
# torch are all imported by the import cell of this notebook, so uncomment
# these lines on a runtime where they are not installed yet.
# NOTE(review): no versions are pinned here — consider pinning for reproducibility.
# !pip install table_evaluator
# !pip install ctgan
# !pip install synthcity
# !pip3 install torch torchvision torchaudio

In [None]:
# Clone the project repository into the current working directory.
# Presumably it provides the local modules imported later (PreProcessData,
# Metrics) and the 'data/' folder — verify against the repository layout.
! git clone https://github.com/humayraR/fact-ai_Repl.git

In [4]:
import sys

# Put the cloned repository at the front of the module search path so that its
# modules take precedence when imported by name.
repo_root = '/content/fact-ai_Repl'
sys.path.insert(0, repo_root)

In [None]:
# Switch the working directory to the clone. Later cells use relative paths
# ('data/adult.data', 'Models/...'), presumably resolved against this directory.
# NOTE(review): the path is relative, so re-running this cell from inside the
# clone will fail — restart the runtime before running the notebook again.
%cd fact-ai_Repl

In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import nn, optim
from torch.autograd import Variable

from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.neural_network import MLPClassifier
from sklearn.datasets import make_classification
from sklearn.metrics import precision_score, recall_score, roc_auc_score

from table_evaluator import TableEvaluator
from ctgan import CTGAN

from sklearn.utils._testing import ignore_warnings
from sklearn.exceptions import ConvergenceWarning

import os.path,sys

import warnings

# synthcity absolute
import synthcity.logger as log
from synthcity.plugins.core.dataloader import GenericDataLoader

import PreProcessData

from Metrics import get_metrics

In [7]:
# Route synthcity log records of level INFO and above to stderr.
log.add(level="INFO", sink=sys.stderr)

# Make the current working directory importable as well.
working_dir = os.getcwd()
sys.path.append(working_dir)

In [8]:
# Columns that are passed to CTGAN as discrete (categorical) columns.
discrete_columns = [
    'workclass',
    'education',
    'marital-status',
    'occupation',
    'relationship',
    'race',
    'sex',
    'native-country',
    'label'
]

# Node order contains the order in which to generate the data, starting with the root nodes.
# `node_order` groups the nodes per generation layer; `node_order_nl` is the same order flattened.
node_order = [['race','age','sex','native-country'],['marital-status'],['education'],['occupation','hours-per-week','workclass','relationship'],['label']]
node_order_nl = ['race','age','sex','native-country','marital-status','education','occupation','hours-per-week','workclass','relationship','label']

# List of connections; key is the receiving node, value is the list of its parent nodes.
# This is the full (unmodified) graph.
# Fixed: the last parent of 'label' was misspelled 'relatinship', which is not a column name.
node_connections_normal = {'label':['occupation','race','hours-per-week','age','marital-status','education','sex','workclass','native-country','relationship'],
                    'occupation':['race','age','sex','marital-status','education'],
                    'hours-per-week':['race','age','marital-status','native-country','education','sex'],
                    'workclass':['age','marital-status','sex','education','native-country'],
                    'relationship':['marital-status','education','age','sex','native-country'],
                    'education':['race','age','marital-status','sex','native-country'],
                    'marital-status':['race','age','sex','native-country']
                    }

# Connections are removed according to the privacy criterion.
# Only the parents of 'label' differ between the graphs below.

# FTU: 'sex' is removed from the parents of 'label'.
node_connections_FTU = {'label':['occupation','race','hours-per-week','age','marital-status','education','workclass','native-country','relationship'],
                    'occupation':['race','age','sex','marital-status','education'],
                    'hours-per-week':['race','age','marital-status','native-country','education','sex'],
                    'workclass':['age','marital-status','sex','education','native-country'],
                    'relationship':['marital-status','education','age','sex','native-country'],
                    'education':['race','age','marital-status','sex','native-country'],
                    'marital-status':['race','age','sex','native-country']
                    }

# DP: 'label' keeps only 'race', 'age' and 'native-country' as parents.
node_connections_DP = {'label':['race','age','native-country'],
                    'occupation':['race','age','sex','marital-status','education'],
                    'hours-per-week':['race','age','marital-status','native-country','education','sex'],
                    'workclass':['age','marital-status','sex','education','native-country'],
                    'relationship':['marital-status','education','age','sex','native-country'],
                    'education':['race','age','marital-status','sex','native-country'],
                    'marital-status':['race','age','sex','native-country']
                    }

# CF: 'sex', 'marital-status' and 'relationship' are removed from the parents of 'label'.
node_connections_CF = {'label':['occupation','race','hours-per-week','age','education','workclass','native-country'],
                    'occupation':['race','age','sex','marital-status','education'],
                    'hours-per-week':['race','age','marital-status','native-country','education','sex'],
                    'workclass':['age','marital-status','sex','education','native-country'],
                    'relationship':['marital-status','education','age','sex','native-country'],
                    'education':['race','age','marital-status','sex','native-country'],
                    'marital-status':['race','age','sex','native-country']
                    }

In [9]:
@ignore_warnings(category=ConvergenceWarning)
def generate_data(df,mode):
    """Generate a synthetic dataset node by node using one CTGAN model per node.

    A CTGAN is first fitted on the root nodes (or loaded from ``Models/`` when a
    saved model exists) and sampled. Every remaining node in ``node_order_nl``
    then gets its own CTGAN, fitted on the real column of that node joined with
    the real columns of its parents, where the parents come from the connection
    graph selected by ``mode``. Newly fitted models are saved under ``Models/``.

    Parameters
    ----------
    df : pandas.DataFrame
        The real data. It is expected to contain the root columns and, for every
        node whose model has to be fitted, that node's column and its parents.
    mode : str
        Fairness criterion selecting the connection graph: 'FTU', 'DP' or 'CF'.

    Returns
    -------
    pandas.DataFrame
        Synthetic data with ``4 * len(df)`` rows and one column per generated node.

    Raises
    ------
    ValueError
        If ``mode`` is not one of 'FTU', 'DP' or 'CF'. The original code only
        printed a message and crashed later on an unbound variable, after having
        already trained and saved a root model for the invalid mode.
    """
    print("Generating Data for CTGAN-",mode,"...")

    # Define the privacy measure
    if mode == 'FTU':
        node_connections = node_connections_FTU
    elif mode == 'DP':
        node_connections = node_connections_DP
    elif mode == 'CF':
        node_connections = node_connections_CF
    else:
        raise ValueError(
            "Mode is not correct! Expected 'FTU', 'DP' or 'CF', got " + repr(mode)
        )

    ctgan = CTGAN(epochs=10, verbose=True)
    # How much more data the synthetic dataset should contain than the original data
    # (this is to ensure we can take a sample that looks like the original data)
    factor = 4

    root_path = 'Models/CTGANrootnodes' + str(mode) + '.pkl'
    if os.path.isfile(root_path):
        # NOTE(review): load() deserialises a saved model from disk — only use
        # model files from a trusted source.
        ctgan = ctgan.load(root_path)
    else:
        # The first model is fitted on the root nodes only; all four root
        # columns (including 'age') are passed to CTGAN as discrete columns.
        root_nodes = ['race','age','sex','native-country']

        print("Fitting root nodes...")
        ctgan.fit(df[root_nodes], list(root_nodes))
        # Make sure the target directory exists before saving the model.
        os.makedirs('Models', exist_ok=True)
        ctgan.save(root_path)

    print("Sampling root...")
    synth_df = ctgan.sample(factor * len(df.index))

    # Iteratively generate the data
    for node in node_order_nl:
        # Skip nodes that have been generated already
        if node in synth_df.columns:
            continue

        # Grab the attributes that need to be looked at when generating data
        if node in node_connections:
            attributes = node_connections[node]
        else:
            # Fall back to every node that precedes this one in generation order.
            # The node itself is excluded: it is joined separately below and a
            # duplicate column would make DataFrame.join fail.
            attributes = node_order_nl[:node_order_nl.index(node)]

        model_path = 'Models/CTGAN' + str(node) + str(mode) + '.pkl'
        if os.path.isfile(model_path):
            ctgan = ctgan.load(model_path)
        else:
            # Parent columns that are already present in the synthetic output
            gen_df = synth_df.loc[:,synth_df.columns.isin(attributes)]

            # Training frame: the real node column joined with its real parent columns
            train_df = df[[node]].join(df[attributes])

            # NOTE(review): only already-generated parent columns can end up in
            # this list; the node column being fitted is never flagged as
            # discrete, even when it is listed in discrete_columns — confirm
            # that this is intended.
            temp_discrete = [d for d in discrete_columns if d in gen_df.columns]

            print("Fitting for node ", node, "...")
            ctgan.fit(train_df, temp_discrete)

            os.makedirs('Models', exist_ok=True)
            ctgan.save(model_path)

        print("Sampling for node ", node, "...")
        # NOTE(review): sample() is called without any conditioning, and the
        # new column is attached to the existing rows by position only.
        generated_data = ctgan.sample(len(synth_df.index))

        # Add the generated data to the output
        for attribute in attributes + [node]:
            if attribute not in synth_df.columns:
                synth_df[attribute] = generated_data[attribute].values

    return synth_df

In [10]:
'''
FTU results for adult dataset
'''
def run_experiment_CTGAN(mode):
    """Load the cleaned Adult data and generate a synthetic dataset for ``mode``.

    ``mode`` is forwarded unchanged to ``generate_data``.
    Returns a ``(real, synthetic)`` pair of data frames.
    """
    real_df = PreProcessData.clean_df('data/adult.data')
    synth_df = generate_data(real_df, mode)
    # Metric computation is currently disabled here:
    # get_metrics(mode,df,synthetic)
    # #reverse metrics
    # get_metrics(mode,synthetic,df)
    return real_df, synth_df

# Real (cleaned) Adult data, used as the reference for all metrics below.
clean_orig_df = PreProcessData.clean_df('data/adult.data')

# Hold out 95% of the real data as the benchmark test set.
# A fixed random_state makes the split, and hence every benchmark computed
# against `test_loader`, reproducible across kernel restarts.
# NOTE(review): test_size=0.95 leaves only 5% in `traindf_orig_df` — confirm this is intended.
traindf_orig_df, test_orig_df = train_test_split(
    clean_orig_df,
    test_size=0.95,
    random_state=42,
)

# Loader wrapping the held-out real data, passed to the benchmarks as test data.
test_loader = GenericDataLoader(
    test_orig_df,
    target_column="label",
    sensitive_columns=["race","sex","native-country"],
)

In [11]:
# Generate the FACT-GAN synthetic data: one run per fairness criterion.
# Each run yields the real data frame and the synthetic data frame.
print("------------------------------------------CTGAN_FTU------------------------------------------------")
ftu_run = run_experiment_CTGAN('FTU')
orig_ftu, ctgan_ftu_synth = ftu_run

print("------------------------------------------CTGAN_DP------------------------------------------------")
dp_run = run_experiment_CTGAN('DP')
orig_dp, ctgan_dp_synth = dp_run

print("------------------------------------------CTGAN_CF------------------------------------------------")
cf_run = run_experiment_CTGAN('CF')
orig_cf, ctgan_cf_synth = cf_run

------------------------------------------CTGAN_FTU------------------------------------------------
Generating Data for CTGAN- FTU ...
Sampling root...
Sampling for node  marital-status ...
Sampling for node  education ...
Sampling for node  occupation ...
Sampling for node  hours-per-week ...
Sampling for node  workclass ...
Sampling for node  relationship ...
Sampling for node  label ...
------------------------------------------CTGAN_DP------------------------------------------------
Generating Data for CTGAN- DP ...
Sampling root...
Sampling for node  marital-status ...
Sampling for node  education ...
Sampling for node  occupation ...
Sampling for node  hours-per-week ...
Sampling for node  workclass ...
Sampling for node  relationship ...
Sampling for node  label ...
------------------------------------------CTGAN_CF------------------------------------------------
Generating Data for CTGAN- CF ...
Sampling root...
Sampling for node  marital-status ...
Sampling for node  education

In [14]:
# Train DPGAN on the FACT-GAN (FTU) synthetic data and evaluate it for three
# privacy budgets (epsilon = 0.1, 1 and 10).
from synthcity.plugins.privacy import plugin_dpgan
from synthcity.benchmark import Benchmarks

# Loader wrapping the FTU synthetic data; used as training data below.
ftu_loader = GenericDataLoader(
    ctgan_ftu_synth,
    target_column="label",
    sensitive_columns=["race","sex","native-country"],
)


def _ftu_dpgan_params(epsilon):
    """Return the DPGAN hyper-parameters used in this cell for the given epsilon."""
    return {"epsilon": epsilon, "n_iter": 10, "n_iter_min": 1}


def _ftu_benchmark(testcase_name, epsilon):
    """Run the synthcity benchmark for one DPGAN test case trained on ``ftu_loader``.

    The held-out real data in ``test_loader`` is passed as the test set.
    """
    return Benchmarks.evaluate(
        [(testcase_name, "dpgan", _ftu_dpgan_params(epsilon))],
        ftu_loader,
        test_loader,
        synthetic_size=70000,
        repeats=1,
        metrics={
            'stats': ['wasserstein_dist', 'prdc'],
            'performance': ['linear_model', 'mlp'],
            'detection': ['detection_mlp','detection_linear'],
            'privacy': ['delta-presence', 'identifiability_score', 'DomiasMIA_prior']
        }
    )


# --- epsilon = 0.1 ---
print("------------------------------------------DPGAN_e01_FACT_FTU------------------------------------------------")

ftu_dpgan_e01 = plugin_dpgan.DPGANPlugin(**_ftu_dpgan_params(0.1))
ftu_dpgan_e01.fit(ftu_loader)
ftu_dpgan_e01_df = ftu_dpgan_e01.generate(count=70000).dataframe()
get_metrics("fact_ftu_dpgan_e01",  ftu_dpgan_e01_df, clean_orig_df)
ftu_dpgan_e01_score = _ftu_benchmark("fact_ftu_dpgan_eps_0.1", 0.1)
Benchmarks.print(ftu_dpgan_e01_score)

# --- epsilon = 1 ---
print("------------------------------------------DPGAN_e1_FACT_FTU------------------------------------------------")
ftu_dpgan_e1 = plugin_dpgan.DPGANPlugin(**_ftu_dpgan_params(1))
ftu_dpgan_e1.fit(ftu_loader)
ftu_dpgan_e1_df = ftu_dpgan_e1.generate(count=70000).dataframe()
get_metrics("fact_ftu_dpgan_e1", ftu_dpgan_e1_df, clean_orig_df)
ftu_dpgan_e1_score = _ftu_benchmark("fact_ftu_dpgan_eps_1", 1)
Benchmarks.print(ftu_dpgan_e1_score)

# --- epsilon = 10 ---
print("------------------------------------------DPGAN_e10_FACT_FTU------------------------------------------------")
ftu_dpgan_e10 = plugin_dpgan.DPGANPlugin(**_ftu_dpgan_params(10))
ftu_dpgan_e10.fit(ftu_loader)
ftu_dpgan_e10_df = ftu_dpgan_e10.generate(count=70000).dataframe()
get_metrics("fact_ftu_dpgan_e10",  ftu_dpgan_e10_df, clean_orig_df)
ftu_dpgan_e10_score = _ftu_benchmark("fact_ftu_dpgan_eps_10", 10)
Benchmarks.print(ftu_dpgan_e10_score)

[2023-12-11T21:22:51.430058+0000][5081][INFO] Encoding race 980575506603611130
[2023-12-11T21:22:51.450052+0000][5081][INFO] Encoding age 3143074270685962713


------------------------------------------DPGAN_e01_FACT_FTU------------------------------------------------


[2023-12-11T21:23:00.492437+0000][5081][INFO] Encoding sex 1402589912293389816
[2023-12-11T21:23:00.520616+0000][5081][INFO] Encoding native-country 5076965975144299161
[2023-12-11T21:23:10.143343+0000][5081][INFO] Encoding marital-status 5421424948744979794
[2023-12-11T21:23:10.165935+0000][5081][INFO] Encoding education 27105306833250211
[2023-12-11T21:23:20.480758+0000][5081][INFO] Encoding occupation 4305482576687293178
[2023-12-11T21:23:24.399453+0000][5081][INFO] Encoding hours-per-week 4538452450745217915
[2023-12-11T21:23:32.993494+0000][5081][INFO] Encoding workclass 6553555682093064898
[2023-12-11T21:23:33.036665+0000][5081][INFO] Encoding relationship 4858523675424451021
[2023-12-11T21:23:33.063035+0000][5081][INFO] Encoding label 5518780529886777186
[2023-12-11T21:23:42.882990+0000][5081][INFO] Training GAN on device cpu. features = 58
100%|██████████| 10/10 [46:17<00:00, 277.75s/it]
[2023-12-11T22:10:27.441519+0000][5081][INFO] Testcase : fact_ftu_dpgan_eps_0.1
[2023-12-11

Statistics for dataset for mode: fact_ftu_dpgan_e01
Precision: 0.9285714285714286
Recall: 0.0007651559741024132
AUROC: 0.5001161954617449
FTU: 0.00019892580067634773
DP: -0.0009179068357987283


[2023-12-11T22:10:27.552647+0000][5081][CRITICAL] module disabled: /usr/local/lib/python3.10/dist-packages/synthcity/plugins/generic/plugin_goggle.py
[2023-12-11T22:10:27.552647+0000][5081][CRITICAL] module disabled: /usr/local/lib/python3.10/dist-packages/synthcity/plugins/generic/plugin_goggle.py
[2023-12-11T22:10:36.161564+0000][5081][INFO] Encoding race 5903607830127731946
[2023-12-11T22:10:36.188855+0000][5081][INFO] Encoding age 1368797033530855308
[2023-12-11T22:10:45.574372+0000][5081][INFO] Encoding sex 3212648940009032919
[2023-12-11T22:10:45.605054+0000][5081][INFO] Encoding native-country 4216672985389816999
[2023-12-11T22:10:55.365446+0000][5081][INFO] Encoding marital-status 554911518498903176
[2023-12-11T22:10:55.391556+0000][5081][INFO] Encoding education 5822440566463200974
[2023-12-11T22:11:02.839633+0000][5081][INFO] Encoding occupation 3596874738891532207
[2023-12-11T22:11:10.028673+0000][5081][INFO] Encoding hours-per-week 252749584808491449
[2023-12-11T22:11:13.97


[4m[1mPlugin : fact_ftu_dpgan_eps_0.1[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
stats.prdc.precision,0.255322,0.255322,0.255322,0.0,0.255322,0.0,1,0,90.06
stats.prdc.recall,0.321386,0.321386,0.321386,0.0,0.321386,0.0,1,0,90.06
stats.prdc.density,0.101563,0.101563,0.101563,0.0,0.101563,0.0,1,0,90.06
stats.prdc.coverage,0.009144,0.009144,0.009144,0.0,0.009144,0.0,1,0,90.06
performance.linear_model.gt,0.81418,0.81418,0.81418,0.0,0.81418,0.0,1,0,2.48
performance.linear_model.syn_id,0.508603,0.508603,0.508603,0.0,0.508603,0.0,1,0,2.48
performance.linear_model.syn_ood,0.501654,0.501654,0.501654,0.0,0.501654,0.0,1,0,2.48
performance.mlp.gt,0.499622,0.499622,0.499622,0.0,0.499622,0.0,1,0,287.05
performance.mlp.syn_id,0.629957,0.629957,0.629957,0.0,0.629957,0.0,1,0,287.05
performance.mlp.syn_ood,0.640993,0.640993,0.640993,0.0,0.640993,0.0,1,0,287.05


[2023-12-11T22:57:27.789079+0000][5081][INFO] Encoding race 980575506603611130
[2023-12-11T22:57:27.811405+0000][5081][INFO] Encoding age 3143074270685962713



------------------------------------------DPGAN_e1_FACT_FTU------------------------------------------------


[2023-12-11T22:57:37.213905+0000][5081][INFO] Encoding sex 1402589912293389816
[2023-12-11T22:57:37.238807+0000][5081][INFO] Encoding native-country 5076965975144299161
[2023-12-11T22:57:45.929975+0000][5081][INFO] Encoding marital-status 5421424948744979794
[2023-12-11T22:57:45.958115+0000][5081][INFO] Encoding education 27105306833250211
[2023-12-11T22:57:55.798413+0000][5081][INFO] Encoding occupation 4305482576687293178
[2023-12-11T22:57:58.555583+0000][5081][INFO] Encoding hours-per-week 4538452450745217915
[2023-12-11T22:58:03.680072+0000][5081][INFO] Encoding workclass 6553555682093064898
[2023-12-11T22:58:03.714411+0000][5081][INFO] Encoding relationship 4858523675424451021
[2023-12-11T22:58:03.740173+0000][5081][INFO] Encoding label 5518780529886777186
[2023-12-11T22:58:11.846979+0000][5081][INFO] Training GAN on device cpu. features = 58
100%|██████████| 10/10 [52:07<00:00, 312.78s/it]
[2023-12-11T23:58:35.525903+0000][5081][INFO] Testcase : fact_ftu_dpgan_eps_1
[2023-12-11T2

Statistics for dataset for mode: fact_ftu_dpgan_e1
Precision: 0.0
Recall: 0.0
AUROC: 0.5
FTU: 0.0
DP: 0.0


[2023-12-11T23:58:35.680446+0000][5081][INFO] Encoding race 5903607830127731946
[2023-12-11T23:58:35.698668+0000][5081][INFO] Encoding age 1368797033530855308
[2023-12-11T23:58:42.933582+0000][5081][INFO] Encoding sex 3212648940009032919
[2023-12-11T23:58:42.955838+0000][5081][INFO] Encoding native-country 4216672985389816999
[2023-12-11T23:58:50.048887+0000][5081][INFO] Encoding marital-status 554911518498903176
[2023-12-11T23:58:50.072305+0000][5081][INFO] Encoding education 5822440566463200974
[2023-12-11T23:58:57.536891+0000][5081][INFO] Encoding occupation 3596874738891532207
[2023-12-11T23:59:04.230373+0000][5081][INFO] Encoding hours-per-week 252749584808491449
[2023-12-11T23:59:08.140905+0000][5081][INFO] Encoding workclass 1828369493076866575
[2023-12-11T23:59:08.182837+0000][5081][INFO] Encoding relationship 3691594330515470789
[2023-12-11T23:59:08.203849+0000][5081][INFO] Encoding label 4288111986279088948
[2023-12-11T23:59:13.561848+0000][5081][INFO] Training GAN on device 


[4m[1mPlugin : fact_ftu_dpgan_eps_1[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
stats.prdc.precision,0.367977,0.367977,0.367977,0.0,0.367977,0.0,1,0,74.48
stats.prdc.recall,0.530746,0.530746,0.530746,0.0,0.530746,0.0,1,0,74.48
stats.prdc.density,0.175263,0.175263,0.175263,0.0,0.175263,0.0,1,0,74.48
stats.prdc.coverage,0.010505,0.010505,0.010505,0.0,0.010505,0.0,1,0,74.48
performance.linear_model.gt,0.81418,0.81418,0.81418,0.0,0.81418,0.0,1,0,2.34
performance.linear_model.syn_id,0.406184,0.406184,0.406184,0.0,0.406184,0.0,1,0,2.34
performance.linear_model.syn_ood,0.406405,0.406405,0.406405,0.0,0.406405,0.0,1,0,2.34
performance.mlp.gt,0.499622,0.499622,0.499622,0.0,0.499622,0.0,1,0,305.96
performance.mlp.syn_id,0.668977,0.668977,0.668977,0.0,0.668977,0.0,1,0,305.96
performance.mlp.syn_ood,0.68408,0.68408,0.68408,0.0,0.68408,0.0,1,0,305.96


[2023-12-12T01:01:14.886845+0000][5081][INFO] Encoding race 980575506603611130
[2023-12-12T01:01:14.906960+0000][5081][INFO] Encoding age 3143074270685962713



------------------------------------------DPGAN_e10_FACT_FTU------------------------------------------------


[2023-12-12T01:01:24.030335+0000][5081][INFO] Encoding sex 1402589912293389816
[2023-12-12T01:01:24.054682+0000][5081][INFO] Encoding native-country 5076965975144299161
[2023-12-12T01:01:32.602309+0000][5081][INFO] Encoding marital-status 5421424948744979794
[2023-12-12T01:01:32.630926+0000][5081][INFO] Encoding education 27105306833250211
[2023-12-12T01:01:42.998685+0000][5081][INFO] Encoding occupation 4305482576687293178
[2023-12-12T01:01:45.600686+0000][5081][INFO] Encoding hours-per-week 4538452450745217915
[2023-12-12T01:01:50.521398+0000][5081][INFO] Encoding workclass 6553555682093064898
[2023-12-12T01:01:50.549061+0000][5081][INFO] Encoding relationship 4858523675424451021
[2023-12-12T01:01:50.576778+0000][5081][INFO] Encoding label 5518780529886777186
[2023-12-12T01:01:58.577430+0000][5081][INFO] Training GAN on device cpu. features = 58
100%|██████████| 10/10 [52:29<00:00, 314.91s/it]
[2023-12-12T01:59:20.942447+0000][5081][INFO] Testcase : fact_ftu_dpgan_eps_10
[2023-12-12T

Statistics for dataset for mode: fact_ftu_dpgan_e10
Precision: 0.8165137614678899
Recall: 0.005238375515008829
AUROC: 0.49729153725137754
FTU: 0.007691797626152112
DP: 0.00023908537607000152


[2023-12-12T01:59:21.102381+0000][5081][INFO] Encoding race 5903607830127731946
[2023-12-12T01:59:21.118275+0000][5081][INFO] Encoding age 1368797033530855308
[2023-12-12T01:59:27.901205+0000][5081][INFO] Encoding sex 3212648940009032919
[2023-12-12T01:59:27.922940+0000][5081][INFO] Encoding native-country 4216672985389816999
[2023-12-12T01:59:35.537136+0000][5081][INFO] Encoding marital-status 554911518498903176
[2023-12-12T01:59:35.560980+0000][5081][INFO] Encoding education 5822440566463200974
[2023-12-12T01:59:42.657891+0000][5081][INFO] Encoding occupation 3596874738891532207
[2023-12-12T01:59:49.848329+0000][5081][INFO] Encoding hours-per-week 252749584808491449
[2023-12-12T01:59:53.506063+0000][5081][INFO] Encoding workclass 1828369493076866575
[2023-12-12T01:59:53.527570+0000][5081][INFO] Encoding relationship 3691594330515470789
[2023-12-12T01:59:53.551473+0000][5081][INFO] Encoding label 4288111986279088948
[2023-12-12T01:59:58.930399+0000][5081][INFO] Training GAN on device 


[4m[1mPlugin : fact_ftu_dpgan_eps_10[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
stats.prdc.precision,0.275284,0.275284,0.275284,0.0,0.275284,0.0,1,0,61.46
stats.prdc.recall,0.30952,0.30952,0.30952,0.0,0.30952,0.0,1,0,61.46
stats.prdc.density,0.088923,0.088923,0.088923,0.0,0.088923,0.0,1,0,61.46
stats.prdc.coverage,0.014623,0.014623,0.014623,0.0,0.014623,0.0,1,0,61.46
performance.linear_model.gt,0.81418,0.81418,0.81418,0.0,0.81418,0.0,1,0,2.45
performance.linear_model.syn_id,0.580049,0.580049,0.580049,0.0,0.580049,0.0,1,0,2.45
performance.linear_model.syn_ood,0.583225,0.583225,0.583225,0.0,0.583225,0.0,1,0,2.45
performance.mlp.gt,0.499622,0.499622,0.499622,0.0,0.499622,0.0,1,0,265.07
performance.mlp.syn_id,0.678078,0.678078,0.678078,0.0,0.678078,0.0,1,0,265.07
performance.mlp.syn_ood,0.685756,0.685756,0.685756,0.0,0.685756,0.0,1,0,265.07





In [15]:
# DPGAN test for DP fairness: train DPGAN on the FACT-GAN (DP) synthetic data
# and evaluate it for three privacy budgets (epsilon = 0.1, 1 and 10).
# Relies on `plugin_dpgan` and `Benchmarks` imported in an earlier cell.

# Loader wrapping the DP synthetic data; used as training data below.
dp_loader = GenericDataLoader(
    ctgan_dp_synth,
    target_column="label",
    sensitive_columns=["race","sex","native-country"],
)


def _dp_dpgan_params(epsilon):
    """Return the DPGAN hyper-parameters used in this cell for the given epsilon."""
    return {"epsilon": epsilon, "n_iter": 10, "n_iter_min": 1}


def _dp_benchmark(testcase_name, epsilon):
    """Run the synthcity benchmark for one DPGAN test case trained on ``dp_loader``.

    The held-out real data in ``test_loader`` is passed as the test set.
    """
    return Benchmarks.evaluate(
        [(testcase_name, "dpgan", _dp_dpgan_params(epsilon))],
        dp_loader,
        test_loader,
        synthetic_size=70000,
        repeats=1,
        metrics={
            'stats': ['wasserstein_dist', 'prdc'],
            'performance': ['linear_model', 'mlp'],
            'detection': ['detection_mlp','detection_linear'],
            'privacy': ['delta-presence', 'identifiability_score', 'DomiasMIA_prior']
        }
    )


# --- epsilon = 0.1 ---
print("------------------------------------------DPGAN_e01_FACT_DP------------------------------------------------")

dp_dpgan_e01 = plugin_dpgan.DPGANPlugin(**_dp_dpgan_params(0.1))
dp_dpgan_e01.fit(dp_loader)
dp_dpgan_e01_df = dp_dpgan_e01.generate(count=70000).dataframe()
get_metrics("fact_dp_dpgan_e01",  dp_dpgan_e01_df, clean_orig_df)
print("real Vs synthetic")
get_metrics("fact_dp_dpgan_e01", clean_orig_df, dp_dpgan_e01_df)
dp_dpgan_e01_score = _dp_benchmark("fact_dp_dpgan_eps_0.1", 0.1)
Benchmarks.print(dp_dpgan_e01_score)

# --- epsilon = 1 ---
print("------------------------------------------DPGAN_e1_FACT_dp------------------------------------------------")
dp_dpgan_e1 = plugin_dpgan.DPGANPlugin(**_dp_dpgan_params(1))
dp_dpgan_e1.fit(dp_loader)
dp_dpgan_e1_df = dp_dpgan_e1.generate(count=70000).dataframe()
get_metrics("fact_dp_dpgan_e1", dp_dpgan_e1_df, clean_orig_df)
print("real Vs synthetic")
get_metrics("fact_dp_dpgan_e1", clean_orig_df, dp_dpgan_e1_df)
dp_dpgan_e1_score = _dp_benchmark("fact_dp_dpgan_eps_1", 1)
Benchmarks.print(dp_dpgan_e1_score)

# --- epsilon = 10 ---
print("------------------------------------------DPGAN_e10_FACT_DP------------------------------------------------")
dp_dpgan_e10 = plugin_dpgan.DPGANPlugin(**_dp_dpgan_params(10))
dp_dpgan_e10.fit(dp_loader)
dp_dpgan_e10_df = dp_dpgan_e10.generate(count=70000).dataframe()
get_metrics("fact_dp_dpgan_e10",  dp_dpgan_e10_df, clean_orig_df)
print("real Vs synthetic")
# Fixed: this call was labelled "fact_dp_dpgan_e1" (copy-paste slip), so the
# epsilon=10 statistics were reported under the epsilon=1 name.
get_metrics("fact_dp_dpgan_e10", clean_orig_df, dp_dpgan_e10_df)
dp_dpgan_e10_score = _dp_benchmark("fact_dp_dpgan_eps_10", 10)
Benchmarks.print(dp_dpgan_e10_score)


[2023-12-12T05:09:08.672652+0000][5081][INFO] Encoding race 9065076356728488696
[2023-12-12T05:09:08.698631+0000][5081][INFO] Encoding age 6208747529374880019


------------------------------------------DPGAN_e01_FACT_DP------------------------------------------------


[2023-12-12T05:09:17.445224+0000][5081][INFO] Encoding sex 1851472951615219107
[2023-12-12T05:09:17.470302+0000][5081][INFO] Encoding native-country 7036255289221006500
[2023-12-12T05:09:26.434457+0000][5081][INFO] Encoding marital-status 3218313200385875299
[2023-12-12T05:09:26.481524+0000][5081][INFO] Encoding education 2868315221185268437
[2023-12-12T05:09:29.199984+0000][5081][INFO] Encoding occupation 6063568195674813464
[2023-12-12T05:09:35.196195+0000][5081][INFO] Encoding hours-per-week 914016541816534349
[2023-12-12T05:09:38.367070+0000][5081][INFO] Encoding workclass 8163714818108708022
[2023-12-12T05:09:38.390202+0000][5081][INFO] Encoding relationship 4083672952038004195
[2023-12-12T05:09:38.416625+0000][5081][INFO] Encoding label 1610488302958999244
[2023-12-12T05:09:46.413336+0000][5081][INFO] Training GAN on device cpu. features = 58
100%|██████████| 10/10 [38:06<00:00, 228.61s/it]


Statistics for dataset for mode: fact_dp_dpgan_e01
Precision: 0.0
Recall: 0.0
AUROC: 0.5
FTU: 0.0
DP: 0.0
real Vs synthetic


[2023-12-12T05:48:32.502575+0000][5081][INFO] Testcase : fact_dp_dpgan_eps_0.1
[2023-12-12T05:48:32.506939+0000][5081][INFO] [testcase] Experiment repeat: 0 task type: classification Train df hash = 1376007990276252368
[2023-12-12T05:48:32.510160+0000][5081][CRITICAL] module disabled: /usr/local/lib/python3.10/dist-packages/synthcity/plugins/generic/plugin_goggle.py
[2023-12-12T05:48:32.510160+0000][5081][CRITICAL] module disabled: /usr/local/lib/python3.10/dist-packages/synthcity/plugins/generic/plugin_goggle.py
[2023-12-12T05:48:32.510160+0000][5081][CRITICAL] module disabled: /usr/local/lib/python3.10/dist-packages/synthcity/plugins/generic/plugin_goggle.py


Statistics for dataset for mode: fact_dp_dpgan_e01
Precision: 0.05397805590781276
Recall: 0.9989795918367347
AUROC: 0.499955616344593
FTU: 0.0030428571428571427
DP: 0.0029892297888787223


[2023-12-12T05:48:32.648037+0000][5081][INFO] Encoding race 5451338623320239768
[2023-12-12T05:48:32.664683+0000][5081][INFO] Encoding age 707540295154257336
[2023-12-12T05:48:39.131292+0000][5081][INFO] Encoding sex 5148996566422987368
[2023-12-12T05:48:39.154358+0000][5081][INFO] Encoding native-country 1125116509362128978
[2023-12-12T05:48:46.276170+0000][5081][INFO] Encoding marital-status 4727374605210109014
[2023-12-12T05:48:46.300292+0000][5081][INFO] Encoding education 8858064146752350924
[2023-12-12T05:48:48.540947+0000][5081][INFO] Encoding occupation 1017428231169796906
[2023-12-12T05:48:55.108709+0000][5081][INFO] Encoding hours-per-week 8092328960570839087
[2023-12-12T05:48:57.633294+0000][5081][INFO] Encoding workclass 3846893311200031661
[2023-12-12T05:48:57.651926+0000][5081][INFO] Encoding relationship 2576186314039511931
[2023-12-12T05:48:57.674501+0000][5081][INFO] Encoding label 3188358937655774923
[2023-12-12T05:49:02.953186+0000][5081][INFO] Training GAN on device


[4m[1mPlugin : fact_dp_dpgan_eps_0.1[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
stats.prdc.precision,0.225867,0.225867,0.225867,0.0,0.225867,0.0,1,0,67.57
stats.prdc.recall,0.541984,0.541984,0.541984,0.0,0.541984,0.0,1,0,67.57
stats.prdc.density,0.066825,0.066825,0.066825,0.0,0.066825,0.0,1,0,67.57
stats.prdc.coverage,0.010854,0.010854,0.010854,0.0,0.010854,0.0,1,0,67.57
performance.linear_model.gt,0.81418,0.81418,0.81418,0.0,0.81418,0.0,1,0,2.34
performance.linear_model.syn_id,0.482374,0.482374,0.482374,0.0,0.482374,0.0,1,0,2.34
performance.linear_model.syn_ood,0.47944,0.47944,0.47944,0.0,0.47944,0.0,1,0,2.34
performance.mlp.gt,0.499622,0.499622,0.499622,0.0,0.499622,0.0,1,0,266.28
performance.mlp.syn_id,0.637659,0.637659,0.637659,0.0,0.637659,0.0,1,0,266.28
performance.mlp.syn_ood,0.646301,0.646301,0.646301,0.0,0.646301,0.0,1,0,266.28


[2023-12-12T06:30:44.521903+0000][5081][INFO] Encoding race 9065076356728488696
[2023-12-12T06:30:44.541465+0000][5081][INFO] Encoding age 6208747529374880019



------------------------------------------DPGAN_e1_FACT_dp------------------------------------------------


[2023-12-12T06:30:52.799523+0000][5081][INFO] Encoding sex 1851472951615219107
[2023-12-12T06:30:52.824545+0000][5081][INFO] Encoding native-country 7036255289221006500
[2023-12-12T06:31:04.977246+0000][5081][INFO] Encoding marital-status 3218313200385875299
[2023-12-12T06:31:05.005321+0000][5081][INFO] Encoding education 2868315221185268437
[2023-12-12T06:31:07.628571+0000][5081][INFO] Encoding occupation 6063568195674813464
[2023-12-12T06:31:13.734068+0000][5081][INFO] Encoding hours-per-week 914016541816534349
[2023-12-12T06:31:18.889681+0000][5081][INFO] Encoding workclass 8163714818108708022
[2023-12-12T06:31:18.920536+0000][5081][INFO] Encoding relationship 4083672952038004195
[2023-12-12T06:31:18.955025+0000][5081][INFO] Encoding label 1610488302958999244
[2023-12-12T06:31:30.753122+0000][5081][INFO] Training GAN on device cpu. features = 58
100%|██████████| 10/10 [47:02<00:00, 282.27s/it]


Statistics for dataset for mode: fact_dp_dpgan_e1
Precision: 0.8635743519781719
Recall: 0.07451442024720424
AUROC: 0.4839807050623342
FTU: 0.019693654266958426
DP: -0.01049468744254349
real Vs synthetic


[2023-12-12T07:35:24.405541+0000][5081][INFO] Testcase : fact_dp_dpgan_eps_1
[2023-12-12T07:35:24.409683+0000][5081][INFO] [testcase] Experiment repeat: 0 task type: classification Train df hash = 1376007990276252368
[2023-12-12T07:35:24.415113+0000][5081][CRITICAL] module disabled: /usr/local/lib/python3.10/dist-packages/synthcity/plugins/generic/plugin_goggle.py
[2023-12-12T07:35:24.415113+0000][5081][CRITICAL] module disabled: /usr/local/lib/python3.10/dist-packages/synthcity/plugins/generic/plugin_goggle.py
[2023-12-12T07:35:24.415113+0000][5081][CRITICAL] module disabled: /usr/local/lib/python3.10/dist-packages/synthcity/plugins/generic/plugin_goggle.py


Statistics for dataset for mode: fact_dp_dpgan_e1
Precision: 0.15942302847064752
Recall: 0.8022187004754359
AUROC: 0.49543692805872963
FTU: 0.11827142857142857
DP: 0.12625089073753737


[2023-12-12T07:35:24.567553+0000][5081][INFO] Encoding race 5451338623320239768
[2023-12-12T07:35:24.582256+0000][5081][INFO] Encoding age 707540295154257336
[2023-12-12T07:35:31.441311+0000][5081][INFO] Encoding sex 5148996566422987368
[2023-12-12T07:35:31.461347+0000][5081][INFO] Encoding native-country 1125116509362128978
[2023-12-12T07:35:38.279566+0000][5081][INFO] Encoding marital-status 4727374605210109014
[2023-12-12T07:35:38.304194+0000][5081][INFO] Encoding education 8858064146752350924
[2023-12-12T07:35:40.471103+0000][5081][INFO] Encoding occupation 1017428231169796906
[2023-12-12T07:35:47.832042+0000][5081][INFO] Encoding hours-per-week 8092328960570839087
[2023-12-12T07:35:50.368046+0000][5081][INFO] Encoding workclass 3846893311200031661
[2023-12-12T07:35:50.388124+0000][5081][INFO] Encoding relationship 2576186314039511931
[2023-12-12T07:35:50.409530+0000][5081][INFO] Encoding label 3188358937655774923
[2023-12-12T07:35:55.744145+0000][5081][INFO] Training GAN on device


[4m[1mPlugin : fact_dp_dpgan_eps_1[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
stats.prdc.precision,0.223983,0.223983,0.223983,0.0,0.223983,0.0,1,0,68.84
stats.prdc.recall,0.556188,0.556188,0.556188,0.0,0.556188,0.0,1,0,68.84
stats.prdc.density,0.080317,0.080317,0.080317,0.0,0.080317,0.0,1,0,68.84
stats.prdc.coverage,0.021742,0.021742,0.021742,0.0,0.021742,0.0,1,0,68.84
performance.linear_model.gt,0.81418,0.81418,0.81418,0.0,0.81418,0.0,1,0,2.72
performance.linear_model.syn_id,0.570529,0.570529,0.570529,0.0,0.570529,0.0,1,0,2.72
performance.linear_model.syn_ood,0.57918,0.57918,0.57918,0.0,0.57918,0.0,1,0,2.72
performance.mlp.gt,0.499622,0.499622,0.499622,0.0,0.499622,0.0,1,0,256.54
performance.mlp.syn_id,0.651792,0.651792,0.651792,0.0,0.651792,0.0,1,0,256.54
performance.mlp.syn_ood,0.658809,0.658809,0.658809,0.0,0.658809,0.0,1,0,256.54


[2023-12-12T08:21:58.866963+0000][5081][INFO] Encoding race 9065076356728488696
[2023-12-12T08:21:58.887234+0000][5081][INFO] Encoding age 6208747529374880019



------------------------------------------DPGAN_e10_FACT_DP------------------------------------------------


[2023-12-12T08:22:06.958567+0000][5081][INFO] Encoding sex 1851472951615219107
[2023-12-12T08:22:06.982919+0000][5081][INFO] Encoding native-country 7036255289221006500
[2023-12-12T08:22:16.548328+0000][5081][INFO] Encoding marital-status 3218313200385875299
[2023-12-12T08:22:16.578001+0000][5081][INFO] Encoding education 2868315221185268437
[2023-12-12T08:22:19.070929+0000][5081][INFO] Encoding occupation 6063568195674813464
[2023-12-12T08:22:24.866359+0000][5081][INFO] Encoding hours-per-week 914016541816534349
[2023-12-12T08:22:28.396123+0000][5081][INFO] Encoding workclass 8163714818108708022
[2023-12-12T08:22:28.419737+0000][5081][INFO] Encoding relationship 4083672952038004195
[2023-12-12T08:22:28.445814+0000][5081][INFO] Encoding label 1610488302958999244
[2023-12-12T08:22:38.439271+0000][5081][INFO] Training GAN on device cpu. features = 58
100%|██████████| 10/10 [49:17<00:00, 295.78s/it]


Statistics for dataset for mode: fact_dp_dpgan_e10
Precision: 0.6666666666666666
Recall: 0.00011771630370806356
AUROC: 0.49979247562654766
FTU: 0.0003315430011272462
DP: 0.0001610561994624693
real Vs synthetic


[2023-12-12T09:14:04.565806+0000][5081][INFO] Testcase : fact_dp_dpgan_eps_10
[2023-12-12T09:14:04.575546+0000][5081][INFO] [testcase] Experiment repeat: 0 task type: classification Train df hash = 1376007990276252368
[2023-12-12T09:14:04.579666+0000][5081][CRITICAL] module disabled: /usr/local/lib/python3.10/dist-packages/synthcity/plugins/generic/plugin_goggle.py
[2023-12-12T09:14:04.579666+0000][5081][CRITICAL] module disabled: /usr/local/lib/python3.10/dist-packages/synthcity/plugins/generic/plugin_goggle.py
[2023-12-12T09:14:04.579666+0000][5081][CRITICAL] module disabled: /usr/local/lib/python3.10/dist-packages/synthcity/plugins/generic/plugin_goggle.py


Statistics for dataset for mode: fact_dp_dpgan_e1
Precision: 0.10119205298013245
Recall: 0.7925311203319502
AUROC: 0.4937028265834871
FTU: 0.08075714285714286
DP: 0.1366542354821524


[2023-12-12T09:14:04.759100+0000][5081][INFO] Encoding race 5451338623320239768
[2023-12-12T09:14:04.775907+0000][5081][INFO] Encoding age 707540295154257336
[2023-12-12T09:14:11.738364+0000][5081][INFO] Encoding sex 5148996566422987368
[2023-12-12T09:14:11.755967+0000][5081][INFO] Encoding native-country 1125116509362128978
[2023-12-12T09:14:18.941711+0000][5081][INFO] Encoding marital-status 4727374605210109014
[2023-12-12T09:14:18.965665+0000][5081][INFO] Encoding education 8858064146752350924
[2023-12-12T09:14:21.130312+0000][5081][INFO] Encoding occupation 1017428231169796906
[2023-12-12T09:14:27.891840+0000][5081][INFO] Encoding hours-per-week 8092328960570839087
[2023-12-12T09:14:30.564034+0000][5081][INFO] Encoding workclass 3846893311200031661
[2023-12-12T09:14:30.584262+0000][5081][INFO] Encoding relationship 2576186314039511931
[2023-12-12T09:14:30.606997+0000][5081][INFO] Encoding label 3188358937655774923
[2023-12-12T09:14:36.095463+0000][5081][INFO] Training GAN on device


[4m[1mPlugin : fact_dp_dpgan_eps_10[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
stats.prdc.precision,0.260941,0.260941,0.260941,0.0,0.260941,0.0,1,0,60.97
stats.prdc.recall,0.673833,0.673833,0.673833,0.0,0.673833,0.0,1,0,60.97
stats.prdc.density,0.101047,0.101047,0.101047,0.0,0.101047,0.0,1,0,60.97
stats.prdc.coverage,0.043764,0.043764,0.043764,0.0,0.043764,0.0,1,0,60.97
performance.linear_model.gt,0.81418,0.81418,0.81418,0.0,0.81418,0.0,1,0,2.31
performance.linear_model.syn_id,0.383162,0.383162,0.383162,0.0,0.383162,0.0,1,0,2.31
performance.linear_model.syn_ood,0.381311,0.381311,0.381311,0.0,0.381311,0.0,1,0,2.31
performance.mlp.gt,0.499622,0.499622,0.499622,0.0,0.499622,0.0,1,0,280.25
performance.mlp.syn_id,0.631773,0.631773,0.631773,0.0,0.631773,0.0,1,0,280.25
performance.mlp.syn_ood,0.638453,0.638453,0.638453,0.0,0.638453,0.0,1,0,280.25





In [16]:
# DPGAN test for CF fairness
#
# Trains DPGAN on the CF-fair CTGAN synthetic data at three privacy budgets
# (epsilon = 0.1, 1 and 10). The pipeline is identical for every budget, so it
# lives in one helper: each hyper-parameter is written once and the printed
# header, metric label and benchmark test-case name cannot drift apart.


def _run_cf_dpgan(train_loader, epsilon, tag, n_iter=10, n_iter_min=1, synthetic_size=70000):
    """Fit, sample, score and benchmark one DPGAN run on the CF-fair data.

    Steps, in order:
      1. print the run header,
      2. fit a ``DPGANPlugin`` on ``train_loader`` and generate
         ``synthetic_size`` synthetic rows,
      3. call ``get_metrics`` twice with the synthetic frame and
         ``clean_orig_df`` in swapped positions (the second call is preceded
         by the "real Vs synthetic" line),
      4. run ``Benchmarks.evaluate`` for the ``"dpgan"`` plugin with the same
         hyper-parameters against ``test_loader`` and print the result.

    Note that ``Benchmarks.evaluate`` is handed the plugin *name* and kwargs,
    not the fitted ``plugin`` object from step 2.

    Relies on notebook-level names defined in earlier cells:
    ``plugin_dpgan``, ``Benchmarks``, ``get_metrics``, ``clean_orig_df`` and
    ``test_loader``.

    Args:
        train_loader: synthcity data loader used for fitting and benchmarking.
        epsilon: value passed as ``epsilon`` to DPGAN; also embedded in the
            benchmark test-case name (``fact_cf_dpgan_eps_<epsilon>``).
        tag: short suffix used in the header and in the ``get_metrics`` mode
            label (e.g. ``"e01"`` -> ``fact_cf_dpgan_e01``).
        n_iter: passed as ``n_iter`` to the plugin and to the benchmark.
        n_iter_min: passed as ``n_iter_min`` to the plugin and to the benchmark.
        synthetic_size: number of rows to generate, both for the local sample
            and inside the benchmark.

    Returns:
        Tuple ``(plugin, synthetic_df, score)``: the fitted plugin, the
        generated dataframe, and the object returned by ``Benchmarks.evaluate``.
    """
    print(f"------------------------------------------DPGAN_{tag}_FACT_CF------------------------------------------------")
    mode = f"fact_cf_dpgan_{tag}"

    plugin = plugin_dpgan.DPGANPlugin(n_iter=n_iter, n_iter_min=n_iter_min, epsilon=epsilon)
    plugin.fit(train_loader)
    synthetic_df = plugin.generate(count=synthetic_size).dataframe()

    # Same mode label for both calls; only the argument order differs.
    get_metrics(mode, synthetic_df, clean_orig_df)
    print("real Vs synthetic")
    get_metrics(mode, clean_orig_df, synthetic_df)

    score = Benchmarks.evaluate(
        [(
            f"fact_cf_dpgan_eps_{epsilon}",
            "dpgan",
            {"epsilon": epsilon, "n_iter": n_iter, "n_iter_min": n_iter_min},
        )],
        train_loader,
        test_loader,
        synthetic_size=synthetic_size,
        repeats=1,
        metrics={
            'stats': ['wasserstein_dist', 'prdc'],
            'performance': ['linear_model', 'mlp'],
            'detection': ['detection_mlp','detection_linear'],
            'privacy': ['delta-presence', 'identifiability_score', 'DomiasMIA_prior']
        }
    )
    Benchmarks.print(score)
    return plugin, synthetic_df, score


# Loader over the CF-fair CTGAN output; built before the first run header is
# printed, exactly as before.
cf_loader = GenericDataLoader(
    ctgan_cf_synth,
    target_column="label",
    sensitive_columns=["race","sex","native-country"],
)

# The per-budget result names are kept at notebook level so that other cells
# can still refer to them.
cf_dpgan_e01, cf_dpgan_e01_df, cf_dpgan_e01_score = _run_cf_dpgan(cf_loader, 0.1, "e01")
cf_dpgan_e1, cf_dpgan_e1_df, cf_dpgan_e1_score = _run_cf_dpgan(cf_loader, 1, "e1")
cf_dpgan_e10, cf_dpgan_e10_df, cf_dpgan_e10_score = _run_cf_dpgan(cf_loader, 10, "e10")

[2023-12-12T11:01:52.734799+0000][5081][INFO] Encoding race 8446115687604148934
[2023-12-12T11:01:52.755283+0000][5081][INFO] Encoding age 5539522800782311059


------------------------------------------DPGAN_e01_FACT_CF------------------------------------------------


[2023-12-12T11:02:01.255775+0000][5081][INFO] Encoding sex 2161890776648423100
[2023-12-12T11:02:01.279064+0000][5081][INFO] Encoding native-country 7895309506583119303
[2023-12-12T11:02:06.440022+0000][5081][INFO] Encoding marital-status 6756142300156524802
[2023-12-12T11:02:06.461906+0000][5081][INFO] Encoding education 8107526629005821050
[2023-12-12T11:02:09.899910+0000][5081][INFO] Encoding occupation 8705551952719262958
[2023-12-12T11:02:18.690919+0000][5081][INFO] Encoding hours-per-week 8039307329332527231
[2023-12-12T11:02:27.529849+0000][5081][INFO] Encoding workclass 1293968098938892497
[2023-12-12T11:02:27.561285+0000][5081][INFO] Encoding relationship 4415120459762814786
[2023-12-12T11:02:27.589381+0000][5081][INFO] Encoding label 130046983386806297
[2023-12-12T11:02:35.690332+0000][5081][INFO] Training GAN on device cpu. features = 58
100%|██████████| 10/10 [40:21<00:00, 242.10s/it]


Statistics for dataset for mode: fact_cf_dpgan_e01
Precision: 1.0
Recall: 0.0014714537963507945
AUROC: 0.5007357268981754
FTU: 0.002453418208341622
DP: 0.0023998957862264338
real Vs synthetic


[2023-12-12T11:43:47.202512+0000][5081][INFO] Testcase : fact_cf_dpgan_eps_0.1
[2023-12-12T11:43:47.206734+0000][5081][INFO] [testcase] Experiment repeat: 0 task type: classification Train df hash = 2154233948135572233
[2023-12-12T11:43:47.210340+0000][5081][CRITICAL] module disabled: /usr/local/lib/python3.10/dist-packages/synthcity/plugins/generic/plugin_goggle.py
[2023-12-12T11:43:47.210340+0000][5081][CRITICAL] module disabled: /usr/local/lib/python3.10/dist-packages/synthcity/plugins/generic/plugin_goggle.py
[2023-12-12T11:43:47.210340+0000][5081][CRITICAL] module disabled: /usr/local/lib/python3.10/dist-packages/synthcity/plugins/generic/plugin_goggle.py


Statistics for dataset for mode: fact_cf_dpgan_e01
Precision: 0.02110564137384711
Recall: 0.9841688654353562
AUROC: 0.49421405168855415
FTU: 0.0054285714285714284
DP: 0.0057046069098621865


[2023-12-12T11:43:47.371718+0000][5081][INFO] Encoding race 8775147317149338725
[2023-12-12T11:43:47.389787+0000][5081][INFO] Encoding age 5234268831758738930
[2023-12-12T11:43:54.584756+0000][5081][INFO] Encoding sex 2171079370777462273
[2023-12-12T11:43:54.605409+0000][5081][INFO] Encoding native-country 6521384654555261041
[2023-12-12T11:44:00.502484+0000][5081][INFO] Encoding marital-status 1337590378187140785
[2023-12-12T11:44:00.523217+0000][5081][INFO] Encoding education 4753453939591747952
[2023-12-12T11:44:04.271091+0000][5081][INFO] Encoding occupation 3758639470859969045
[2023-12-12T11:44:05.115740+0000][5081][INFO] Encoding hours-per-week 7366333977639136298
[2023-12-12T11:44:12.072976+0000][5081][INFO] Encoding workclass 8614102044465897663
[2023-12-12T11:44:12.098544+0000][5081][INFO] Encoding relationship 4806894990592949723
[2023-12-12T11:44:12.123798+0000][5081][INFO] Encoding label 1829721183329371681
[2023-12-12T11:44:17.553623+0000][5081][INFO] Training GAN on devic


[4m[1mPlugin : fact_cf_dpgan_eps_0.1[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
stats.prdc.precision,0.40996,0.40996,0.40996,0.0,0.40996,0.0,1,0,61.92
stats.prdc.recall,0.600719,0.600719,0.600719,0.0,0.600719,0.0,1,0,61.92
stats.prdc.density,0.144448,0.144448,0.144448,0.0,0.144448,0.0,1,0,61.92
stats.prdc.coverage,0.012215,0.012215,0.012215,0.0,0.012215,0.0,1,0,61.92
performance.linear_model.gt,0.81418,0.81418,0.81418,0.0,0.81418,0.0,1,0,2.21
performance.linear_model.syn_id,0.51988,0.51988,0.51988,0.0,0.51988,0.0,1,0,2.21
performance.linear_model.syn_ood,0.522965,0.522965,0.522965,0.0,0.522965,0.0,1,0,2.21
performance.mlp.gt,0.499622,0.499622,0.499622,0.0,0.499622,0.0,1,0,255.55
performance.mlp.syn_id,0.656667,0.656667,0.656667,0.0,0.656667,0.0,1,0,255.55
performance.mlp.syn_ood,0.656869,0.656869,0.656869,0.0,0.656869,0.0,1,0,255.55


[2023-12-12T12:26:58.604076+0000][5081][INFO] Encoding race 8446115687604148934
[2023-12-12T12:26:58.624329+0000][5081][INFO] Encoding age 5539522800782311059



------------------------------------------DPGAN_e1_FACT_CF------------------------------------------------


[2023-12-12T12:27:07.793892+0000][5081][INFO] Encoding sex 2161890776648423100
[2023-12-12T12:27:07.816674+0000][5081][INFO] Encoding native-country 7895309506583119303
[2023-12-12T12:27:12.667123+0000][5081][INFO] Encoding marital-status 6756142300156524802
[2023-12-12T12:27:12.693262+0000][5081][INFO] Encoding education 8107526629005821050
[2023-12-12T12:27:17.385691+0000][5081][INFO] Encoding occupation 8705551952719262958
[2023-12-12T12:27:27.031687+0000][5081][INFO] Encoding hours-per-week 8039307329332527231
[2023-12-12T12:27:39.711263+0000][5081][INFO] Encoding workclass 1293968098938892497
[2023-12-12T12:27:39.751590+0000][5081][INFO] Encoding relationship 4415120459762814786
[2023-12-12T12:27:39.794543+0000][5081][INFO] Encoding label 130046983386806297
[2023-12-12T12:27:51.770765+0000][5081][INFO] Training GAN on device cpu. features = 58
100%|██████████| 10/10 [47:26<00:00, 284.63s/it]


Statistics for dataset for mode: fact_cf_dpgan_e1
Precision: 0.0
Recall: 0.0
AUROC: 0.5
FTU: 3.315430011272462e-05
DP: 0.0
real Vs synthetic


[2023-12-12T13:17:04.034768+0000][5081][INFO] Testcase : fact_cf_dpgan_eps_1
[2023-12-12T13:17:04.039340+0000][5081][INFO] [testcase] Experiment repeat: 0 task type: classification Train df hash = 2154233948135572233
[2023-12-12T13:17:04.042467+0000][5081][CRITICAL] module disabled: /usr/local/lib/python3.10/dist-packages/synthcity/plugins/generic/plugin_goggle.py
[2023-12-12T13:17:04.042467+0000][5081][CRITICAL] module disabled: /usr/local/lib/python3.10/dist-packages/synthcity/plugins/generic/plugin_goggle.py
[2023-12-12T13:17:04.042467+0000][5081][CRITICAL] module disabled: /usr/local/lib/python3.10/dist-packages/synthcity/plugins/generic/plugin_goggle.py


Statistics for dataset for mode: fact_cf_dpgan_e1
Precision: 0.004236802931409596
Recall: 0.9736842105263158
AUROC: 0.48921692643626236
FTU: 0.006171428571428572
DP: 0.007648244749964506


[2023-12-12T13:17:04.188675+0000][5081][INFO] Encoding race 8775147317149338725
[2023-12-12T13:17:04.204397+0000][5081][INFO] Encoding age 5234268831758738930
[2023-12-12T13:17:11.272478+0000][5081][INFO] Encoding sex 2171079370777462273
[2023-12-12T13:17:11.294045+0000][5081][INFO] Encoding native-country 6521384654555261041
[2023-12-12T13:17:18.144640+0000][5081][INFO] Encoding marital-status 1337590378187140785
[2023-12-12T13:17:18.178366+0000][5081][INFO] Encoding education 4753453939591747952
[2023-12-12T13:17:21.899351+0000][5081][INFO] Encoding occupation 3758639470859969045
[2023-12-12T13:17:22.652771+0000][5081][INFO] Encoding hours-per-week 7366333977639136298
[2023-12-12T13:17:29.108322+0000][5081][INFO] Encoding workclass 8614102044465897663
[2023-12-12T13:17:29.130328+0000][5081][INFO] Encoding relationship 4806894990592949723
[2023-12-12T13:17:29.152570+0000][5081][INFO] Encoding label 1829721183329371681
[2023-12-12T13:17:34.437911+0000][5081][INFO] Training GAN on devic


[4m[1mPlugin : fact_cf_dpgan_eps_1[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
stats.prdc.precision,0.646402,0.646402,0.646402,0.0,0.646402,0.0,1,0,62.56
stats.prdc.recall,0.554617,0.554617,0.554617,0.0,0.554617,0.0,1,0,62.56
stats.prdc.density,0.188239,0.188239,0.188239,0.0,0.188239,0.0,1,0,62.56
stats.prdc.coverage,0.014344,0.014344,0.014344,0.0,0.014344,0.0,1,0,62.56
performance.linear_model.gt,0.81418,0.81418,0.81418,0.0,0.81418,0.0,1,0,2.36
performance.linear_model.syn_id,0.344789,0.344789,0.344789,0.0,0.344789,0.0,1,0,2.36
performance.linear_model.syn_ood,0.350558,0.350558,0.350558,0.0,0.350558,0.0,1,0,2.36
performance.mlp.gt,0.499622,0.499622,0.499622,0.0,0.499622,0.0,1,0,265.12
performance.mlp.syn_id,0.63047,0.63047,0.63047,0.0,0.63047,0.0,1,0,265.12
performance.mlp.syn_ood,0.633759,0.633759,0.633759,0.0,0.633759,0.0,1,0,265.12


[2023-12-12T14:04:12.871724+0000][5081][INFO] Encoding race 8446115687604148934
[2023-12-12T14:04:12.891465+0000][5081][INFO] Encoding age 5539522800782311059



------------------------------------------DPGAN_e10_FACT_CF------------------------------------------------


[2023-12-12T14:04:21.406309+0000][5081][INFO] Encoding sex 2161890776648423100
[2023-12-12T14:04:21.436299+0000][5081][INFO] Encoding native-country 7895309506583119303
[2023-12-12T14:04:26.513347+0000][5081][INFO] Encoding marital-status 6756142300156524802
[2023-12-12T14:04:26.541008+0000][5081][INFO] Encoding education 8107526629005821050
[2023-12-12T14:04:29.854986+0000][5081][INFO] Encoding occupation 8705551952719262958
[2023-12-12T14:04:38.639257+0000][5081][INFO] Encoding hours-per-week 8039307329332527231
[2023-12-12T14:04:47.472459+0000][5081][INFO] Encoding workclass 1293968098938892497
[2023-12-12T14:04:47.499014+0000][5081][INFO] Encoding relationship 4415120459762814786
[2023-12-12T14:04:47.523616+0000][5081][INFO] Encoding label 130046983386806297
[2023-12-12T14:04:55.718346+0000][5081][INFO] Training GAN on device cpu. features = 58
100%|██████████| 10/10 [52:48<00:00, 316.89s/it]


Statistics for dataset for mode: fact_cf_dpgan_e10
Precision: 0.0
Recall: 0.0
AUROC: 0.5
FTU: 0.0
DP: 0.0
real Vs synthetic


[2023-12-12T15:00:26.040207+0000][5081][INFO] Testcase : fact_cf_dpgan_eps_10
[2023-12-12T15:00:26.043743+0000][5081][INFO] [testcase] Experiment repeat: 0 task type: classification Train df hash = 2154233948135572233
[2023-12-12T15:00:26.046597+0000][5081][CRITICAL] module disabled: /usr/local/lib/python3.10/dist-packages/synthcity/plugins/generic/plugin_goggle.py
[2023-12-12T15:00:26.046597+0000][5081][CRITICAL] module disabled: /usr/local/lib/python3.10/dist-packages/synthcity/plugins/generic/plugin_goggle.py
[2023-12-12T15:00:26.046597+0000][5081][CRITICAL] module disabled: /usr/local/lib/python3.10/dist-packages/synthcity/plugins/generic/plugin_goggle.py


Statistics for dataset for mode: fact_cf_dpgan_e10
Precision: 0.003727590083427016
Recall: 0.984375
AUROC: 0.5105237892613994
FTU: 0.0007
DP: 0.0021654804163947494


[2023-12-12T15:00:26.196128+0000][5081][INFO] Encoding race 8775147317149338725
[2023-12-12T15:00:26.212074+0000][5081][INFO] Encoding age 5234268831758738930
[2023-12-12T15:00:33.029062+0000][5081][INFO] Encoding sex 2171079370777462273
[2023-12-12T15:00:33.048579+0000][5081][INFO] Encoding native-country 6521384654555261041
[2023-12-12T15:00:38.977573+0000][5081][INFO] Encoding marital-status 1337590378187140785
[2023-12-12T15:00:39.001113+0000][5081][INFO] Encoding education 4753453939591747952
[2023-12-12T15:00:43.099102+0000][5081][INFO] Encoding occupation 3758639470859969045
[2023-12-12T15:00:43.909376+0000][5081][INFO] Encoding hours-per-week 7366333977639136298
[2023-12-12T15:00:50.324188+0000][5081][INFO] Encoding workclass 8614102044465897663
[2023-12-12T15:00:50.348120+0000][5081][INFO] Encoding relationship 4806894990592949723
[2023-12-12T15:00:50.372196+0000][5081][INFO] Encoding label 1829721183329371681
[2023-12-12T15:00:55.841580+0000][5081][INFO] Training GAN on devic


[4m[1mPlugin : fact_cf_dpgan_eps_10[0m[0m


Unnamed: 0,min,max,mean,stddev,median,iqr,rounds,errors,durations
stats.prdc.precision,0.544392,0.544392,0.544392,0.0,0.544392,0.0,1,0,65.29
stats.prdc.recall,0.562365,0.562365,0.562365,0.0,0.562365,0.0,1,0,65.29
stats.prdc.density,0.169533,0.169533,0.169533,0.0,0.169533,0.0,1,0,65.29
stats.prdc.coverage,0.010574,0.010574,0.010574,0.0,0.010574,0.0,1,0,65.29
performance.linear_model.gt,0.81418,0.81418,0.81418,0.0,0.81418,0.0,1,0,2.33
performance.linear_model.syn_id,0.476864,0.476864,0.476864,0.0,0.476864,0.0,1,0,2.33
performance.linear_model.syn_ood,0.470183,0.470183,0.470183,0.0,0.470183,0.0,1,0,2.33
performance.mlp.gt,0.499622,0.499622,0.499622,0.0,0.499622,0.0,1,0,281.24
performance.mlp.syn_id,0.606972,0.606972,0.606972,0.0,0.606972,0.0,1,0,281.24
performance.mlp.syn_ood,0.603931,0.603931,0.603931,0.0,0.603931,0.0,1,0,281.24





In [18]:
# Save the CTGAN synthetic frames as CSV files (row index not written).
#
# Exports that are currently disabled:
# orig_cf.to_csv('orig_ct.csv', index=False)
# ctgan_ftu_synth.to_csv('ctgan_ftu_synth.csv', index=False)
# ftu_dpgan_e01_df.to_csv('ct_ftu_dpgan_e01_df.csv', index=False)
# ftu_dpgan_e1_df.to_csv('ct_ftu_dpgan_e1_df.csv', index=False)
# ftu_dpgan_e10_df.to_csv('ct_ftu_dpgan_e10_df.csv', index=False)
for _csv_path, _synth_frame in (
    ('ctgan_dp_synth.csv', ctgan_dp_synth),
    ('ctgan_cf_synth.csv', ctgan_cf_synth),
):
    _synth_frame.to_csv(_csv_path, index=False)
