In [1]:
# from data_synthesizer.util import  plot_training_loss, ModelType
from data_loader import DataLoader
from data_synthesizer.sdv import SDVCTGAN_, SDVTVAE_
from data_evaluator import ClassifierType
from data_synthesizer.pipeline import PipelineBuilder

In [2]:
# Column groups for the Adult dataset, passed to the pipeline and report cells.
cat_list_adult = [
    'workclass',
    'education',
    'marital-status',
    'occupation',
    'relationship',
    'race',
    'sex',
    'native-country',
    'income',
]
num_list_adult = [
    'age',
    'fnlwgt',
    'education-num',
    'capital-gain',
    'capital-loss',
    'hours-per-week',
]

# Column lists handed to the privacy evaluation task.
# NOTE(review): 'native-country' appears in both lists - confirm this is intended.
adult_qai_columns = [
    'education',
    'education-num',
    'marital-status',
    'occupation',
    'relationship',
    'race',
    'sex',
    'native-country',
]
adult_risk_column = [
    'capital-gain',
    'capital-loss',
    'hours-per-week',
    'native-country',
    'income',
]

# Load the train / test splits; `str` is forwarded as the second positional argument.
df_real_adult_train = DataLoader('../data/adult_train.csv').get_dataframe(
    cat_list_adult, str
)
df_real_adult_test = DataLoader('../data/adult_test.csv').get_dataframe(
    cat_list_adult, str
)


## CTGAN 

In [5]:
# Detect the table metadata from the training frame and build the CTGAN wrapper.
from sdv.metadata import SingleTableMetadata
metadata = SingleTableMetadata()
metadata.detect_from_dataframe(data=df_real_adult_train)
# NOTE(review): the *test* frame is passed as the second argument here, whereas the
# "ctgan Adult" / "ctgan cardio" cells further down pass the training frame - confirm
# which one the wrapper expects.
ctgan = SDVCTGAN_(metadata, df_real_adult_test, verbose=False, cuda=True, epochs=1500)

discrete_columns :  ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'native-country', 'income']


### Without sampling and reject

In [6]:
# Classifiers handed to the utility evaluation task.
classifier_types = [
    ClassifierType.CART,
    ClassifierType.KNN,
    ClassifierType.LDA,
    ClassifierType.NB,
    ClassifierType.LR,
    ClassifierType.RANDOM_FOREST,
    ClassifierType.SVM,
    ClassifierType.XGBOOST,
]

# CTGAN baseline: generation + fine tuning, no sampling-and-reject step.
# NOTE(review): the saved output of this cell ends with
# "ValueError: 'a' cannot be empty unless no samples are taken", so `results`
# was not assigned by that run - re-run before relying on it.
pipeline_builder = PipelineBuilder(
    df_real_adult_train, cat_list_adult, num_list_adult, ctgan
)
pipeline_builder.add_generation_task()
pipeline_builder.add_fine_tuning_generation_task()
pipeline_builder.add_ressemblance_evaluation_task(df_real_adult_test)
pipeline_builder.add_utility_evaluation_task(df_real_adult_test, classifier_types)
pipeline_builder.add_privacy_evaluation_task(
    df_real_adult_test, adult_qai_columns, adult_risk_column
)
pipeline_builder.add_privacy_anonymeter_evaluation_task(df_real_adult_test)
pipeline_builder.build()

results = pipeline_builder.run()

Generation processing:
_fit from child




got it
Fine Tuning Generation Task
mode collapse correction
[' Holand-Netherlands']
rare from train :  1
True
True
True
True
True
True
True
True
True
True
True
True
_fit from child


ValueError: 'a' cannot be empty unless no samples are taken

In [None]:
# Persist the CTGAN baseline results so the "Display Results" section can reload them.
# NOTE(review): the saved output of the preceding pipeline cell ends in a ValueError,
# so `results` may not come from that run - confirm before trusting this file.
import pickle
with open('adult_ctgan.pkl', 'wb') as file:
    # Serialize the object and write it to the file
    pickle.dump(results, file)

In [None]:
# Pull the synthetic table out of the pipeline results for the epsilon evaluation.
synth_data = results['generation_results']['synthetic_data']

### Epsilon Evaluation

In [None]:
# Compute epsilon from the real training data and the synthetic data.
from data_synthesizer.privacy_sampling import get_epsilon

epsilon = get_epsilon(df_real_adult_train, synth_data, cat_list_adult, num_list_adult)

In [None]:
# Display the epsilon value computed in the previous cell.
epsilon

0.32916666666666666

### With sampling and reject

#### epsilon 0.2

In [None]:
# Fresh CTGAN wrapper for the epsilon = 0.2 run.
# NOTE(review): the saved output below this cell lists columns such as 'SEX' and
# 'PAY_0' that are not in cat_list_adult - the output appears stale; re-run.
from sdv.metadata import SingleTableMetadata
metadata = SingleTableMetadata()
metadata.detect_from_dataframe(data=df_real_adult_train)
# NOTE(review): the test frame is passed as the second argument - confirm intended.
ctgan = SDVCTGAN_(metadata, df_real_adult_test, verbose=False, cuda=True, epochs=1500)

discrete_columns :  ['SEX', 'EDUCATION', 'MARRIAGE', 'PAY_0', 'PAY_2', 'PAY_3', 'PAY_4', 'PAY_5', 'PAY_6', 'default.payment.next.month']


In [None]:
# Classifiers handed to the utility evaluation task.
classifier_types = [
    ClassifierType.CART,
    ClassifierType.KNN,
    ClassifierType.LDA,
    ClassifierType.NB,
    ClassifierType.LR,
    ClassifierType.RANDOM_FOREST,
    ClassifierType.SVM,
    ClassifierType.XGBOOST,
]

# CTGAN with the sampling-and-reject task configured at 0.2.
pipeline_builder = PipelineBuilder(
    df_real_adult_train, cat_list_adult, num_list_adult, ctgan
)
pipeline_builder.add_generation_task()
pipeline_builder.add_fine_tuning_generation_task()
pipeline_builder.add_sampling_and_reject_task(0.2)
pipeline_builder.add_ressemblance_evaluation_task(df_real_adult_test)
pipeline_builder.add_utility_evaluation_task(df_real_adult_test, classifier_types)
pipeline_builder.add_privacy_evaluation_task(
    df_real_adult_test, adult_qai_columns, adult_risk_column
)
pipeline_builder.add_privacy_anonymeter_evaluation_task(df_real_adult_test)
pipeline_builder.build()

results = pipeline_builder.run()

Generation processing:
_fit from child
got it
Fine Tuning Generation Task
No Mode Collpase
Sampling and reject task
initial epsilon :  0.316625
Utility evaluation in progress.
Privacy evaluation in progress.
5000  ------finished------ 10000
10000  ------finished------ 15000
15000  ------finished------ 20000
20000  ------finished------ 24000
24000  ------finished------ 24000
5000  ------finished------ 10000
10000  ------finished------ 15000
15000  ------finished------ 20000
20000  ------finished------ 24000
24000  ------finished------ 24000
end
Privacy anonymeter evaluation in progress.


Found 17 failed queries out of 500. Check DEBUG messages for more details.
Found 27 failed queries out of 500. Check DEBUG messages for more details.


Singling out evaluation failed with Optimal parameters not found: The maximum number of function evaluations is exceeded.. Please re-run this cell.For more stable results increase `n_attacks`. Note that this will make the evaluation slower.


Found 23 failed queries out of 500. Check DEBUG messages for more details.
Found 64 failed queries out of 500. Check DEBUG messages for more details.


end


In [None]:
# Persist the CTGAN epsilon = 0.2 results for the "Display Results" section.
import pickle
with open('adult_ctgan_eps_0.2.pkl', 'wb') as file:
    # Serialize the object and write it to the file
    pickle.dump(results, file)

#### epsilon 0.1

In [None]:
# Fresh CTGAN wrapper for the epsilon = 0.1 run.
from sdv.metadata import SingleTableMetadata
metadata = SingleTableMetadata()
metadata.detect_from_dataframe(data=df_real_adult_train)
# NOTE(review): the test frame is passed as the second argument - confirm intended.
ctgan = SDVCTGAN_(metadata, df_real_adult_test, verbose=False, cuda=True, epochs=1500)

In [None]:
# Classifiers handed to the utility evaluation task.
classifier_types = [
    ClassifierType.CART,
    ClassifierType.KNN,
    ClassifierType.LDA,
    ClassifierType.NB,
    ClassifierType.LR,
    ClassifierType.RANDOM_FOREST,
    ClassifierType.SVM,
    ClassifierType.XGBOOST,
]

# CTGAN with the sampling-and-reject task configured at 0.1.
pipeline_builder = PipelineBuilder(
    df_real_adult_train, cat_list_adult, num_list_adult, ctgan
)
pipeline_builder.add_generation_task()
pipeline_builder.add_fine_tuning_generation_task()
pipeline_builder.add_sampling_and_reject_task(0.1)
pipeline_builder.add_ressemblance_evaluation_task(df_real_adult_test)
pipeline_builder.add_utility_evaluation_task(df_real_adult_test, classifier_types)
pipeline_builder.add_privacy_evaluation_task(
    df_real_adult_test, adult_qai_columns, adult_risk_column
)
pipeline_builder.add_privacy_anonymeter_evaluation_task(df_real_adult_test)
pipeline_builder.build()

results = pipeline_builder.run()

Generation processing:
_fit from child
got it
Fine Tuning Generation Task
No Mode Collpase
Sampling and reject task
initial epsilon :  0.2989583333333333
Utility evaluation in progress.
Privacy evaluation in progress.
5000  ------finished------ 10000
10000  ------finished------ 15000
15000  ------finished------ 20000
20000  ------finished------ 24000
24000  ------finished------ 24000
5000  ------finished------ 10000
10000  ------finished------ 15000
15000  ------finished------ 20000
20000  ------finished------ 24000
24000  ------finished------ 24000
end
Privacy anonymeter evaluation in progress.


Found 30 failed queries out of 500. Check DEBUG messages for more details.
Found 20 failed queries out of 500. Check DEBUG messages for more details.
Found 64 failed queries out of 500. Check DEBUG messages for more details.


end


In [None]:
# Persist the CTGAN epsilon = 0.1 results for the "Display Results" section.
import pickle
with open('adult_ctgan_eps_0.1.pkl', 'wb') as file:
    # Serialize the object and write it to the file
    pickle.dump(results, file)

#### epsilon 0.05

In [None]:
# Fresh CTGAN wrapper for the epsilon = 0.05 run.
# NOTE(review): the saved output below this cell lists columns such as 'SEX' and
# 'PAY_0' that are not in cat_list_adult - the output appears stale; re-run.
from sdv.metadata import SingleTableMetadata
metadata = SingleTableMetadata()
metadata.detect_from_dataframe(data=df_real_adult_train)
# NOTE(review): the test frame is passed as the second argument - confirm intended.
ctgan = SDVCTGAN_(metadata, df_real_adult_test, verbose=False, cuda=True, epochs=1500)

discrete_columns :  ['SEX', 'EDUCATION', 'MARRIAGE', 'PAY_0', 'PAY_2', 'PAY_3', 'PAY_4', 'PAY_5', 'PAY_6', 'default.payment.next.month']


In [None]:
# Classifiers handed to the utility evaluation task.
classifier_types = [
    ClassifierType.CART,
    ClassifierType.KNN,
    ClassifierType.LDA,
    ClassifierType.NB,
    ClassifierType.LR,
    ClassifierType.RANDOM_FOREST,
    ClassifierType.SVM,
    ClassifierType.XGBOOST,
]

# CTGAN with the sampling-and-reject task configured at 0.05.
pipeline_builder = PipelineBuilder(
    df_real_adult_train, cat_list_adult, num_list_adult, ctgan
)
pipeline_builder.add_generation_task()
pipeline_builder.add_fine_tuning_generation_task()
pipeline_builder.add_sampling_and_reject_task(0.05)
pipeline_builder.add_ressemblance_evaluation_task(df_real_adult_test)
pipeline_builder.add_utility_evaluation_task(df_real_adult_test, classifier_types)
pipeline_builder.add_privacy_evaluation_task(
    df_real_adult_test, adult_qai_columns, adult_risk_column
)
pipeline_builder.add_privacy_anonymeter_evaluation_task(df_real_adult_test)
pipeline_builder.build()

results = pipeline_builder.run()

Generation processing:
_fit from child
got it
Fine Tuning Generation Task
No Mode Collpase
Sampling and reject task
initial epsilon :  0.32679166666666665
Utility evaluation in progress.
Privacy evaluation in progress.
5000  ------finished------ 10000
10000  ------finished------ 15000
15000  ------finished------ 20000
20000  ------finished------ 24000
24000  ------finished------ 24000
5000  ------finished------ 10000
10000  ------finished------ 15000
15000  ------finished------ 20000
20000  ------finished------ 24000
24000  ------finished------ 24000
end
Privacy anonymeter evaluation in progress.


Found 19 failed queries out of 500. Check DEBUG messages for more details.
Found 29 failed queries out of 500. Check DEBUG messages for more details.
Found 74 failed queries out of 500. Check DEBUG messages for more details.


end


In [None]:
# Persist the CTGAN epsilon = 0.05 results for the "Display Results" section.
import pickle
with open('adult_ctgan_eps_0.05.pkl', 'wb') as file:
    # Serialize the object and write it to the file
    pickle.dump(results, file)

## TVAE

### Without sampling and reject

In [None]:
# Build the TVAE wrapper for the baseline (no sampling-and-reject) run.
from sdv.metadata import SingleTableMetadata
metadata = SingleTableMetadata()
metadata.detect_from_dataframe(data=df_real_adult_train)
# NOTE(review): the variable is still called `ctgan` although it holds an SDVTVAE_
# model, and the test frame is passed as the second argument - confirm both.
ctgan = SDVTVAE_(metadata, df_real_adult_test, cuda=True, epochs=1500)

In [None]:
# Classifiers handed to the utility evaluation task.
classifier_types = [
    ClassifierType.CART,
    ClassifierType.KNN,
    ClassifierType.LDA,
    ClassifierType.NB,
    ClassifierType.LR,
    ClassifierType.RANDOM_FOREST,
    ClassifierType.SVM,
    ClassifierType.XGBOOST,
]

# TVAE baseline: generation + fine tuning, no sampling-and-reject step.
# (`ctgan` holds the SDVTVAE_ model built in the previous cell.)
pipeline_builder = PipelineBuilder(
    df_real_adult_train, cat_list_adult, num_list_adult, ctgan
)
pipeline_builder.add_generation_task()
pipeline_builder.add_fine_tuning_generation_task()
pipeline_builder.add_ressemblance_evaluation_task(df_real_adult_test)
pipeline_builder.add_utility_evaluation_task(df_real_adult_test, classifier_types)
pipeline_builder.add_privacy_evaluation_task(
    df_real_adult_test, adult_qai_columns, adult_risk_column
)
pipeline_builder.add_privacy_anonymeter_evaluation_task(df_real_adult_test)
pipeline_builder.build()

results = pipeline_builder.run()

Generation processing:
Epoch 1, Reconstruct Loss:  54.7997,KLD Loss:  1.0048
Epoch 2, Reconstruct Loss:  38.3826,KLD Loss:  2.7400
Epoch 3, Reconstruct Loss:  30.7936,KLD Loss:  3.7925
Epoch 4, Reconstruct Loss:  25.1604,KLD Loss:  4.8596
Epoch 5, Reconstruct Loss:  22.6204,KLD Loss:  5.5627
Epoch 6, Reconstruct Loss:  20.2034,KLD Loss:  5.7839
Epoch 7, Reconstruct Loss:  20.6139,KLD Loss:  5.9274
Epoch 8, Reconstruct Loss:  17.9145,KLD Loss:  6.2581
Epoch 9, Reconstruct Loss:  17.6577,KLD Loss:  6.1313
Epoch 10, Reconstruct Loss:  16.9406,KLD Loss:  6.4590
Epoch 11, Reconstruct Loss:  16.3330,KLD Loss:  6.3946
Epoch 12, Reconstruct Loss:  14.9013,KLD Loss:  6.2843
Epoch 13, Reconstruct Loss:  13.3985,KLD Loss:  6.5311
Epoch 14, Reconstruct Loss:  14.3925,KLD Loss:  6.4682
Epoch 15, Reconstruct Loss:  13.0050,KLD Loss:  6.4969
Epoch 16, Reconstruct Loss:  10.9634,KLD Loss:  6.8822
Epoch 17, Reconstruct Loss:  11.0559,KLD Loss:  6.8386
Epoch 18, Reconstruct Loss:  11.0354,KLD Loss:  7.0

Found 21 failed queries out of 500. Check DEBUG messages for more details.
Found 30 failed queries out of 500. Check DEBUG messages for more details.


Singling out evaluation failed with Optimal parameters not found: The maximum number of function evaluations is exceeded.. Please re-run this cell.For more stable results increase `n_attacks`. Note that this will make the evaluation slower.


Found 18 failed queries out of 500. Check DEBUG messages for more details.


Singling out evaluation failed with Optimal parameters not found: The maximum number of function evaluations is exceeded.. Please re-run this cell.For more stable results increase `n_attacks`. Note that this will make the evaluation slower.


Found 18 failed queries out of 500. Check DEBUG messages for more details.
Found 49 failed queries out of 500. Check DEBUG messages for more details.


In [None]:
# Persist the TVAE baseline results for the "Display Results" section.
import pickle
with open('adult_tvae.pkl', 'wb') as file:
    # Serialize the object and write it to the file
    pickle.dump(results, file)

In [None]:
# Pull the synthetic table out of the TVAE pipeline results for the epsilon evaluation.
synth_data = results['generation_results']['synthetic_data']

### Epsilon Evaluation

In [None]:
# Compute epsilon from the real training data and the TVAE synthetic data.
from data_synthesizer.privacy_sampling import get_epsilon

epsilon = get_epsilon(df_real_adult_train, synth_data, cat_list_adult, num_list_adult)

In [None]:
# Display the epsilon value computed in the previous cell.
epsilon

0.5505833333333333

### With sampling and reject

#### epsilon 0.2

In [None]:
# Fresh TVAE wrapper for the epsilon = 0.2 run.
from sdv.metadata import SingleTableMetadata
metadata = SingleTableMetadata()
metadata.detect_from_dataframe(data=df_real_adult_train)
# NOTE(review): the variable is still called `ctgan` although it holds an SDVTVAE_
# model, and the test frame is passed as the second argument - confirm both.
ctgan = SDVTVAE_(metadata, df_real_adult_test, cuda=True, epochs=1500)

In [None]:
# Classifiers handed to the utility evaluation task.
classifier_types = [
    ClassifierType.CART,
    ClassifierType.KNN,
    ClassifierType.LDA,
    ClassifierType.NB,
    ClassifierType.LR,
    ClassifierType.RANDOM_FOREST,
    ClassifierType.SVM,
    ClassifierType.XGBOOST,
]

# TVAE with the sampling-and-reject task configured at 0.2.
# (`ctgan` holds the SDVTVAE_ model built in the previous cell.)
pipeline_builder = PipelineBuilder(
    df_real_adult_train, cat_list_adult, num_list_adult, ctgan
)
pipeline_builder.add_generation_task()
pipeline_builder.add_fine_tuning_generation_task()
pipeline_builder.add_sampling_and_reject_task(0.2)
pipeline_builder.add_ressemblance_evaluation_task(df_real_adult_test)
pipeline_builder.add_utility_evaluation_task(df_real_adult_test, classifier_types)
pipeline_builder.add_privacy_evaluation_task(
    df_real_adult_test, adult_qai_columns, adult_risk_column
)
pipeline_builder.add_privacy_anonymeter_evaluation_task(df_real_adult_test)
pipeline_builder.build()

results = pipeline_builder.run()

Generation processing:
Epoch 1, Reconstruct Loss:  56.2923,KLD Loss:  1.6072
Epoch 2, Reconstruct Loss:  36.8117,KLD Loss:  2.7630
Epoch 3, Reconstruct Loss:  33.5539,KLD Loss:  3.4761
Epoch 4, Reconstruct Loss:  26.8240,KLD Loss:  3.9967
Epoch 5, Reconstruct Loss:  24.7169,KLD Loss:  4.8410
Epoch 6, Reconstruct Loss:  24.3200,KLD Loss:  4.9944
Epoch 7, Reconstruct Loss:  21.4920,KLD Loss:  5.4762
Epoch 8, Reconstruct Loss:  19.1179,KLD Loss:  5.7426
Epoch 9, Reconstruct Loss:  18.0660,KLD Loss:  5.8279
Epoch 10, Reconstruct Loss:  16.5426,KLD Loss:  5.9170
Epoch 11, Reconstruct Loss:  17.2859,KLD Loss:  5.6191
Epoch 12, Reconstruct Loss:  16.4465,KLD Loss:  5.6681
Epoch 13, Reconstruct Loss:  14.3192,KLD Loss:  5.6450
Epoch 14, Reconstruct Loss:  12.9209,KLD Loss:  5.8563
Epoch 15, Reconstruct Loss:  13.3046,KLD Loss:  6.0763
Epoch 16, Reconstruct Loss:  13.0130,KLD Loss:  6.1513
Epoch 17, Reconstruct Loss:  13.9988,KLD Loss:  6.3030
Epoch 18, Reconstruct Loss:  11.6365,KLD Loss:  6.6

Found 13 failed queries out of 500. Check DEBUG messages for more details.
Found 18 failed queries out of 500. Check DEBUG messages for more details.


Singling out evaluation failed with Optimal parameters not found: The maximum number of function evaluations is exceeded.. Please re-run this cell.For more stable results increase `n_attacks`. Note that this will make the evaluation slower.


Found 20 failed queries out of 500. Check DEBUG messages for more details.
Found 64 failed queries out of 500. Check DEBUG messages for more details.


end


In [None]:
# Persist the TVAE epsilon = 0.2 results for the "Display Results" section.
import pickle
with open('adult_tvae_eps_0.2.pkl', 'wb') as file:
    # Serialize the object and write it to the file
    pickle.dump(results, file)

#### epsilon 0.1

In [None]:
# Fresh TVAE wrapper for the epsilon = 0.1 run.
from sdv.metadata import SingleTableMetadata
metadata = SingleTableMetadata()
metadata.detect_from_dataframe(data=df_real_adult_train)
# NOTE(review): the variable is still called `ctgan` although it holds an SDVTVAE_
# model, and the test frame is passed as the second argument - confirm both.
ctgan = SDVTVAE_(metadata, df_real_adult_test, cuda=True, epochs=1500)

In [None]:
# Classifiers handed to the utility evaluation task.
classifier_types = [
    ClassifierType.CART,
    ClassifierType.KNN,
    ClassifierType.LDA,
    ClassifierType.NB,
    ClassifierType.LR,
    ClassifierType.RANDOM_FOREST,
    ClassifierType.SVM,
    ClassifierType.XGBOOST,
]

# TVAE with the sampling-and-reject task configured at 0.1.
# (`ctgan` holds the SDVTVAE_ model built in the previous cell.)
pipeline_builder = PipelineBuilder(
    df_real_adult_train, cat_list_adult, num_list_adult, ctgan
)
pipeline_builder.add_generation_task()
pipeline_builder.add_fine_tuning_generation_task()
pipeline_builder.add_sampling_and_reject_task(0.1)
pipeline_builder.add_ressemblance_evaluation_task(df_real_adult_test)
pipeline_builder.add_utility_evaluation_task(df_real_adult_test, classifier_types)
pipeline_builder.add_privacy_evaluation_task(
    df_real_adult_test, adult_qai_columns, adult_risk_column
)
pipeline_builder.add_privacy_anonymeter_evaluation_task(df_real_adult_test)
pipeline_builder.build()

results = pipeline_builder.run()

Generation processing:
Epoch 1, Reconstruct Loss:  55.6646,KLD Loss:  1.1588
Epoch 2, Reconstruct Loss:  40.8972,KLD Loss:  2.6850
Epoch 3, Reconstruct Loss:  32.3673,KLD Loss:  3.2724
Epoch 4, Reconstruct Loss:  28.3063,KLD Loss:  4.1429
Epoch 5, Reconstruct Loss:  24.1014,KLD Loss:  4.8013
Epoch 6, Reconstruct Loss:  22.8494,KLD Loss:  5.1812
Epoch 7, Reconstruct Loss:  20.4163,KLD Loss:  5.3943
Epoch 8, Reconstruct Loss:  20.0537,KLD Loss:  5.6157
Epoch 9, Reconstruct Loss:  19.0639,KLD Loss:  5.9058
Epoch 10, Reconstruct Loss:  18.1121,KLD Loss:  5.9245
Epoch 11, Reconstruct Loss:  17.1159,KLD Loss:  6.1583
Epoch 12, Reconstruct Loss:  16.3203,KLD Loss:  6.1200
Epoch 13, Reconstruct Loss:  15.4644,KLD Loss:  5.9819
Epoch 14, Reconstruct Loss:  13.1003,KLD Loss:  6.0561
Epoch 15, Reconstruct Loss:  12.8354,KLD Loss:  5.9862
Epoch 16, Reconstruct Loss:  11.7874,KLD Loss:  6.0188
Epoch 17, Reconstruct Loss:  12.4498,KLD Loss:  6.2281
Epoch 18, Reconstruct Loss:  11.2834,KLD Loss:  6.2

In [None]:
# Persist the TVAE epsilon = 0.1 results for the "Display Results" section.
import pickle
with open('adult_tvae_eps_0.1.pkl', 'wb') as file:
    # Serialize the object and write it to the file
    pickle.dump(results, file)

#### epsilon 0.05

In [None]:
# Fresh TVAE wrapper for the epsilon = 0.05 run.
from sdv.metadata import SingleTableMetadata
metadata = SingleTableMetadata()
metadata.detect_from_dataframe(data=df_real_adult_train)
# NOTE(review): the variable is still called `ctgan` although it holds an SDVTVAE_
# model, and the test frame is passed as the second argument - confirm both.
ctgan = SDVTVAE_(metadata, df_real_adult_test, cuda=True, epochs=1500)

In [None]:
# Classifiers handed to the utility evaluation task.
classifier_types = [
    ClassifierType.CART,
    ClassifierType.KNN,
    ClassifierType.LDA,
    ClassifierType.NB,
    ClassifierType.LR,
    ClassifierType.RANDOM_FOREST,
    ClassifierType.SVM,
    ClassifierType.XGBOOST,
]

# TVAE with the sampling-and-reject task configured at 0.05.
# (`ctgan` holds the SDVTVAE_ model built in the previous cell.)
pipeline_builder = PipelineBuilder(
    df_real_adult_train, cat_list_adult, num_list_adult, ctgan
)
pipeline_builder.add_generation_task()
pipeline_builder.add_fine_tuning_generation_task()
pipeline_builder.add_sampling_and_reject_task(0.05)
pipeline_builder.add_ressemblance_evaluation_task(df_real_adult_test)
pipeline_builder.add_utility_evaluation_task(df_real_adult_test, classifier_types)
pipeline_builder.add_privacy_evaluation_task(
    df_real_adult_test, adult_qai_columns, adult_risk_column
)
pipeline_builder.add_privacy_anonymeter_evaluation_task(df_real_adult_test)
pipeline_builder.build()

results = pipeline_builder.run()

In [None]:
# Persist the TVAE epsilon = 0.05 results.
# NOTE(review): the "Display Results" section never loads this file - confirm
# whether it should be part of the comparison.
import pickle
with open('adult_tvae_eps_0.05.pkl', 'wb') as file:
    # Serialize the object and write it to the file
    pickle.dump(results, file)

### CTGAN Adult

In [5]:
# Re-declare the categorical columns and reload the Adult train / test splits.
# NOTE(review): unlike the load at the top of the notebook, no `str` category type
# is passed to get_dataframe here, and this overwrites the earlier frames - confirm
# the difference is intended.
cat_list_adult = [
    'workclass',
    'education',
    'marital-status',
    'occupation',
    'relationship',
    'race',
    'sex',
    'native-country',
    'income',
]

df_real_adult_train = DataLoader('../data/adult_train.csv').get_dataframe(
    cat_list_adult
)
df_real_adult_test = DataLoader('../data/adult_test.csv').get_dataframe(
    cat_list_adult
)

In [6]:
# Detect the Adult metadata and build a CTGAN wrapper, this time passing the
# training frame as the second argument.
from sdv.metadata import SingleTableMetadata
metadata = SingleTableMetadata()
metadata.detect_from_dataframe(data=df_real_adult_train)
ctgan = SDVCTGAN_(metadata, df_real_adult_train, verbose=False, cuda=True, epochs=1500)

discrete_columns :  ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'native-country', 'income']


In [7]:
# Fit the CTGAN wrapper directly on the Adult training frame (no pipeline).
ctgan.fit(df_real_adult_train)

_fit from child
got it


### CTGAN Cardio

In [22]:
# Categorical columns of the cardio dataset.
cat_list_cardio = [
    'gender',
    'cholesterol',
    'gluc',
    'smoke',
    'alco',
    'active',
    'cardio',
]

# Load the comma-separated cardio train / test splits with `str` as category type.
df_real_cardio_train = DataLoader('../data/cardio_train.csv').get_dataframe(
    cat_list_cardio, category_type=str, sep=','
)
df_real_cardio_test = DataLoader('../data/cardio_test.csv').get_dataframe(
    cat_list_cardio, category_type=str, sep=','
)

In [26]:
# Detect the cardio metadata and build the CTGAN wrapper on the cardio training frame.
# This rebinds the notebook-level `metadata` and `ctgan` names.
from sdv.metadata import SingleTableMetadata
metadata = SingleTableMetadata()
metadata.detect_from_dataframe(data=df_real_cardio_train)
ctgan = SDVCTGAN_(metadata, df_real_cardio_train, verbose=False, cuda=True, epochs=1500)

discrete_columns :  ['gender', 'cholesterol', 'gluc', 'smoke', 'alco', 'active', 'cardio']


In [27]:
# `ctgan` was built from the cardio metadata in the previous cell and its samples are
# written to a cardio CSV below, so the pipeline is fed the cardio frames and column
# lists (the adult frames used previously were a copy-paste leftover).
#
# The notebook defines no numerical column list for cardio, so it is derived here.
# Assumes every column not declared categorical is numerical (this would include an
# identifier column if the CSV has one) - TODO confirm.
num_list_cardio = [
    column for column in df_real_cardio_train.columns
    if column not in cat_list_cardio
]

pipeline_builder = PipelineBuilder(
    df_real_cardio_train, cat_list_cardio, num_list_cardio, ctgan
)
pipeline_builder.add_generation_task()
pipeline_builder.add_fine_tuning_generation_task()
# pipeline_builder.add_sampling_and_reject_task(0.1)
pipeline_builder.add_ressemblance_evaluation_task(df_real_cardio_test)
pipeline = pipeline_builder.build()
synth = pipeline_builder.run()

_fit from child
got it


In [28]:
# Request 49000 samples from the synthesizer fitted by the pipeline above.
data = ctgan.sample(49000)

In [29]:
# Write the sampled rows to a comma-separated file, without the index column.
data.to_csv('../data/cardio_ctgan_test_new.csv', sep=',', index=False)

## Display Results

In [3]:
import pickle


def _load_results(path):
    """Deserialize and return the object pickled at ``path``."""
    # pickle.load can execute arbitrary code: only load files written by this notebook.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Reload the results saved by the experiment cells above.
# NOTE(review): 'adult_tvae_eps_0.05.pkl' is written earlier but not loaded here.
results_adult_tvae = _load_results('adult_tvae.pkl')
results_adult_tvae_eps_01 = _load_results('adult_tvae_eps_0.1.pkl')
results_adult_tvae_eps_02 = _load_results('adult_tvae_eps_0.2.pkl')
results_adult_ctgan = _load_results('adult_ctgan.pkl')
results_adult_ctgan_eps_01 = _load_results('adult_ctgan_eps_0.1.pkl')
results_adult_ctgan_eps_02 = _load_results('adult_ctgan_eps_0.2.pkl')
results_adult_ctgan_eps_005 = _load_results('adult_ctgan_eps_0.05.pkl')

In [4]:
# Map a display label to each reloaded result set; this dict is handed to the
# report classes below. Each label appears exactly once (the original literal
# listed 'ctgan_eps_0.1' twice with the same value).
dict_results = {
    'ctgan': results_adult_ctgan,
    'ctgan_eps_0.2': results_adult_ctgan_eps_02,
    'ctgan_eps_0.1': results_adult_ctgan_eps_01,
    'ctgan_eps_0.05': results_adult_ctgan_eps_005,
    'tvae': results_adult_tvae,
    'tvae_eps_0.2': results_adult_tvae_eps_02,
    'tvae_eps_0.1': results_adult_tvae_eps_01,
}

### Resemblance

In [7]:
# Build the resemblance report from the real training data and the labelled results.
from evaluation_report.ressemblance_report import ResemblanceReport

report_resemblance = ResemblanceReport(df_real_adult_train, cat_list_adult, num_list_adult, dict_results)

In [8]:
# Render the numerical univariate resemblance report.
report_resemblance.get_numerical_univariate_report()

VBox(children=(Accordion(children=(VBox(children=(HBox(children=(HTML(value='<style type="text/css">\n#T_9f746…

In [9]:
# Render the categorical univariate resemblance report.
report_resemblance.get_categorical_univariate_report()

VBox(children=(Accordion(children=(VBox(children=(HBox(children=(HTML(value='<style type="text/css">\n</style>…

In [10]:
# Render the numerical multivariate resemblance report.
report_resemblance.get_numerical_multivariate_report()

Accordion(children=(VBox(children=(Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x05\xdc\x00\x00\x…

In [11]:
# Render the categorical multivariate resemblance report.
report_resemblance.get_categorical_multivariate_report()

Accordion(children=(VBox(children=(Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x05\xdc\x00\x00\x…

In [12]:
# Render the mixed numerical/categorical multivariate resemblance report.
report_resemblance.get_numcat_multivariate_report()

Accordion(children=(VBox(children=(Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x05\xdc\x00\x00\x…

### Utility

In [5]:
# Build the utility report from the real training data and the labelled results.
from evaluation_report.utility_report import UtilityReport

report_utility = UtilityReport(df_real_adult_train, cat_list_adult, num_list_adult, dict_results)

In [7]:
# Render the utility report.
report_utility.get_report()

Accordion(children=(VBox(children=(VBox(children=(Label(value='Accuracy Means:'), HTML(value='<table border="1…

### Privacy

In [5]:
# Build the privacy report from the real training data and the labelled results.
from evaluation_report.privacy_report import PrivacyReport


report_privacy = PrivacyReport(df_real_adult_train, cat_list_adult, num_list_adult, dict_results)

In [6]:
# Build the anonymeter privacy report from the real training data and the labelled results.
from evaluation_report.privacy_anonymeter_report import PrivacyAnonymeterReport


report_privacy_anonymeter = PrivacyAnonymeterReport(df_real_adult_train, cat_list_adult, num_list_adult, dict_results)

In [7]:
# Render the privacy report.
report_privacy.get_report()

VBox(children=(Accordion(children=(VBox(children=(HBox(children=(HTML(value='<table border="1" class="datafram…

In [8]:
# Render the anonymeter privacy report.
report_privacy_anonymeter.get_report()

VBox(children=(Accordion(children=(VBox(children=(HTML(value='<table border="1" class="dataframe">\n  <thead>\…