# Empirical Epsilon Calculation - DP-GAN

Note that we use the following python modules:

- tensorflow==2.15.0
- keras==2.15.0
- tensorflow-estimator==2.15.0
- tensorflow-privacy==0.9.0
- numpy==1.26.4
- pandas==2.2.2
- scikit-learn==1.4.2
- scipy==1.11.4
- absl-py==1.4.0

The following statements can be used to install the required Python modules:

```bash
pip install tensorflow==2.15.0
pip install keras==2.15.0
pip install tensorflow-estimator==2.15.0
pip install tensorflow-privacy==0.9.0
pip install numpy==1.26.4
pip install pandas==2.2.2
pip install scikit-learn==1.4.2
pip install scipy==1.11.4
pip install absl-py==1.4.0
```


Perform a quick check that `tensorflow`, `keras` and `tensorflow_privacy` are installed and importable. Also check versions of `NumPy`, `Pandas`, and `Scikit-learn`.

In [1]:
# Environment sanity check: report the installed versions of the core
# libraries and verify that a DP optimizer can be instantiated.
import tensorflow as tf
import keras
import numpy as np
import pandas as pd
import sklearn
from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras import DPKerasAdamOptimizer

# (label, module) pairs; the trailing comment is the version this notebook was run with
for label, module in (
    ("TF:", tf),              # 2.15.0
    ("Keras:", keras),        # 2.15.0
    ("NumPy:", np),           # 1.26.4
    ("Pandas:", pd),          # 2.2.2
    ("Sklearn:", sklearn),    # 1.4.2
):
    print(label, module.__version__)

# Constructing the optimizer is enough to show tensorflow_privacy is usable;
# the instance itself is thrown away.
_ = DPKerasAdamOptimizer(
    l2_norm_clip=1.0,
    noise_multiplier=0.5,
    num_microbatches=1,
    learning_rate=1e-3,
)
print("DP optimizer OK")





TF: 2.15.0
Keras: 2.15.0
NumPy: 1.26.4
Pandas: 2.2.2
Sklearn: 1.4.2
DP optimizer OK


Import required packages.

In [2]:
import math
import numpy as np
import statistics
from sklearn import metrics
from functools import partial
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
import argparse
import keras
from tensorflow.keras import backend as K
from sklearn.linear_model import LinearRegression
import sys
import matplotlib.pyplot as plt
from tensorflow.keras.optimizers import Adam
import pandas as pd
import io
from keras.models import load_model
import time
from scipy.stats import pearsonr
from keras.layers import Input, Dense, Reshape, Flatten, Dropout, multiply, GaussianNoise
from keras.layers import BatchNormalization, Activation, Embedding, ZeroPadding2D
from keras.layers import MaxPooling2D, LeakyReLU
from keras.layers import UpSampling2D, Conv2D, Conv1D
from keras.models import Sequential, Model
from keras import losses
import keras.backend as K
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KernelDensity
import os
from sklearn.model_selection import train_test_split
import random
from keras.models import load_model
from absl import app
from absl import flags
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import logging
from tensorflow_privacy.privacy.analysis import compute_dp_sgd_privacy
from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras import DPKerasSGDOptimizer, DPKerasAdamOptimizer
from tensorflow_privacy.privacy.analysis.compute_dp_sgd_privacy_lib import compute_dp_sgd_privacy
from sklearn.preprocessing import MinMaxScaler

Include path to where data is saved, import data, and remove duplicates. Note that we are using the oversampled version of the Criteo data (oversampled to make the prevalence of conversion about 10%) so that we have positive values in all data subsets.

In [3]:
# file path to data to synthesize
file_path = '../../Data/Criteo/'
# import the confidential data to synthesize
# using 'churn' as name for compatibility with code from Ponte et al.
churn = pd.read_csv(
    file_path + "cleaned_criteo_os.gz",
    compression='gzip',
    # A plain comma: the previous '\,' was an invalid escape sequence in a
    # non-raw string and was interpreted as a regular-expression separator.
    sep=',',
    header=0,
    engine='python',
)
# remove duplicated rows (returns a new DataFrame)
churn = churn.drop_duplicates()

In [4]:
churn

Unnamed: 0,f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,treatment,conversion,visit,exposure
0,12.616365,10.059654,9.051023,4.679882,10.280525,4.115453,0.294443,4.833815,3.955396,13.190056,5.300375,-0.168679,0,0,0,0
1,12.846971,10.059654,8.334264,-4.109746,11.561050,4.115453,-17.719730,4.833815,3.837301,38.005580,5.900432,-0.337358,1,0,0,0
2,25.823921,10.059654,8.214383,4.679882,10.280525,4.115453,-10.764422,4.833815,3.971858,13.190056,5.300375,-0.168679,1,0,0,0
3,12.616365,10.059654,8.943488,4.679882,10.280525,4.115453,0.294443,4.833815,3.920995,13.190056,5.300375,-0.168679,1,0,0,0
4,21.942159,10.059654,8.214383,4.679882,10.280525,4.115453,-3.993764,4.833815,3.971858,13.190056,5.300375,-0.168679,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
81543,13.680284,10.059654,8.325934,-0.600592,11.029584,1.128518,-13.045950,10.885556,3.758296,44.784329,5.844038,-0.267350,1,1,1,1
81544,14.251906,13.579750,8.303577,-2.272900,12.594889,-4.636110,-19.328059,5.621479,3.755250,42.018683,6.141586,-0.168679,1,1,1,1
81545,20.711370,10.059654,8.290111,4.679882,10.280525,4.115453,-6.359690,4.833815,3.813849,26.606156,5.300375,-0.168679,1,1,1,1
81546,23.767207,10.059654,8.283185,4.679882,10.280525,4.115453,-3.282109,4.833815,3.767224,46.714867,5.300375,-0.168679,1,1,1,0


Create dictionary of noise multipliers used for each data set size, and a list of data sizes to use for privacy analysis. Note that the size of the `marketer`, `adversary`, and `external` data are all `training_data_size/3`.

In [5]:
# Noise multipliers used for each data set size, keyed by the size as a string.
all_noise_multipliers = dict([
    ('300', [1.011, 2.98, 6.96, 11.85, 60]),
    ('3000', [0.6785, 1.43, 3.1, 5.4, 38.5]),
    ('30000', [0.502, 0.81, 1.35, 2.23, 15.5]),
])

# Data sizes are 3X those of the actual training data
# (i.e., the marketer, adversary, and external data have size training_data_size/3).
# They are read off the dictionary keys, in insertion order.
data_sizes = [int(size_key) for size_key in all_noise_multipliers]

Define a class for estimating a GAN with differential privacy.

In [6]:
"""# GANs with differential privacy"""
class GAN():
    """GAN over 16-column rows with an optionally DP-trained discriminator.

    The generator maps a 16-dimensional latent vector through
    Dense(16) -> LeakyReLU -> Dense(64) -> LeakyReLU -> Dense(16) -> tanh.
    The discriminator is Dense(64) -> LeakyReLU -> Dense(1, sigmoid).
    When ``privacy`` is set, the discriminator is compiled with
    ``DPKerasAdamOptimizer``; the generator (via the combined model) is always
    trained with a plain Adam optimizer.
    """

    def __init__(self, privacy, noise_multiplier=None, num_microbatches=None,
                 l2_norm_clip=4, learning_rate=0.001):
        """Build and compile the discriminator, the generator and the combined model.

        Args:
            privacy: if truthy, the discriminator is trained with the DP optimizer.
            noise_multiplier: DP noise multiplier. When ``None`` the value of the
                module-level variable ``noise_multiplier`` is used, which is how
                existing callers (``GAN(privacy=True)``) supply it.
            num_microbatches: number of DP microbatches. When ``None`` the
                module-level variable ``num_microbatches`` is used.
            l2_norm_clip: gradient clipping norm passed to the DP optimizer.
            learning_rate: learning rate of the DP optimizer.

        Raises:
            NameError: if ``privacy`` is set and a DP setting is neither passed
                explicitly nor defined at module level.
        """
        self.img_rows = 1
        self.img_cols = 16
        self.img_shape = (self.img_cols,)
        self.latent_dim = 16

        # NOTE(review): this non-private discriminator is discarded again when
        # privacy is enabled. It is still built first so that the sequence of
        # layer constructions is the same as before -- confirm whether seeded
        # weight initialisation depends on it before simplifying.
        self.discriminator = self.build_discriminator()
        self.discriminator.compile(loss='binary_crossentropy',
                                   optimizer=keras.optimizers.Adam(),
                                   metrics=['accuracy'])
        if privacy:
            noise_multiplier = self._dp_setting('noise_multiplier', noise_multiplier)
            num_microbatches = self._dp_setting('num_microbatches', num_microbatches)

            # Build and compile the discriminator with the DP optimizer.
            # NOTE(review): the loss is created with from_logits=True although
            # build_discriminator() ends in a sigmoid -- confirm this is intended.
            self.discriminator = self.build_discriminator()
            self.discriminator.compile(
                optimizer=DPKerasAdamOptimizer(
                    l2_norm_clip=l2_norm_clip,
                    noise_multiplier=noise_multiplier,
                    num_microbatches=num_microbatches,
                    learning_rate=learning_rate),
                loss=tf.keras.losses.BinaryCrossentropy(
                    from_logits=True, reduction=tf.losses.Reduction.NONE),
                metrics=['accuracy'])

        # Build the generator
        self.generator = self.build_generator()

        # The generator takes noise as input and generates rows
        z = Input(shape=(self.latent_dim,))
        img = self.generator(z)

        # For the combined model we will only train the generator
        self.discriminator.trainable = False

        # The discriminator takes generated rows as input and determines validity
        valid = self.discriminator(img)

        # The combined model (stacked generator and discriminator) trains the
        # generator to fool the discriminator. It gets its own Adam instance:
        # an optimizer instance should not be shared between two models that
        # update different sets of variables.
        self.combined = Model(z, valid)
        self.combined.compile(loss='binary_crossentropy',
                              optimizer=keras.optimizers.Adam())

    @staticmethod
    def _dp_setting(name, explicit):
        """Return ``explicit`` unless it is None, else the module-level variable ``name``."""
        if explicit is not None:
            return explicit
        try:
            return globals()[name]
        except KeyError:
            raise NameError(
                "name '%s' is not defined; pass it to GAN(...) or define it "
                "as a module-level variable" % name) from None

    def build_generator(self):
        """Return the generator ``Model`` mapping latent noise to a generated row."""
        model = Sequential()
        model.add(Dense(self.latent_dim, input_dim=self.latent_dim))
        model.add(LeakyReLU(alpha=0.2))
        # NOTE(review): input_shape on a non-first Sequential layer looks
        # redundant; kept so the layer configuration is unchanged.
        model.add(Dense(64, input_shape=self.img_shape))
        model.add(LeakyReLU(alpha=0.2))
        # The output width must match the discriminator input (img_cols columns).
        model.add(Dense(self.img_cols))
        model.add(Activation("tanh"))

        noise = Input(shape=(self.latent_dim,))
        img = model(noise)
        return Model(noise, img)

    def build_discriminator(self):
        """Return the (uncompiled) discriminator ``Model`` scoring a row in [0, 1]."""
        model = Sequential()
        model.add(Dense(64, input_shape=self.img_shape))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Dense(1, activation='sigmoid'))

        img = Input(shape=self.img_shape)
        validity = model(img)
        return Model(img, validity)

    def train(self, data, iterations, batch_size, sample_interval, model_name,
              generator_losses=None, discriminator_acc=None, correlations=None,
              accuracy=None, MAPD_collect=None, MSE_collect=None, MAE_collect=None):
        """Run ``iterations`` alternating discriminator/generator updates, then save the generator.

        Args:
            data: 2-D array of training rows; a batch is drawn from it by random
                row index (with replacement) at every iteration.
            iterations: number of training steps.
            batch_size: number of real and of generated rows per step.
            sample_interval: unused; kept for backward compatibility.
            model_name: path the trained generator is saved to.
            generator_losses, discriminator_acc, correlations, accuracy,
            MAPD_collect, MSE_collect, MAE_collect: unused; kept for backward
                compatibility (they default to None instead of a shared list).
        """
        # Adversarial ground truths
        valid = np.ones((batch_size, 1))
        fake = np.zeros((batch_size, 1))

        for epoch in range(iterations):

            # ---------------------
            #  Train Discriminator
            # ---------------------

            # Select a random batch of real rows
            idx = np.random.randint(0, data.shape[0], batch_size)
            imgs = data[idx]

            noise = np.random.normal(0, 1, (batch_size, self.latent_dim))

            # Generate a batch of new rows
            gen_imgs = self.generator.predict(noise, verbose = False)

            # Train the discriminator on the real and on the generated batch
            d_loss_real = self.discriminator.train_on_batch(imgs, valid)
            d_loss_fake = self.discriminator.train_on_batch(gen_imgs, fake)
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

            # ---------------------
            #  Train Generator
            # ---------------------
            # Train the generator (to have the discriminator label samples as valid)

            noise = np.random.normal(0, 1, (batch_size, self.latent_dim))
            g_loss = self.combined.train_on_batch(noise, valid)

            if (epoch % 100) == 0:
                print("%d [D loss: %f, acc.: %.2f%%] [G loss: %f]" % (epoch, d_loss[0], 100*d_loss[1], g_loss))

        self.generator.save(model_name)

Utility function (adapted from Ponte et al.)

In [7]:
def utility(real_data, protected_data):
    """Compare logistic-regression coefficients fitted on real and on protected data.

    An unpenalised logistic regression of ``conversion`` on all remaining
    columns is fitted to each data set. The coefficient vectors (intercept
    first) are then compared, treating the real-data coefficients as the
    reference values.

    Returns:
        Tuple ``(MAPD, MAE, MSE)``: mean absolute percentage error times 100,
        mean absolute error and mean squared error between the two vectors.
    """
    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import mean_absolute_percentage_error, mean_absolute_error, mean_squared_error

    def coefficient_vector(frame):
        # intercept followed by the flattened slope coefficients
        model = LogisticRegression(penalty=None, max_iter=1000, solver='lbfgs')
        model.fit(frame.drop('conversion', axis=1), frame['conversion'])
        return np.concatenate([[model.intercept_[0]], model.coef_.flatten()])

    reference = coefficient_vector(real_data)
    candidate = coefficient_vector(protected_data)

    return (
        mean_absolute_percentage_error(reference, candidate) * 100,
        mean_absolute_error(reference, candidate),
        mean_squared_error(reference, candidate),
    )

#### Results for $N = 300$.

The old version of the code is included below for reference. We believe we have found some logical errors in the code of Ponte et al. and have addressed them in the new version. The errors were as follows:

1. Inside the inner loop over `noise_multipliers`, you overwrite `train_GAN` and `adversary_training_GAN` with newly generated synthetic data:
    - Initial values before the loop:
        - `train_GAN = scaler0.transform(train)` and `adversary_training_GAN = scaler1.transform(adversary_training)` (both derived from confidential data).
    - After training and sampling for the first noise level, you do:
        - `train_GAN = pd.DataFrame(gen_imgs...)` and `adversary_training_GAN = pd.DataFrame(gen_imgs...)`.
    - On the next noise level in the same outer iter, the GANs are trained on these synthetic datasets instead of the original confidential data.
    - Result: for `noise_multipliers[1]` and beyond (within the same `iter`), the GANs are fit to synthetic data produced by the previous noise level's GAN rather than to the confidential data.

In [8]:
# import warnings
# warnings.filterwarnings('ignore')

# import os
# import logging
# import tensorflow as tf
# from absl import logging as absl_logging

# # Suppress low-level TF C++ logs (0=all, 1=INFO, 2=WARNING, 3=ERROR)
# os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

# # Suppress Python-level TF warnings
# tf.get_logger().setLevel(logging.ERROR)
# logging.getLogger("tensorflow").setLevel(logging.ERROR)
# absl_logging.set_verbosity(absl_logging.ERROR)

# noise_multipliers = all_noise_multipliers['300']
# samples = int(data_sizes[0])

# """iteraties en batch size hetzelfde houden."""
# random.seed(1)
# np.random.seed(1)
# tf.random.set_seed(1)

# start_time = time.time()

# epsilons_13 = np.array([])
# MAPD_col_13 = np.array([])
# MAE_col_13 = np.array([])
# MSE_col_13 = np.array([])

# epsilons_3 = np.array([])
# MAPD_col_3 = np.array([])
# MAE_col_3 = np.array([])
# MSE_col_3 = np.array([])

# epsilons_1 = np.array([])
# MAPD_col_1 = np.array([])
# MAE_col_1 = np.array([])
# MSE_col_1 = np.array([])

# epsilons_05 = np.array([])
# MAPD_col_05 = np.array([])
# MAE_col_05 = np.array([])
# MSE_col_05 = np.array([])

# epsilons_005 = np.array([])
# MAPD_col_005 = np.array([])
# MAE_col_005 = np.array([])
# MSE_col_005 = np.array([])

# epsilons_001 = np.array([])
# MAPD_col_001 = np.array([])
# MAE_col_001 = np.array([])
# MSE_col_001 = np.array([])

# TPR_col = np.array([])
# FPR_col = np.array([])
# TNR_col = np.array([])
# FNR_col = np.array([])

# for iter in range(100):
#   random.seed(iter)
#   np.random.seed(iter)
#   tf.random.set_seed(iter)
#   print("iteration is " + str(iter))
#   sampled_churn = churn.sample(frac = 1, random_state = iter)
#   both_train, evaluation_outside_training = train_test_split(sampled_churn, train_size = int(samples*2/3), test_size = int(samples*1/3), stratify = sampled_churn['conversion'])
#   train, adversary_training = train_test_split(both_train, train_size = int(samples*1/3), stratify=both_train['conversion'])

#   scaler0 = MinMaxScaler(feature_range= (-1, 1))
#   scaler0 = scaler0.fit(train)
#   train_GAN = scaler0.transform(train)
#   train_GAN = pd.DataFrame(train_GAN)

#   scaler1 = MinMaxScaler(feature_range= (-1, 1))
#   scaler1 = scaler1.fit(adversary_training)
#   adversary_training_GAN = scaler1.transform(adversary_training)
#   adversary_training_GAN = pd.DataFrame(adversary_training_GAN)

#   for noise in noise_multipliers: # we vary the noise multipliers here
#     random.seed(iter)
#     np.random.seed(iter)
#     tf.random.set_seed(iter)

#     # setting epsilon
#     N = len(train)
#     batch_size = 100
#     ### change for different data sizes
#     iterations = 10
#     epochs = iterations/(N/batch_size) # should be 10

#     # the noise_multiplier is not directly passed to the GAN, but the GAN code reads it from the global environment
#     noise_multiplier = noise
#     l2_norm_clip = 4 # see paper in validation section.
#     delta= 1/N # should be 1/N
#     theor_epsilon = compute_dp_sgd_privacy(N, batch_size, noise_multiplier,
#                           epochs, delta) # calculate the theoretical bound of epsilon
#     N = len(train)/10 # to prevent naive model
#     num_microbatches = batch_size # see validation section paper.
#     # print("theoretical epsilon = " + str(round(theor_epsilon[0],2))) # print epsilon

#     # train GAN on train data
#     gan_train = GAN(privacy = True)
#     gan_train.train(data = np.array(train_GAN), iterations=iterations, batch_size=batch_size, sample_interval=((iterations-1)/10), model_name = "train_1.h5")

#     # Generate a batch of new customers
#     generator = load_model('train_1.h5')
#     noise = np.random.normal(0, 1, (int(samples*1/3), 16))
#     gen_imgs = generator.predict(noise, verbose = False)
#     gen_imgs = scaler0.inverse_transform(gen_imgs)
#     train_GAN = pd.DataFrame(gen_imgs.reshape(int(samples*1/3), 16))
#     train_GAN.columns = train.columns.values

#     ####################################################
#     # round the values of categorical variables, as done by Ponte et al.
#     ####################################################
#     train_GAN['treatment'] = train_GAN['treatment'].round()
#     train_GAN['conversion'] = train_GAN['conversion'].round()
#     train_GAN['visit'] = train_GAN['visit'].round()
#     train_GAN['exposure'] = train_GAN['exposure'].round()
    
#     # adversary has access to the model and samples another adversary_sample
#     gan_adv = GAN(privacy = True)
#     gan_adv.train(data = np.array(adversary_training_GAN), iterations=iterations, batch_size=batch_size, sample_interval=((iterations-1)/10), model_name = "adversary_1.h5")

#     # Generate a batch of new images
#     generator = load_model('adversary_1.h5')
#     noise = np.random.normal(0, 1, (int(samples*1/3), 16))
#     gen_imgs = generator.predict(noise, verbose = False)
#     gen_imgs = scaler1.inverse_transform(gen_imgs)
#     adversary_training_GAN = pd.DataFrame(gen_imgs.reshape(int(samples*1/3), 16))
#     adversary_training_GAN.columns = adversary_training.columns.values

#     ####################################################
#     # round the values of categorical variables, as done by Ponte et al.
#     ####################################################
#     adversary_training_GAN['treatment'] = adversary_training_GAN['treatment'].round()
#     adversary_training_GAN['conversion'] = adversary_training_GAN['conversion'].round()
#     adversary_training_GAN['visit'] = adversary_training_GAN['visit'].round()
#     adversary_training_GAN['exposure'] = adversary_training_GAN['exposure'].round()

#     # stap 1, 2
#     params = {"bandwidth": np.logspace(-1, 1, 20)}
#     grid_train = GridSearchCV(KernelDensity(), params, n_jobs = -1)
#     grid_train.fit(train_GAN)
#     # print(grid_train.best_estimator_)
#     kde_train = grid_train.best_estimator_

#     grid = GridSearchCV(KernelDensity(), params, n_jobs = -1)
#     grid.fit(adversary_training_GAN)
#     # print(grid.best_estimator_)
#     kde_adversary = grid.best_estimator_

#     # stap 3
#     density_train = kde_train.score_samples(train)
#     density_adversary = kde_adversary.score_samples(train)
#     TPR = sum(density_train > density_adversary)/len(density_train)

#     # stap 4
#     density_train_new = kde_train.score_samples(evaluation_outside_training)
#     density_adversary_new = kde_adversary.score_samples(evaluation_outside_training)
#     FPR = sum(density_train_new > density_adversary_new)/len(density_train_new)
#     TNR = 1 - FPR
#     FNR = 1 - TPR
#     print("FPR is " + str(FPR))
#     print("FNR is " + str(FNR))
#     print("TPR is " + str(TPR))
#     print("TNR is " + str(TNR))

#     TPR_col = np.append(TPR_col, TPR)
#     FPR_col = np.append(FPR_col, FPR)
#     TNR_col = np.append(TNR_col, TNR)
#     FNR_col = np.append(FNR_col, FNR)

#     # utility
#     MAPD_train, MAE_train, MSE_train = utility(real_data = train, protected_data = train_GAN)
#     MAPD_adv, MAE_adv, MSE_adv = utility(real_data = train, protected_data = adversary_training_GAN)
#     MAPD = (MAPD_train+MAPD_adv)/2
#     MAE = (MAE_train+MAE_adv)/2
#     MSE = (MSE_train+MSE_adv)/2
#     # print("MAPD" + str(MAPD))

#     ## to save the results per epsilon (a bit lazy admittedly).
#     if noise_multiplier == noise_multipliers[0]:
#       try:
#         epsilons_13 = np.append(epsilons_13,max(math.log((1 - (1/N) - FPR)/FNR), math.log((1 - (1/N) - FNR)/FPR)))
#         print("empirical epsilon = " + str(max(math.log((1 - (1/N) - FPR)/FNR), math.log((1 - (1/N) - FNR)/FPR))))
#         MAPD_col_13 = np.append(MAPD_col_13, MAPD)
#         MAE_col_13 = np.append(MAE_col_13, MAE)
#         MSE_col_13 = np.append(MSE_col_13, MSE)
#       except:
#         print("undefined privacy risk")
#         epsilons_13 = np.append(epsilons_13, 0)
#         print("empirical epsilon = " + str(0))
#         MAPD_col_13 = np.append(MAPD_col_13, MAPD)
#         MAE_col_13 = np.append(MAE_col_13, MAE)
#         MSE_col_13 = np.append(MSE_col_13, MSE)

#     if noise_multiplier == noise_multipliers[1]:
#       try:
#         epsilons_3 = np.append(epsilons_3,max(math.log((1 - (1/N) - FPR)/FNR), math.log((1 - (1/N) - FNR)/FPR)))
#         print("empirical epsilon = " + str(max(math.log((1 - (1/N) - FPR)/FNR), math.log((1 - (1/N) - FNR)/FPR))))
#         MAPD_col_3 = np.append(MAPD_col_3, MAPD)
#         MAE_col_3 = np.append(MAE_col_3, MAE)
#         MSE_col_3 = np.append(MSE_col_3, MSE)
#       except:
#         print("undefined privacy risk")
#         epsilons_3 = np.append(epsilons_3, 0)
#         print("empirical epsilon = " + str(0))
#         MAPD_col_3 = np.append(MAPD_col_3, MAPD)
#         MAE_col_3 = np.append(MAE_col_3, MAE)
#         MSE_col_3 = np.append(MSE_col_3, MSE)

#     if noise_multiplier == noise_multipliers[2]:
#       try:
#         epsilons_1 = np.append(epsilons_1,max(math.log((1 - (1/N) - FPR)/FNR), math.log((1 - (1/N) - FNR)/FPR)))
#         print("empirical epsilon = " + str(max(math.log((1 - (1/N) - FPR)/FNR), math.log((1 - (1/N) - FNR)/FPR))))
#         MAPD_col_1 = np.append(MAPD_col_1, MAPD)
#         MAE_col_1 = np.append(MAE_col_1, MAE)
#         MSE_col_1 = np.append(MSE_col_1, MSE)
#       except:
#         print("undefined privacy risk")
#         epsilons_1 = np.append(epsilons_1, 0)
#         print("empirical epsilon = " + str(0))
#         MAPD_col_1 = np.append(MAPD_col_1, MAPD)
#         MAE_col_1 = np.append(MAE_col_1, MAE)
#         MSE_col_1 = np.append(MSE_col_1, MSE)

#     if noise_multiplier == noise_multipliers[3]:
#       try:
#         epsilons_05 = np.append(epsilons_05,max(math.log((1 - (1/N) - FPR)/FNR), math.log((1 - (1/N) - FNR)/FPR)))
#         print("empirical epsilon = " + str(max(math.log((1 - (1/N) - FPR)/FNR), math.log((1 - (1/N) - FNR)/FPR))))
#         MAPD_col_05 = np.append(MAPD_col_05, MAPD)
#         MAE_col_05 = np.append(MAE_col_05, MAE)
#         MSE_col_05 = np.append(MSE_col_05, MSE)
#       except:
#         print("undefined privacy risk")
#         epsilons_05 = np.append(epsilons_05, 0)
#         print("empirical epsilon = " + str(0))
#         MAPD_col_05 = np.append(MAPD_col_05, MAPD)
#         MAE_col_05 = np.append(MAE_col_05, MAE)
#         MSE_col_05 = np.append(MSE_col_05, MSE)

#     if noise_multiplier == noise_multipliers[4]:
#       try:
#         epsilons_005 = np.append(epsilons_005,max(math.log((1 - (1/N) - FPR)/FNR), math.log((1 - (1/N) - FNR)/FPR)))
#         print("empirical epsilon = " + str(max(math.log((1 - (1/N) - FPR)/FNR), math.log((1 - (1/N) - FNR)/FPR))))
#         MAPD_col_005 = np.append(MAPD_col_005, MAPD)
#         MAE_col_005 = np.append(MAE_col_005, MAE)
#         MSE_col_005 = np.append(MSE_col_005, MSE)
#       except:
#         print("undefined privacy risk")
#         epsilons_005 = np.append(epsilons_005, 0)
#         print("empirical epsilon = " + str(0))
#         MAPD_col_005 = np.append(MAPD_col_005, MAPD)
#         MAE_col_005 = np.append(MAE_col_005, MAE)
#         MSE_col_005 = np.append(MSE_col_005, MSE)

# end_time = time.time()
# elapsed_time = end_time - start_time
# print(elapsed_time)

# epsilons_13.mean()

# epsilons_3.mean()

# epsilons_1.mean()

# epsilons_05.mean()

# epsilons_005.mean()

# np.savetxt("epsilons_13_" + str(samples) + ".csv", epsilons_13, delimiter=",")
# np.savetxt("MAPD_13_" + str(samples) + ".csv", MAPD_col_13, delimiter=",")
# np.savetxt("MAE_13_" + str(samples) + ".csv", MAE_col_13, delimiter=",")
# np.savetxt("MSE_13_" + str(samples) + ".csv", MSE_col_13, delimiter=",")

# np.savetxt("epsilons_3_" + str(samples) + ".csv", epsilons_3, delimiter=",")
# np.savetxt("MAPD_3_" + str(samples) + ".csv", MAPD_col_3, delimiter=",")
# np.savetxt("MAE_3_" + str(samples) + ".csv", MAE_col_3, delimiter=",")
# np.savetxt("MSE_3_" + str(samples) + ".csv", MSE_col_3, delimiter=",")

# np.savetxt("epsilons_1_" + str(samples) + ".csv", epsilons_1, delimiter=",")
# np.savetxt("MAPD_1_" + str(samples) + ".csv", MAPD_col_1, delimiter=",")
# np.savetxt("MAE_1_" + str(samples) + ".csv", MAE_col_1, delimiter=",")
# np.savetxt("MSE_1_" + str(samples) + ".csv", MSE_col_1, delimiter=",")

# np.savetxt("epsilons_05_" + str(samples) + ".csv", epsilons_05, delimiter=",")
# np.savetxt("MAPD_05_" + str(samples) + ".csv", MAPD_col_05, delimiter=",")
# np.savetxt("MAE_05_" + str(samples) + ".csv", MAE_col_05, delimiter=",")
# np.savetxt("MSE_05_" + str(samples) + ".csv", MSE_col_05, delimiter=",")

# np.savetxt("epsilons_005_" + str(samples) + ".csv", epsilons_005, delimiter=",")
# np.savetxt("MAPD_005_" + str(samples) + ".csv", MAPD_col_005, delimiter=",")
# np.savetxt("MAE_005_" + str(samples) + ".csv", MAE_col_005, delimiter=",")
# np.savetxt("MSE_005_" + str(samples) + ".csv", MSE_col_005, delimiter=",")

Code with our fixes.

In [9]:
import warnings
warnings.filterwarnings('ignore')

import os
import logging
import tensorflow as tf
from absl import logging as absl_logging

# Silence low-level TF C++ logs (0=all, 1=INFO, 2=WARNING, 3=ERROR)
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

# Silence Python-level TF and absl warnings
tf.get_logger().setLevel(logging.ERROR)
logging.getLogger("tensorflow").setLevel(logging.ERROR)
absl_logging.set_verbosity(absl_logging.ERROR)

# Settings for the smallest data size
samples = int(data_sizes[0])
noise_multipliers = all_noise_multipliers['300']

# (Dutch) "keep iterations and batch size the same."
"""iteraties en batch size hetzelfde houden."""
for seed_everything in (random.seed, np.random.seed, tf.random.set_seed):
    seed_everything(1)

start_time = time.time()

# One set of empty result collectors (empirical epsilon, MAPD, MAE, MSE) per
# result suffix; each name gets its own fresh empty array.
epsilons_13, MAPD_col_13, MAE_col_13, MSE_col_13 = (np.array([]) for _ in range(4))
epsilons_3, MAPD_col_3, MAE_col_3, MSE_col_3 = (np.array([]) for _ in range(4))
epsilons_1, MAPD_col_1, MAE_col_1, MSE_col_1 = (np.array([]) for _ in range(4))
epsilons_05, MAPD_col_05, MAE_col_05, MSE_col_05 = (np.array([]) for _ in range(4))
epsilons_005, MAPD_col_005, MAE_col_005, MSE_col_005 = (np.array([]) for _ in range(4))
epsilons_001, MAPD_col_001, MAE_col_001, MSE_col_001 = (np.array([]) for _ in range(4))

# Collectors for the attack's true/false positive and negative rates
TPR_col, FPR_col, TNR_col, FNR_col = (np.array([]) for _ in range(4))

for iter in range(100):
  random.seed(iter)
  np.random.seed(iter)
  tf.random.set_seed(iter)
  print("iteration is " + str(iter))
  sampled_churn = churn.sample(frac = 1, random_state = iter)
  both_train, evaluation_outside_training = train_test_split(sampled_churn, train_size = int(samples*2/3), test_size = int(samples*1/3), stratify = sampled_churn['conversion'])
  train, adversary_training = train_test_split(both_train, train_size = int(samples*1/3), stratify=both_train['conversion'])

  scaler0 = MinMaxScaler(feature_range= (-1, 1))
  scaler0 = scaler0.fit(train)
  train_GAN_real = scaler0.transform(train)
  train_GAN_real = pd.DataFrame(train_GAN_real)

  scaler1 = MinMaxScaler(feature_range= (-1, 1))
  scaler1 = scaler1.fit(adversary_training)
  adversary_training_GAN_real = scaler1.transform(adversary_training)
  adversary_training_GAN_real = pd.DataFrame(adversary_training_GAN_real)

  for noise in noise_multipliers: # we vary the noise multipliers here
    random.seed(iter)
    np.random.seed(iter)
    tf.random.set_seed(iter)

    # setting epsilon
    N = len(train)
    batch_size = 100
    ### change for different data sizes
    iterations = 10
    epochs = iterations/(N/batch_size) # should be 10

    # the noise_multiplier is not directly passed to the GAN, but the GAN code reads it from the global environment
    noise_multiplier = noise
    l2_norm_clip = 4 # see paper in validation section.
    delta= 1/N # should be 1/N
    theor_epsilon = compute_dp_sgd_privacy(N, batch_size, noise_multiplier,
                          epochs, delta) # calculate the theoretical bound of epsilon
    N = len(train)/10 # to prevent naive model
    num_microbatches = batch_size # see validation section paper.
    # print("theoretical epsilon = " + str(round(theor_epsilon[0],2))) # print epsilon

    # train GAN on train data
    gan_train = GAN(privacy = True)
    gan_train.train(data = np.array(train_GAN_real), iterations=iterations, batch_size=batch_size, sample_interval=((iterations-1)/10), model_name = "train_1.h5")

    # Generate a batch of new customers
    generator = load_model('train_1.h5')
    noise = np.random.normal(0, 1, (int(samples*1/3), 16))
    gen_imgs = generator.predict(noise, verbose = False)
    gen_imgs = scaler0.inverse_transform(gen_imgs)
    train_GAN = pd.DataFrame(gen_imgs.reshape(int(samples*1/3), 16))
    train_GAN.columns = train.columns.values

    ####################################################
    # round the values of categorical variables, as done by Ponte et al.
    ####################################################
    train_GAN['treatment'] = train_GAN['treatment'].round()
    train_GAN['conversion'] = train_GAN['conversion'].round()
    train_GAN['visit'] = train_GAN['visit'].round()
    train_GAN['exposure'] = train_GAN['exposure'].round()
    
    # adversary has access to the model and samples another adversary_sample
    gan_adv = GAN(privacy = True)
    gan_adv.train(data = np.array(adversary_training_GAN_real), iterations=iterations, batch_size=batch_size, sample_interval=((iterations-1)/10), model_name = "adversary_1.h5")

    # Generate a batch of new images
    generator = load_model('adversary_1.h5')
    noise = np.random.normal(0, 1, (int(samples*1/3), 16))
    gen_imgs = generator.predict(noise, verbose = False)
    gen_imgs = scaler1.inverse_transform(gen_imgs)
    adversary_training_GAN = pd.DataFrame(gen_imgs.reshape(int(samples*1/3), 16))
    adversary_training_GAN.columns = adversary_training.columns.values

    ####################################################
    # round the values of categorical variables, as done by Ponte et al.
    ####################################################
    adversary_training_GAN['treatment'] = adversary_training_GAN['treatment'].round()
    adversary_training_GAN['conversion'] = adversary_training_GAN['conversion'].round()
    adversary_training_GAN['visit'] = adversary_training_GAN['visit'].round()
    adversary_training_GAN['exposure'] = adversary_training_GAN['exposure'].round()

    # stap 1, 2
    params = {"bandwidth": np.logspace(-1, 1, 20)}
    grid_train = GridSearchCV(KernelDensity(), params, n_jobs = -1)
    grid_train.fit(train_GAN)
    # print(grid_train.best_estimator_)
    kde_train = grid_train.best_estimator_

    grid = GridSearchCV(KernelDensity(), params, n_jobs = -1)
    grid.fit(adversary_training_GAN)
    # print(grid.best_estimator_)
    kde_adversary = grid.best_estimator_

    # stap 3
    density_train = kde_train.score_samples(train)
    density_adversary = kde_adversary.score_samples(train)
    TPR = sum(density_train > density_adversary)/len(density_train)

    # stap 4
    density_train_new = kde_train.score_samples(evaluation_outside_training)
    density_adversary_new = kde_adversary.score_samples(evaluation_outside_training)
    FPR = sum(density_train_new > density_adversary_new)/len(density_train_new)
    TNR = 1 - FPR
    FNR = 1 - TPR
    print("FPR is " + str(FPR))
    print("FNR is " + str(FNR))
    print("TPR is " + str(TPR))
    print("TNR is " + str(TNR))

    TPR_col = np.append(TPR_col, TPR)
    FPR_col = np.append(FPR_col, FPR)
    TNR_col = np.append(TNR_col, TNR)
    FNR_col = np.append(FNR_col, FNR)

    # utility
    # if model cannot be estimated due to missing two classes in target variable, set utility to np.nan
    try:
      MAPD_train, MAE_train, MSE_train = utility(real_data = train, protected_data = train_GAN)
    except:
      MAPD_train, MAE_train, MSE_train = np.nan, np.nan, np.nan
    try:
      MAPD_adv, MAE_adv, MSE_adv = utility(real_data = train, protected_data = adversary_training_GAN)
    except:
      MAPD_adv, MAE_adv, MSE_adv = np.nan, np.nan, np.nan

    MAPD = (MAPD_train+MAPD_adv)/2
    MAE = (MAE_train+MAE_adv)/2
    MSE = (MSE_train+MSE_adv)/2
    # print("MAPD" + str(MAPD))

    ## to save the results per epsilon (a bit lazy admittedly).
    if noise_multiplier == noise_multipliers[0]:
      try:
        epsilons_13 = np.append(epsilons_13,max(math.log((1 - (1/N) - FPR)/FNR), math.log((1 - (1/N) - FNR)/FPR)))
        print("empirical epsilon = " + str(max(math.log((1 - (1/N) - FPR)/FNR), math.log((1 - (1/N) - FNR)/FPR))))
        MAPD_col_13 = np.append(MAPD_col_13, MAPD)
        MAE_col_13 = np.append(MAE_col_13, MAE)
        MSE_col_13 = np.append(MSE_col_13, MSE)
      except:
        print("undefined privacy risk")
        epsilons_13 = np.append(epsilons_13, 0)
        print("empirical epsilon = " + str(0))
        MAPD_col_13 = np.append(MAPD_col_13, MAPD)
        MAE_col_13 = np.append(MAE_col_13, MAE)
        MSE_col_13 = np.append(MSE_col_13, MSE)

    if noise_multiplier == noise_multipliers[1]:
      try:
        epsilons_3 = np.append(epsilons_3,max(math.log((1 - (1/N) - FPR)/FNR), math.log((1 - (1/N) - FNR)/FPR)))
        print("empirical epsilon = " + str(max(math.log((1 - (1/N) - FPR)/FNR), math.log((1 - (1/N) - FNR)/FPR))))
        MAPD_col_3 = np.append(MAPD_col_3, MAPD)
        MAE_col_3 = np.append(MAE_col_3, MAE)
        MSE_col_3 = np.append(MSE_col_3, MSE)
      except:
        print("undefined privacy risk")
        epsilons_3 = np.append(epsilons_3, 0)
        print("empirical epsilon = " + str(0))
        MAPD_col_3 = np.append(MAPD_col_3, MAPD)
        MAE_col_3 = np.append(MAE_col_3, MAE)
        MSE_col_3 = np.append(MSE_col_3, MSE)

    if noise_multiplier == noise_multipliers[2]:
      try:
        epsilons_1 = np.append(epsilons_1,max(math.log((1 - (1/N) - FPR)/FNR), math.log((1 - (1/N) - FNR)/FPR)))
        print("empirical epsilon = " + str(max(math.log((1 - (1/N) - FPR)/FNR), math.log((1 - (1/N) - FNR)/FPR))))
        MAPD_col_1 = np.append(MAPD_col_1, MAPD)
        MAE_col_1 = np.append(MAE_col_1, MAE)
        MSE_col_1 = np.append(MSE_col_1, MSE)
      except:
        print("undefined privacy risk")
        epsilons_1 = np.append(epsilons_1, 0)
        print("empirical epsilon = " + str(0))
        MAPD_col_1 = np.append(MAPD_col_1, MAPD)
        MAE_col_1 = np.append(MAE_col_1, MAE)
        MSE_col_1 = np.append(MSE_col_1, MSE)

    if noise_multiplier == noise_multipliers[3]:
      try:
        epsilons_05 = np.append(epsilons_05,max(math.log((1 - (1/N) - FPR)/FNR), math.log((1 - (1/N) - FNR)/FPR)))
        print("empirical epsilon = " + str(max(math.log((1 - (1/N) - FPR)/FNR), math.log((1 - (1/N) - FNR)/FPR))))
        MAPD_col_05 = np.append(MAPD_col_05, MAPD)
        MAE_col_05 = np.append(MAE_col_05, MAE)
        MSE_col_05 = np.append(MSE_col_05, MSE)
      except:
        print("undefined privacy risk")
        epsilons_05 = np.append(epsilons_05, 0)
        print("empirical epsilon = " + str(0))
        MAPD_col_05 = np.append(MAPD_col_05, MAPD)
        MAE_col_05 = np.append(MAE_col_05, MAE)
        MSE_col_05 = np.append(MSE_col_05, MSE)

    if noise_multiplier == noise_multipliers[4]:
      try:
        epsilons_005 = np.append(epsilons_005,max(math.log((1 - (1/N) - FPR)/FNR), math.log((1 - (1/N) - FNR)/FPR)))
        print("empirical epsilon = " + str(max(math.log((1 - (1/N) - FPR)/FNR), math.log((1 - (1/N) - FNR)/FPR))))
        MAPD_col_005 = np.append(MAPD_col_005, MAPD)
        MAE_col_005 = np.append(MAE_col_005, MAE)
        MSE_col_005 = np.append(MSE_col_005, MSE)
      except:
        print("undefined privacy risk")
        epsilons_005 = np.append(epsilons_005, 0)
        print("empirical epsilon = " + str(0))
        MAPD_col_005 = np.append(MAPD_col_005, MAPD)
        MAE_col_005 = np.append(MAE_col_005, MAE)
        MSE_col_005 = np.append(MSE_col_005, MSE)

# Report how long the experiment loop above took (wall-clock seconds).
end_time = time.time()
elapsed_time = end_time - start_time
print(elapsed_time)

# One row per noise level: file-name label followed by the epsilon, MAPD,
# MAE and MSE arrays collected for that level (same order as the files
# are written below).
_results_by_label = (
    ("13", epsilons_13, MAPD_col_13, MAE_col_13, MSE_col_13),
    ("3", epsilons_3, MAPD_col_3, MAE_col_3, MSE_col_3),
    ("1", epsilons_1, MAPD_col_1, MAE_col_1, MSE_col_1),
    ("05", epsilons_05, MAPD_col_05, MAE_col_05, MSE_col_05),
    ("005", epsilons_005, MAPD_col_005, MAE_col_005, MSE_col_005),
)

# Mean empirical epsilon per noise level. As written, the results are
# neither assigned nor printed; the calls are kept for parity.
for _label, _eps, _mapd, _mae, _mse in _results_by_label:
    _eps.mean()

# Persist every metric as "<metric>_<label>_<samples>.csv".
_file_tail = "_" + str(samples) + ".csv"
_metric_names = ("epsilons", "MAPD", "MAE", "MSE")
for _label, *_arrays in _results_by_label:
    for _metric, _values in zip(_metric_names, _arrays):
        np.savetxt(_metric + "_" + _label + _file_tail, _values, delimiter=",")

iteration is 0
0 [D loss: 0.692554, acc.: 60.00%] [G loss: 0.806731]
0 [D loss: 0.539967, acc.: 77.00%] [G loss: 0.711346]
FPR is 0.08
FNR is 0.98
TPR is 0.02
TNR is 0.92
undefined privacy risk
empirical epsilon = 0
0 [D loss: 0.692078, acc.: 60.00%] [G loss: 0.804765]
0 [D loss: 0.539446, acc.: 77.00%] [G loss: 0.710956]
FPR is 0.07
FNR is 0.98
TPR is 0.02
TNR is 0.9299999999999999
undefined privacy risk
empirical epsilon = 0
0 [D loss: 0.691724, acc.: 60.00%] [G loss: 0.804498]
0 [D loss: 0.539045, acc.: 78.00%] [G loss: 0.711384]
FPR is 0.07
FNR is 0.97
TPR is 0.03
TNR is 0.9299999999999999
undefined privacy risk
empirical epsilon = 0
0 [D loss: 0.691754, acc.: 60.00%] [G loss: 0.804059]
0 [D loss: 0.538864, acc.: 78.00%] [G loss: 0.711549]
FPR is 0.07
FNR is 0.98
TPR is 0.02
TNR is 0.9299999999999999
undefined privacy risk
empirical epsilon = 0
0 [D loss: 0.691563, acc.: 60.00%] [G loss: 0.803883]
0 [D loss: 0.538742, acc.: 78.50%] [G loss: 0.711494]
FPR is 0.07
FNR is 0.98
TPR is 

#### Results for $N = 3000$

In [10]:
# Membership-inference experiment for the 3000-row sample.
#
# For each of 100 seeds the churn data is split into a training third, an
# adversary third and a held-out third.  For every noise multiplier two
# GANs (privacy = True) are trained, one on the training third and one on
# the adversary third.  Kernel density estimates fitted on the two synthetic
# data sets give the attack's TPR / FPR, from which the empirical epsilon is
# derived.  Utility metrics and epsilons are collected per noise level and
# written to CSV at the end.
warnings.filterwarnings('ignore')

# Suppress low-level TF C++ logs (0=all, 1=INFO, 2=WARNING, 3=ERROR)
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

# Suppress Python-level TF warnings
tf.get_logger().setLevel(logging.ERROR)
logging.getLogger("tensorflow").setLevel(logging.ERROR)
absl_logging.set_verbosity(absl_logging.ERROR)

noise_multipliers = all_noise_multipliers['3000']
samples = int(data_sizes[1])

# Keep the number of iterations and the batch size the same.
random.seed(1)
np.random.seed(1)
tf.random.set_seed(1)

start_time = time.time()


def _empirical_epsilon(fpr, fnr, n):
  """Print and return the empirical epsilon for one attack outcome.

  The value is max(log((1 - 1/n - fpr) / fnr), log((1 - 1/n - fnr) / fpr)).
  When that expression cannot be evaluated (division by zero or logarithm
  of a non-positive number) "undefined privacy risk" is printed and 0 is
  returned instead.

  Args:
    fpr: false positive rate of the membership attack.
    fnr: false negative rate of the membership attack.
    n: value whose reciprocal is subtracted inside both ratios.

  Returns:
    The empirical epsilon, or 0 when it is undefined.
  """
  try:
    epsilon = max(math.log((1 - (1/n) - fpr)/fnr), math.log((1 - (1/n) - fnr)/fpr))
  except (ArithmeticError, ValueError):
    # Only arithmetic / math-domain failures mean "undefined"; anything else
    # (including KeyboardInterrupt) must propagate.
    print("undefined privacy risk")
    epsilon = 0
  print("empirical epsilon = " + str(epsilon))
  return epsilon


# Result collectors, one group per noise level (suffix = file-name label).
epsilons_13 = np.array([])
MAPD_col_13 = np.array([])
MAE_col_13 = np.array([])
MSE_col_13 = np.array([])

epsilons_3 = np.array([])
MAPD_col_3 = np.array([])
MAE_col_3 = np.array([])
MSE_col_3 = np.array([])

epsilons_1 = np.array([])
MAPD_col_1 = np.array([])
MAE_col_1 = np.array([])
MSE_col_1 = np.array([])

epsilons_05 = np.array([])
MAPD_col_05 = np.array([])
MAE_col_05 = np.array([])
MSE_col_05 = np.array([])

epsilons_005 = np.array([])
MAPD_col_005 = np.array([])
MAE_col_005 = np.array([])
MSE_col_005 = np.array([])

# NOTE(review): the *_001 collectors are initialised but never filled in this
# cell; kept in case other cells read them.
epsilons_001 = np.array([])
MAPD_col_001 = np.array([])
MAE_col_001 = np.array([])
MSE_col_001 = np.array([])

# Attack rates for every (iteration, noise level) pair, in run order.
TPR_col = np.array([])
FPR_col = np.array([])
TNR_col = np.array([])
FNR_col = np.array([])

for iter in range(100):
  # Re-seed every RNG so each repetition is reproducible on its own.
  random.seed(iter)
  np.random.seed(iter)
  tf.random.set_seed(iter)
  print("iteration is " + str(iter))
  sampled_churn = churn.sample(frac = 1, random_state = iter)
  # Two thirds for training (later halved into train / adversary), one third held out.
  both_train, evaluation_outside_training = train_test_split(sampled_churn, train_size = int(samples*2/3), test_size = int(samples*1/3), stratify = sampled_churn['conversion'])
  train, adversary_training = train_test_split(both_train, train_size = int(samples*1/3), stratify=both_train['conversion'])

  # Scale each real data set to [-1, 1] with its own scaler; the same scaler
  # is used later to map generated rows back to the original units.
  scaler0 = MinMaxScaler(feature_range= (-1, 1))
  scaler0 = scaler0.fit(train)
  train_GAN_real = scaler0.transform(train)
  train_GAN_real = pd.DataFrame(train_GAN_real)

  scaler1 = MinMaxScaler(feature_range= (-1, 1))
  scaler1 = scaler1.fit(adversary_training)
  adversary_training_GAN_real = scaler1.transform(adversary_training)
  adversary_training_GAN_real = pd.DataFrame(adversary_training_GAN_real)

  for noise in noise_multipliers: # we vary the noise multipliers here
    random.seed(iter)
    np.random.seed(iter)
    tf.random.set_seed(iter)

    # setting epsilon
    N = len(train)
    batch_size = 100
    ### change for different data sizes
    iterations = 100
    epochs = iterations/(N/batch_size) # should be 10

    # the noise_multiplier is not directly passed to the GAN, but the GAN code reads it from the global environment
    noise_multiplier = noise
    l2_norm_clip = 4 # see paper in validation section.
    delta= 1/N # should be 1/N
    theor_epsilon = compute_dp_sgd_privacy(N, batch_size, noise_multiplier,
                          epochs, delta) # calculate the theoretical bound of epsilon
    N = len(train)/10 # to prevent naive model
    num_microbatches = batch_size # see validation section paper.
    # print("theoretical epsilon = " + str(round(theor_epsilon[0],2))) # print epsilon

    # train GAN on train data
    gan_train = GAN(privacy = True)
    gan_train.train(data = np.array(train_GAN_real), iterations=iterations, batch_size=batch_size, sample_interval=((iterations-1)/10), model_name = "train_1.h5")

    # Generate a batch of new customers
    generator = load_model('train_1.h5')
    # NOTE: this rebinds the loop variable `noise`; the multiplier itself was
    # already copied into `noise_multiplier` above.
    noise = np.random.normal(0, 1, (int(samples*1/3), 16))
    gen_imgs = generator.predict(noise, verbose = False)
    gen_imgs = scaler0.inverse_transform(gen_imgs)
    train_GAN = pd.DataFrame(gen_imgs.reshape(int(samples*1/3), 16))
    train_GAN.columns = train.columns.values

    ####################################################
    # round the values of categorical variables, as done by Ponte et al.
    ####################################################
    train_GAN['treatment'] = train_GAN['treatment'].round()
    train_GAN['conversion'] = train_GAN['conversion'].round()
    train_GAN['visit'] = train_GAN['visit'].round()
    train_GAN['exposure'] = train_GAN['exposure'].round()

    # adversary has access to the model and samples another adversary_sample
    gan_adv = GAN(privacy = True)
    gan_adv.train(data = np.array(adversary_training_GAN_real), iterations=iterations, batch_size=batch_size, sample_interval=((iterations-1)/10), model_name = "adversary_1.h5")

    # Generate a batch of synthetic customers from the adversary's generator
    generator = load_model('adversary_1.h5')
    noise = np.random.normal(0, 1, (int(samples*1/3), 16))
    gen_imgs = generator.predict(noise, verbose = False)
    gen_imgs = scaler1.inverse_transform(gen_imgs)
    adversary_training_GAN = pd.DataFrame(gen_imgs.reshape(int(samples*1/3), 16))
    adversary_training_GAN.columns = adversary_training.columns.values

    ####################################################
    # round the values of categorical variables, as done by Ponte et al.
    ####################################################
    adversary_training_GAN['treatment'] = adversary_training_GAN['treatment'].round()
    adversary_training_GAN['conversion'] = adversary_training_GAN['conversion'].round()
    adversary_training_GAN['visit'] = adversary_training_GAN['visit'].round()
    adversary_training_GAN['exposure'] = adversary_training_GAN['exposure'].round()

    # step 1, 2: fit a kernel density estimate on each synthetic data set,
    # choosing the bandwidth by cross-validated grid search
    params = {"bandwidth": np.logspace(-1, 1, 20)}
    grid_train = GridSearchCV(KernelDensity(), params, n_jobs = -1)
    grid_train.fit(train_GAN)
    # print(grid_train.best_estimator_)
    kde_train = grid_train.best_estimator_

    grid = GridSearchCV(KernelDensity(), params, n_jobs = -1)
    grid.fit(adversary_training_GAN)
    # print(grid.best_estimator_)
    kde_adversary = grid.best_estimator_

    # step 3: score the real training rows under both estimates; a row counts
    # as "member" when the train-based estimate scores it higher
    density_train = kde_train.score_samples(train)
    density_adversary = kde_adversary.score_samples(train)
    TPR = sum(density_train > density_adversary)/len(density_train)

    # step 4: same decision rule on rows that were in neither training set
    density_train_new = kde_train.score_samples(evaluation_outside_training)
    density_adversary_new = kde_adversary.score_samples(evaluation_outside_training)
    FPR = sum(density_train_new > density_adversary_new)/len(density_train_new)
    TNR = 1 - FPR
    FNR = 1 - TPR
    print("FPR is " + str(FPR))
    print("FNR is " + str(FNR))
    print("TPR is " + str(TPR))
    print("TNR is " + str(TNR))

    TPR_col = np.append(TPR_col, TPR)
    FPR_col = np.append(FPR_col, FPR)
    TNR_col = np.append(TNR_col, TNR)
    FNR_col = np.append(FNR_col, FNR)

    # utility
    # if model cannot be estimated due to missing two classes in target variable, set utility to np.nan
    # (`except Exception` rather than a bare except, so that interrupting the
    # run is not silently recorded as a NaN result)
    try:
      MAPD_train, MAE_train, MSE_train = utility(real_data = train, protected_data = train_GAN)
    except Exception:
      MAPD_train, MAE_train, MSE_train = np.nan, np.nan, np.nan
    try:
      MAPD_adv, MAE_adv, MSE_adv = utility(real_data = train, protected_data = adversary_training_GAN)
    except Exception:
      MAPD_adv, MAE_adv, MSE_adv = np.nan, np.nan, np.nan

    # Average the utility of the two synthetic data sets.
    MAPD = (MAPD_train+MAPD_adv)/2
    MAE = (MAE_train+MAE_adv)/2
    MSE = (MSE_train+MSE_adv)/2
    # print("MAPD" + str(MAPD))

    ## to save the results per epsilon (a bit lazy admittedly).
    # The position of the multiplier in `noise_multipliers` selects the group
    # of collectors that receives this run's results.
    if noise_multiplier == noise_multipliers[0]:
      epsilons_13 = np.append(epsilons_13, _empirical_epsilon(FPR, FNR, N))
      MAPD_col_13 = np.append(MAPD_col_13, MAPD)
      MAE_col_13 = np.append(MAE_col_13, MAE)
      MSE_col_13 = np.append(MSE_col_13, MSE)

    if noise_multiplier == noise_multipliers[1]:
      epsilons_3 = np.append(epsilons_3, _empirical_epsilon(FPR, FNR, N))
      MAPD_col_3 = np.append(MAPD_col_3, MAPD)
      MAE_col_3 = np.append(MAE_col_3, MAE)
      MSE_col_3 = np.append(MSE_col_3, MSE)

    if noise_multiplier == noise_multipliers[2]:
      epsilons_1 = np.append(epsilons_1, _empirical_epsilon(FPR, FNR, N))
      MAPD_col_1 = np.append(MAPD_col_1, MAPD)
      MAE_col_1 = np.append(MAE_col_1, MAE)
      MSE_col_1 = np.append(MSE_col_1, MSE)

    if noise_multiplier == noise_multipliers[3]:
      epsilons_05 = np.append(epsilons_05, _empirical_epsilon(FPR, FNR, N))
      MAPD_col_05 = np.append(MAPD_col_05, MAPD)
      MAE_col_05 = np.append(MAE_col_05, MAE)
      MSE_col_05 = np.append(MSE_col_05, MSE)

    if noise_multiplier == noise_multipliers[4]:
      epsilons_005 = np.append(epsilons_005, _empirical_epsilon(FPR, FNR, N))
      MAPD_col_005 = np.append(MAPD_col_005, MAPD)
      MAE_col_005 = np.append(MAE_col_005, MAE)
      MSE_col_005 = np.append(MSE_col_005, MSE)

# Report the wall-clock duration of the whole experiment (seconds).
end_time = time.time()
elapsed_time = end_time - start_time
print(elapsed_time)

# Mean empirical epsilon per noise level. As written, the results are
# neither assigned nor printed.
epsilons_13.mean()

epsilons_3.mean()

epsilons_1.mean()

epsilons_05.mean()

epsilons_005.mean()

# Persist every metric as "<metric>_<label>_<samples>.csv".
np.savetxt("epsilons_13_" + str(samples) + ".csv", epsilons_13, delimiter=",")
np.savetxt("MAPD_13_" + str(samples) + ".csv", MAPD_col_13, delimiter=",")
np.savetxt("MAE_13_" + str(samples) + ".csv", MAE_col_13, delimiter=",")
np.savetxt("MSE_13_" + str(samples) + ".csv", MSE_col_13, delimiter=",")

np.savetxt("epsilons_3_" + str(samples) + ".csv", epsilons_3, delimiter=",")
np.savetxt("MAPD_3_" + str(samples) + ".csv", MAPD_col_3, delimiter=",")
np.savetxt("MAE_3_" + str(samples) + ".csv", MAE_col_3, delimiter=",")
np.savetxt("MSE_3_" + str(samples) + ".csv", MSE_col_3, delimiter=",")

np.savetxt("epsilons_1_" + str(samples) + ".csv", epsilons_1, delimiter=",")
np.savetxt("MAPD_1_" + str(samples) + ".csv", MAPD_col_1, delimiter=",")
np.savetxt("MAE_1_" + str(samples) + ".csv", MAE_col_1, delimiter=",")
np.savetxt("MSE_1_" + str(samples) + ".csv", MSE_col_1, delimiter=",")

np.savetxt("epsilons_05_" + str(samples) + ".csv", epsilons_05, delimiter=",")
np.savetxt("MAPD_05_" + str(samples) + ".csv", MAPD_col_05, delimiter=",")
np.savetxt("MAE_05_" + str(samples) + ".csv", MAE_col_05, delimiter=",")
np.savetxt("MSE_05_" + str(samples) + ".csv", MSE_col_05, delimiter=",")

np.savetxt("epsilons_005_" + str(samples) + ".csv", epsilons_005, delimiter=",")
np.savetxt("MAPD_005_" + str(samples) + ".csv", MAPD_col_005, delimiter=",")
np.savetxt("MAE_005_" + str(samples) + ".csv", MAE_col_005, delimiter=",")
np.savetxt("MSE_005_" + str(samples) + ".csv", MSE_col_005, delimiter=",")

iteration is 0
0 [D loss: 0.700648, acc.: 55.50%] [G loss: 0.797813]
0 [D loss: 0.558144, acc.: 77.50%] [G loss: 0.692582]
FPR is 0.039
FNR is 0.97
TPR is 0.03
TNR is 0.961
empirical epsilon = -0.019782008952038244
0 [D loss: 0.700585, acc.: 55.50%] [G loss: 0.796140]
0 [D loss: 0.558279, acc.: 77.00%] [G loss: 0.691218]
FPR is 0.031
FNR is 0.974
TPR is 0.026
TNR is 0.969
empirical epsilon = -0.015520228759096914
0 [D loss: 0.700627, acc.: 55.50%] [G loss: 0.794305]
0 [D loss: 0.557855, acc.: 77.00%] [G loss: 0.690995]
FPR is 0.027
FNR is 0.968
TPR is 0.032
TNR is 0.973
empirical epsilon = -0.005178675478451415
0 [D loss: 0.700227, acc.: 55.50%] [G loss: 0.794536]
0 [D loss: 0.557446, acc.: 78.50%] [G loss: 0.691513]
FPR is 0.573
FNR is 0.41600000000000004
TPR is 0.584
TNR is 0.42700000000000005
empirical epsilon = 0.002400961537538268
0 [D loss: 0.699930, acc.: 55.50%] [G loss: 0.794198]
0 [D loss: 0.557190, acc.: 79.00%] [G loss: 0.691455]
FPR is 0.0
FNR is 1.0
TPR is 0.0
TNR is 1.0


#### Results for $N = 30000$

In [11]:
# Membership-inference experiment for the 30000-row sample.
#
# For each of 100 seeds the churn data is split into a training third, an
# adversary third and a held-out third.  For every noise multiplier two
# GANs (privacy = True) are trained, one on the training third and one on
# the adversary third.  Kernel density estimates fitted on the two synthetic
# data sets give the attack's TPR / FPR, from which the empirical epsilon is
# derived.  Utility metrics and epsilons are collected per noise level and
# written to CSV at the end.
warnings.filterwarnings('ignore')

# Suppress low-level TF C++ logs (0=all, 1=INFO, 2=WARNING, 3=ERROR)
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

# Suppress Python-level TF warnings
tf.get_logger().setLevel(logging.ERROR)
logging.getLogger("tensorflow").setLevel(logging.ERROR)
absl_logging.set_verbosity(absl_logging.ERROR)

noise_multipliers = all_noise_multipliers['30000']
samples = int(data_sizes[2])

# Keep the number of iterations and the batch size the same.
random.seed(1)
np.random.seed(1)
tf.random.set_seed(1)

start_time = time.time()


def _empirical_epsilon(fpr, fnr, n):
  """Print and return the empirical epsilon for one attack outcome.

  The value is max(log((1 - 1/n - fpr) / fnr), log((1 - 1/n - fnr) / fpr)).
  When that expression cannot be evaluated (division by zero or logarithm
  of a non-positive number) "undefined privacy risk" is printed and 0 is
  returned instead.

  Args:
    fpr: false positive rate of the membership attack.
    fnr: false negative rate of the membership attack.
    n: value whose reciprocal is subtracted inside both ratios.

  Returns:
    The empirical epsilon, or 0 when it is undefined.
  """
  try:
    epsilon = max(math.log((1 - (1/n) - fpr)/fnr), math.log((1 - (1/n) - fnr)/fpr))
  except (ArithmeticError, ValueError):
    # Only arithmetic / math-domain failures mean "undefined"; anything else
    # (including KeyboardInterrupt) must propagate.
    print("undefined privacy risk")
    epsilon = 0
  print("empirical epsilon = " + str(epsilon))
  return epsilon


# Result collectors, one group per noise level (suffix = file-name label).
epsilons_13 = np.array([])
MAPD_col_13 = np.array([])
MAE_col_13 = np.array([])
MSE_col_13 = np.array([])

epsilons_3 = np.array([])
MAPD_col_3 = np.array([])
MAE_col_3 = np.array([])
MSE_col_3 = np.array([])

epsilons_1 = np.array([])
MAPD_col_1 = np.array([])
MAE_col_1 = np.array([])
MSE_col_1 = np.array([])

epsilons_05 = np.array([])
MAPD_col_05 = np.array([])
MAE_col_05 = np.array([])
MSE_col_05 = np.array([])

epsilons_005 = np.array([])
MAPD_col_005 = np.array([])
MAE_col_005 = np.array([])
MSE_col_005 = np.array([])

# NOTE(review): the *_001 collectors are initialised but never filled in this
# cell; kept in case other cells read them.
epsilons_001 = np.array([])
MAPD_col_001 = np.array([])
MAE_col_001 = np.array([])
MSE_col_001 = np.array([])

# Attack rates for every (iteration, noise level) pair, in run order.
TPR_col = np.array([])
FPR_col = np.array([])
TNR_col = np.array([])
FNR_col = np.array([])

for iter in range(100):
  # Re-seed every RNG so each repetition is reproducible on its own.
  random.seed(iter)
  np.random.seed(iter)
  tf.random.set_seed(iter)
  print("iteration is " + str(iter))
  sampled_churn = churn.sample(frac = 1, random_state = iter)
  # Two thirds for training (later halved into train / adversary), one third held out.
  both_train, evaluation_outside_training = train_test_split(sampled_churn, train_size = int(samples*2/3), test_size = int(samples*1/3), stratify = sampled_churn['conversion'])
  train, adversary_training = train_test_split(both_train, train_size = int(samples*1/3), stratify=both_train['conversion'])

  # Scale each real data set to [-1, 1] with its own scaler; the same scaler
  # is used later to map generated rows back to the original units.
  scaler0 = MinMaxScaler(feature_range= (-1, 1))
  scaler0 = scaler0.fit(train)
  train_GAN_real = scaler0.transform(train)
  train_GAN_real = pd.DataFrame(train_GAN_real)

  scaler1 = MinMaxScaler(feature_range= (-1, 1))
  scaler1 = scaler1.fit(adversary_training)
  adversary_training_GAN_real = scaler1.transform(adversary_training)
  adversary_training_GAN_real = pd.DataFrame(adversary_training_GAN_real)

  for noise in noise_multipliers: # we vary the noise multipliers here
    random.seed(iter)
    np.random.seed(iter)
    tf.random.set_seed(iter)

    # setting epsilon
    N = len(train)
    batch_size = 100
    ### change for different data sizes
    iterations = 1000
    epochs = iterations/(N/batch_size) # should be 10

    # the noise_multiplier is not directly passed to the GAN, but the GAN code reads it from the global environment
    noise_multiplier = noise
    l2_norm_clip = 4 # see paper in validation section.
    delta= 1/N # should be 1/N
    theor_epsilon = compute_dp_sgd_privacy(N, batch_size, noise_multiplier,
                          epochs, delta) # calculate the theoretical bound of epsilon
    N = len(train)/10 # to prevent naive model
    num_microbatches = batch_size # see validation section paper.
    # print("theoretical epsilon = " + str(round(theor_epsilon[0],2))) # print epsilon

    # train GAN on train data
    gan_train = GAN(privacy = True)
    gan_train.train(data = np.array(train_GAN_real), iterations=iterations, batch_size=batch_size, sample_interval=((iterations-1)/10), model_name = "train_1.h5")

    # Generate a batch of new customers
    generator = load_model('train_1.h5')
    # NOTE: this rebinds the loop variable `noise`; the multiplier itself was
    # already copied into `noise_multiplier` above.
    noise = np.random.normal(0, 1, (int(samples*1/3), 16))
    gen_imgs = generator.predict(noise, verbose = False)
    gen_imgs = scaler0.inverse_transform(gen_imgs)
    train_GAN = pd.DataFrame(gen_imgs.reshape(int(samples*1/3), 16))
    train_GAN.columns = train.columns.values

    ####################################################
    # round the values of categorical variables, as done by Ponte et al.
    ####################################################
    train_GAN['treatment'] = train_GAN['treatment'].round()
    train_GAN['conversion'] = train_GAN['conversion'].round()
    train_GAN['visit'] = train_GAN['visit'].round()
    train_GAN['exposure'] = train_GAN['exposure'].round()

    # adversary has access to the model and samples another adversary_sample
    gan_adv = GAN(privacy = True)
    gan_adv.train(data = np.array(adversary_training_GAN_real), iterations=iterations, batch_size=batch_size, sample_interval=((iterations-1)/10), model_name = "adversary_1.h5")

    # Generate a batch of synthetic customers from the adversary's generator
    generator = load_model('adversary_1.h5')
    noise = np.random.normal(0, 1, (int(samples*1/3), 16))
    gen_imgs = generator.predict(noise, verbose = False)
    gen_imgs = scaler1.inverse_transform(gen_imgs)
    adversary_training_GAN = pd.DataFrame(gen_imgs.reshape(int(samples*1/3), 16))
    adversary_training_GAN.columns = adversary_training.columns.values

    ####################################################
    # round the values of categorical variables, as done by Ponte et al.
    ####################################################
    adversary_training_GAN['treatment'] = adversary_training_GAN['treatment'].round()
    adversary_training_GAN['conversion'] = adversary_training_GAN['conversion'].round()
    adversary_training_GAN['visit'] = adversary_training_GAN['visit'].round()
    adversary_training_GAN['exposure'] = adversary_training_GAN['exposure'].round()

    # step 1, 2: fit a kernel density estimate on each synthetic data set,
    # choosing the bandwidth by cross-validated grid search
    params = {"bandwidth": np.logspace(-1, 1, 20)}
    grid_train = GridSearchCV(KernelDensity(), params, n_jobs = -1)
    grid_train.fit(train_GAN)
    # print(grid_train.best_estimator_)
    kde_train = grid_train.best_estimator_

    grid = GridSearchCV(KernelDensity(), params, n_jobs = -1)
    grid.fit(adversary_training_GAN)
    # print(grid.best_estimator_)
    kde_adversary = grid.best_estimator_

    # step 3: score the real training rows under both estimates; a row counts
    # as "member" when the train-based estimate scores it higher
    density_train = kde_train.score_samples(train)
    density_adversary = kde_adversary.score_samples(train)
    TPR = sum(density_train > density_adversary)/len(density_train)

    # step 4: same decision rule on rows that were in neither training set
    density_train_new = kde_train.score_samples(evaluation_outside_training)
    density_adversary_new = kde_adversary.score_samples(evaluation_outside_training)
    FPR = sum(density_train_new > density_adversary_new)/len(density_train_new)
    TNR = 1 - FPR
    FNR = 1 - TPR
    print("FPR is " + str(FPR))
    print("FNR is " + str(FNR))
    print("TPR is " + str(TPR))
    print("TNR is " + str(TNR))

    TPR_col = np.append(TPR_col, TPR)
    FPR_col = np.append(FPR_col, FPR)
    TNR_col = np.append(TNR_col, TNR)
    FNR_col = np.append(FNR_col, FNR)

    # utility
    # if model cannot be estimated due to missing two classes in target variable, set utility to np.nan
    # (`except Exception` rather than a bare except, so that interrupting the
    # run is not silently recorded as a NaN result)
    try:
      MAPD_train, MAE_train, MSE_train = utility(real_data = train, protected_data = train_GAN)
    except Exception:
      MAPD_train, MAE_train, MSE_train = np.nan, np.nan, np.nan
    try:
      MAPD_adv, MAE_adv, MSE_adv = utility(real_data = train, protected_data = adversary_training_GAN)
    except Exception:
      MAPD_adv, MAE_adv, MSE_adv = np.nan, np.nan, np.nan

    # Average the utility of the two synthetic data sets.
    MAPD = (MAPD_train+MAPD_adv)/2
    MAE = (MAE_train+MAE_adv)/2
    MSE = (MSE_train+MSE_adv)/2
    # print("MAPD" + str(MAPD))

    ## to save the results per epsilon (a bit lazy admittedly).
    # The position of the multiplier in `noise_multipliers` selects the group
    # of collectors that receives this run's results.
    if noise_multiplier == noise_multipliers[0]:
      epsilons_13 = np.append(epsilons_13, _empirical_epsilon(FPR, FNR, N))
      MAPD_col_13 = np.append(MAPD_col_13, MAPD)
      MAE_col_13 = np.append(MAE_col_13, MAE)
      MSE_col_13 = np.append(MSE_col_13, MSE)

    if noise_multiplier == noise_multipliers[1]:
      epsilons_3 = np.append(epsilons_3, _empirical_epsilon(FPR, FNR, N))
      MAPD_col_3 = np.append(MAPD_col_3, MAPD)
      MAE_col_3 = np.append(MAE_col_3, MAE)
      MSE_col_3 = np.append(MSE_col_3, MSE)

    if noise_multiplier == noise_multipliers[2]:
      epsilons_1 = np.append(epsilons_1, _empirical_epsilon(FPR, FNR, N))
      MAPD_col_1 = np.append(MAPD_col_1, MAPD)
      MAE_col_1 = np.append(MAE_col_1, MAE)
      MSE_col_1 = np.append(MSE_col_1, MSE)

    if noise_multiplier == noise_multipliers[3]:
      epsilons_05 = np.append(epsilons_05, _empirical_epsilon(FPR, FNR, N))
      MAPD_col_05 = np.append(MAPD_col_05, MAPD)
      MAE_col_05 = np.append(MAE_col_05, MAE)
      MSE_col_05 = np.append(MSE_col_05, MSE)

    if noise_multiplier == noise_multipliers[4]:
      epsilons_005 = np.append(epsilons_005, _empirical_epsilon(FPR, FNR, N))
      MAPD_col_005 = np.append(MAPD_col_005, MAPD)
      MAE_col_005 = np.append(MAE_col_005, MAE)
      MSE_col_005 = np.append(MSE_col_005, MSE)

# Report the wall-clock duration of the whole experiment (seconds).
end_time = time.time()
elapsed_time = end_time - start_time
print(elapsed_time)

# Mean empirical epsilon per noise level. As written, the results are
# neither assigned nor printed.
epsilons_13.mean()

epsilons_3.mean()

epsilons_1.mean()

epsilons_05.mean()

epsilons_005.mean()

# Persist every metric as "<metric>_<label>_<samples>.csv".
np.savetxt("epsilons_13_" + str(samples) + ".csv", epsilons_13, delimiter=",")
np.savetxt("MAPD_13_" + str(samples) + ".csv", MAPD_col_13, delimiter=",")
np.savetxt("MAE_13_" + str(samples) + ".csv", MAE_col_13, delimiter=",")
np.savetxt("MSE_13_" + str(samples) + ".csv", MSE_col_13, delimiter=",")

np.savetxt("epsilons_3_" + str(samples) + ".csv", epsilons_3, delimiter=",")
np.savetxt("MAPD_3_" + str(samples) + ".csv", MAPD_col_3, delimiter=",")
np.savetxt("MAE_3_" + str(samples) + ".csv", MAE_col_3, delimiter=",")
np.savetxt("MSE_3_" + str(samples) + ".csv", MSE_col_3, delimiter=",")

np.savetxt("epsilons_1_" + str(samples) + ".csv", epsilons_1, delimiter=",")
np.savetxt("MAPD_1_" + str(samples) + ".csv", MAPD_col_1, delimiter=",")
np.savetxt("MAE_1_" + str(samples) + ".csv", MAE_col_1, delimiter=",")
np.savetxt("MSE_1_" + str(samples) + ".csv", MSE_col_1, delimiter=",")

np.savetxt("epsilons_05_" + str(samples) + ".csv", epsilons_05, delimiter=",")
np.savetxt("MAPD_05_" + str(samples) + ".csv", MAPD_col_05, delimiter=",")
np.savetxt("MAE_05_" + str(samples) + ".csv", MAE_col_05, delimiter=",")
np.savetxt("MSE_05_" + str(samples) + ".csv", MSE_col_05, delimiter=",")

np.savetxt("epsilons_005_" + str(samples) + ".csv", epsilons_005, delimiter=",")
np.savetxt("MAPD_005_" + str(samples) + ".csv", MAPD_col_005, delimiter=",")
np.savetxt("MAE_005_" + str(samples) + ".csv", MAE_col_005, delimiter=",")
np.savetxt("MSE_005_" + str(samples) + ".csv", MSE_col_005, delimiter=",")

iteration is 0
0 [D loss: 0.695153, acc.: 57.00%] [G loss: 0.810395]
100 [D loss: 0.757300, acc.: 43.50%] [G loss: 0.534327]
200 [D loss: 0.611082, acc.: 65.00%] [G loss: 0.681880]
300 [D loss: 0.625804, acc.: 58.50%] [G loss: 0.800837]
400 [D loss: 0.677451, acc.: 49.50%] [G loss: 0.747677]
500 [D loss: 0.731227, acc.: 39.50%] [G loss: 0.664198]
600 [D loss: 0.731589, acc.: 39.00%] [G loss: 0.640361]
700 [D loss: 0.704183, acc.: 54.50%] [G loss: 0.842898]
800 [D loss: 0.671729, acc.: 55.00%] [G loss: 0.662926]
900 [D loss: 0.567506, acc.: 80.00%] [G loss: 0.858781]
0 [D loss: 0.551878, acc.: 81.00%] [G loss: 0.694537]
100 [D loss: 0.595369, acc.: 73.00%] [G loss: 0.850014]
200 [D loss: 0.513830, acc.: 90.00%] [G loss: 0.914746]
300 [D loss: 0.688025, acc.: 65.50%] [G loss: 0.832280]
400 [D loss: 0.521459, acc.: 83.50%] [G loss: 0.939046]
500 [D loss: 0.494869, acc.: 88.00%] [G loss: 1.084590]
600 [D loss: 0.773750, acc.: 38.50%] [G loss: 0.525988]
700 [D loss: 0.521005, acc.: 79.50%] 