### Assess empirical epsilon for Anand & Lee (2022) Method

In [1]:
import math
import numpy as np
import statistics
from sklearn import metrics
from __future__ import print_function, division
from functools import partial
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
import argparse
import keras
from tensorflow.keras import backend as K
from sklearn.linear_model import LinearRegression
import sys
import matplotlib.pyplot as plt
from tensorflow.keras.optimizers import Adam
import pandas as pd
import io
from keras.models import load_model
import time
from scipy.stats import pearsonr
from keras.layers import Input, Dense, Reshape, Flatten, Dropout, multiply, GaussianNoise
from keras.layers import BatchNormalization, Activation, Embedding, ZeroPadding2D
from keras.layers import MaxPooling2D, LeakyReLU
from keras.layers.convolutional import UpSampling2D, Conv2D, Conv1D
from keras.models import Sequential, Model
from keras import losses
import keras.backend as K
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KernelDensity
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import random

In [2]:
# Set global seeds so that repeated runs of the notebook are comparable.
seed = 1
# NOTE: PYTHONHASHSEED is read at interpreter start-up, so assigning it here
# only affects Python processes launched after this point.
os.environ['PYTHONHASHSEED'] = str(seed)
# TF_DETERMINISTIC_OPS is an on/off switch, not a seed. It was previously set
# to str(seed), which only enabled it because the seed happened to be 1.
os.environ["TF_DETERMINISTIC_OPS"] = "1"
np.random.seed(seed)
random.seed(seed)
tf.random.set_seed(seed)
# Sanity check: the first draw after seeding should be the same on every run.
print(random.random())

0.13436424411240122


In [3]:
# # define utility
# def utility(real_data, protected_data):
#   from sklearn.linear_model import LinearRegression
#   from sklearn.metrics import mean_absolute_percentage_error, mean_absolute_error, mean_squared_error
#   reg = LinearRegression()
#   reg.fit(np.array(real_data)[:,1:9],np.array(real_data)[:,0])
#   reg_protect = LinearRegression()
#   reg_protect.fit(np.array(protected_data)[:,1:9],np.array(protected_data)[:,0])
#   MAPD = mean_absolute_percentage_error(reg.coef_, reg_protect.coef_)*100
#   MAE = mean_absolute_error(reg.coef_, reg_protect.coef_)
#   MSE = mean_squared_error(reg.coef_, reg_protect.coef_)
#   return MAPD, MAE, MSE

In [4]:
class GAN():
    """Small fully connected GAN that synthesizes a single numeric column.

    The generator maps 1-dimensional Gaussian noise to one value through a
    tanh output, so generated values lie in (-1, 1). The discriminator is a
    one-hidden-layer classifier with a sigmoid output.
    """

    def __init__(self, privacy):
        """Build and compile the discriminator, the generator and the stacked model.

        Args:
            privacy: if truthy, the discriminator is rebuilt and compiled with
                a ``DPKerasAdamOptimizer`` instead of plain Adam.
        """
        self.img_rows = 1
        self.img_cols = 1
        self.img_shape = (self.img_cols,)
        self.latent_dim = 1

        optimizer = keras.optimizers.Adam()
        self.discriminator = self.build_discriminator()
        self.discriminator.compile(loss='binary_crossentropy',
                                   optimizer=optimizer,
                                   metrics=['accuracy'])
        if privacy:
            print("using differential privacy")
            # NOTE(review): DPKerasAdamOptimizer, l2_norm_clip, noise_multiplier,
            # num_microbatches and lr are not defined in the visible part of
            # this file; they must exist as globals before privacy=True is used.
            # NOTE(review): the loss below uses from_logits=True although the
            # discriminator ends in a sigmoid layer - confirm this is intended.
            self.discriminator = self.build_discriminator()
            self.discriminator.compile(optimizer=DPKerasAdamOptimizer(
                l2_norm_clip=l2_norm_clip,
                noise_multiplier=noise_multiplier,
                num_microbatches=num_microbatches,
                learning_rate=lr),
                loss=tf.keras.losses.BinaryCrossentropy(from_logits=True, reduction=tf.losses.Reduction.NONE), metrics=['accuracy'])

        # Build the generator
        self.generator = self.build_generator()

        # The generator takes noise as input and generates samples
        z = Input(shape=(self.latent_dim,))
        img = self.generator(z)

        # For the combined model we will only train the generator
        self.discriminator.trainable = False

        # The discriminator takes generated samples as input and determines validity
        valid = self.discriminator(img)

        # The combined model (stacked generator and discriminator)
        # trains the generator to fool the discriminator
        self.combined = Model(z, valid)
        self.combined.compile(loss='binary_crossentropy', optimizer=optimizer)

    def build_generator(self):
        """Return the generator: noise of size ``latent_dim`` -> tanh output of the same size."""
        model = Sequential()
        model.add(Dense(self.latent_dim, input_dim=self.latent_dim))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Dense(1024, input_shape=self.img_shape))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Dense(self.latent_dim))
        model.add(Activation("tanh"))

        noise = Input(shape=(self.latent_dim,))
        img = model(noise)
        return Model(noise, img)

    def build_discriminator(self):
        """Return the discriminator: one sample -> sigmoid probability of being real."""
        model = Sequential()
        model.add(Dense(1024, input_shape=self.img_shape))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Dense(1, activation='sigmoid'))

        img = Input(shape=self.img_shape)
        validity = model(img)
        return Model(img, validity)

    def train(self, data, iterations, batch_size, model_name, generator_losses=None, discriminator_acc=None, correlations=None, accuracy=None, MAPD_col=None, MSE_col=None, MAE_col=None):
        """Train the GAN and save the generator to ``model_name``.

        Each iteration draws ``batch_size`` rows of ``data`` with replacement,
        trains the discriminator on that real batch and on a generated batch,
        then trains the generator through the combined model.

        Args:
            data: NumPy array of real samples, indexed along its first axis.
            iterations: number of training iterations.
            batch_size: number of samples per batch.
            model_name: path passed to ``self.generator.save``.
            generator_losses: optional earlier generator losses to extend.
            discriminator_acc: optional earlier discriminator accuracies (in
                percent) to extend.
            correlations, accuracy, MAPD_col, MSE_col, MAE_col: unused; kept so
                existing calls keep working.

        After training, the histories are available as
        ``self.generator_losses`` and ``self.discriminator_acc``. Previously
        they were computed and then thrown away.
        """
        # Adversarial ground truths
        valid = np.ones((batch_size, 1))
        fake = np.zeros((batch_size, 1))

        # Collect per-iteration values in lists and append them once at the
        # end, instead of re-allocating an array on every iteration.
        new_generator_losses = []
        new_discriminator_acc = []

        for _ in range(iterations):
            # ---------------------
            #  Train Discriminator
            # ---------------------

            # Select a random batch of real samples
            idx = np.random.randint(0, data.shape[0], batch_size)
            imgs = data[idx]

            # Generate a batch of synthetic samples
            noise = np.random.normal(0, 1, (batch_size, self.latent_dim))
            gen_imgs = self.generator.predict(noise, verbose=False)

            d_loss_real = self.discriminator.train_on_batch(imgs, valid)
            d_loss_fake = self.discriminator.train_on_batch(gen_imgs, fake)
            # Element-wise mean of [loss, accuracy] over the two batches
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

            # ---------------------
            #  Train Generator
            # ---------------------
            # Train the generator to have the discriminator label samples as valid
            noise = np.random.normal(0, 1, (batch_size, self.latent_dim))
            g_loss = self.combined.train_on_batch(noise, valid)

            new_discriminator_acc.append(100 * d_loss[1])
            new_generator_losses.append(g_loss)

        self.discriminator_acc = np.append(
            [] if discriminator_acc is None else discriminator_acc,
            new_discriminator_acc)
        self.generator_losses = np.append(
            [] if generator_losses is None else generator_losses,
            new_generator_losses)

        self.generator.save(model_name)

In [5]:
# Re-seed the Python, NumPy and TensorFlow generators (in that order) with 1.
for reseed in (random.seed, np.random.seed, tf.random.set_seed):
    reseed(1)

# Start from an empty array of empirical epsilons.
epsilons = np.array([])
# MAPD_col = np.array([])
# MAE_col = np.array([])
# MSE_col = np.array([])

Import data.

In [6]:
# Load the gzip-compressed Criteo sample; the first row holds the column names.
# The separator is a plain comma. The previous '\,' was an invalid escape
# sequence in a non-raw string and turned the separator into a regex.
# engine='python' is kept so numeric parsing matches earlier runs.
train_data = pd.read_csv("../../Data/Criteo/cleaned_criteo_small.gz",
                         compression='gzip',
                         sep=',',
                         header=0,
                         engine='python')

In [7]:
train_data

Unnamed: 0,f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,treatment,conversion,visit,exposure
0,2.954807,2.368327,2.178833,1.480282,2.330251,20.349663,0.000205,2.100938,48.158740,2.579463,1.667778,0.84478,1,0,0,0
1,3.100481,2.308533,2.105887,107.757316,2.330251,61.279994,0.275765,1.575636,53.083067,2.579463,1.667778,0.84478,1,0,0,0
2,2.945821,2.308533,2.139045,9.914112,2.330251,61.279994,0.003786,1.575636,46.910867,3.486706,1.667778,0.84478,0,0,0,0
3,3.177230,2.308533,2.105887,107.757316,2.330251,61.279994,0.089715,1.575636,53.083067,2.579463,1.667778,0.84478,0,0,0,0
4,3.274234,2.308533,2.105887,107.757316,2.330251,61.279994,0.000003,1.575636,53.083067,2.579463,1.667778,0.84478,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,3.037003,2.308533,2.184994,107.757316,2.330251,61.279994,0.275765,1.575636,47.003884,2.579463,1.667778,0.84478,1,0,0,0
99996,3.286020,2.308533,2.105887,107.757316,2.330251,61.279994,0.275765,1.575636,53.083067,2.579463,1.667778,0.84478,1,0,0,0
99997,3.285632,2.308533,2.105887,107.757316,2.330251,61.279994,0.275765,1.575636,53.083067,2.579463,1.667778,0.84478,1,0,0,0
99998,2.534995,2.308533,2.108605,107.757316,2.330251,61.279994,1.342378,1.575636,43.484412,3.572938,1.667778,0.84478,1,0,0,0


Drop duplicates.

In [8]:
# Keep only the first occurrence of each fully identical row.
train_data = train_data.drop_duplicates()

Function to split into train and test sets, ensuring that train has an even number of rows.

In [9]:
def train_test_split_even(X, train_size, random_state=None):
    """Split ``X`` into train and test parts, forcing an even number of train rows.

    Args:
        X: pandas DataFrame (or Series) to split.
        train_size: forwarded to ``train_test_split``.
        random_state: optional seed forwarded to ``train_test_split``. The
            default ``None`` keeps the previous behaviour of drawing from the
            global NumPy random state.

    Returns:
        ``(X_train, X_test)``. If the initial train part has an odd number of
        rows, its last row is moved to the end of the test part.
    """
    # Split the data normally
    X_train, X_test = train_test_split(
        X, train_size=train_size, random_state=random_state
    )

    if len(X_train) % 2 != 0:
        # Move the last row (by position) from train to test
        X_test = pd.concat([X_test, X_train.iloc[-1:]], axis=0)
        X_train = X_train.iloc[:-1]

    return X_train, X_test

Step through loop one step at a time.

In [10]:
# current_data_sample, _ = train_test_split(
#     train_data, 
#     train_size=samples, 
#     stratify=train_data['conversion'], 
#     random_state=seed
# )

In [11]:
# iter=0

In [12]:
# random.seed(iter)
# np.random.seed(iter)
# tf.random.set_seed(iter)
# # Split data
# internal_data, external_data = train_test_split_even(current_data_sample, train_size=0.67)
# marketer_train, adversary_train = train_test_split(internal_data, train_size=0.5)
# N = len(marketer_train)/10

In [13]:
# marketer_train

In [14]:
# adversary_train

In [15]:
# marketer_train_outcome = marketer_train[['f0']]
# marketer_train_covariates = marketer_train.drop('f0', axis=1)

In [16]:
# marketer_train_outcome

In [17]:
# marketer_train_covariates

In [18]:
# adversary_train_outcome = adversary_train[['f0']]
# adversary_train_covariates = adversary_train.drop('f0', axis=1)

In [19]:
# scaler0 = MinMaxScaler(feature_range = (-1, 1))
# scaler0 = scaler0.fit(marketer_train_outcome)
# marketer_train_outcome = scaler0.transform(marketer_train_outcome)
# marketer_train_outcome = pd.DataFrame(marketer_train_outcome)

In [20]:
# marketer_train_outcome

In [21]:
# print("start train set training")

In [22]:
# gan_train = GAN(privacy = False)

In [23]:
# gan_train.train(data = np.array(marketer_train_outcome), iterations=iterations, batch_size=batch_size, model_name = "train_anand.h5")

In [24]:
# # Generate a batch of new customers
# generator = load_model('train_anand.h5', compile = True)

In [25]:
# noise = np.random.normal(0, 1, (len(marketer_train_outcome), 1))
# gen_imgs = generator.predict(noise, verbose = False)
# gen_imgs = scaler0.inverse_transform(gen_imgs)
# gen_imgs = gen_imgs.reshape(len(marketer_train_outcome), 1)
# marketer_train_GAN = pd.DataFrame(gen_imgs)

In [26]:
# marketer_train_GAN

In [27]:
# # adversary has access to the model and samples another adversary_sample
# print("start adversary set training")

# scaler1 = MinMaxScaler(feature_range = (-1, 1))
# scaler1 = scaler1.fit(adversary_train_outcome)
# adversary_train_outcome = scaler1.transform(adversary_train_outcome)
# adversary_train_outcome = pd.DataFrame(adversary_train_outcome)

# gan_adv = GAN(privacy = False)
# gan_adv.train(data = np.array(adversary_train_outcome), iterations=iterations, batch_size=batch_size, model_name = "adversary_anand.h5")

# generator = load_model('adversary_anand.h5', compile = True)

# noise = np.random.normal(0, 1, (len(adversary_train_outcome), 1))
# # Generate a batch of new images
# gen_imgs = generator.predict(noise, verbose = False)
# gen_imgs = scaler1.inverse_transform(gen_imgs)
# gen_imgs = gen_imgs.reshape(len(adversary_train_outcome), 1)
# adversary_train_GAN = pd.DataFrame(gen_imgs)

In [28]:
# # combine one protected variable with other
# marketer_synthetic = pd.concat([marketer_train_covariates.reset_index(drop = True), marketer_train_GAN], axis=1)
# adversary_synthetic = pd.concat([adversary_train_covariates.reset_index(drop = True), adversary_train_GAN], axis=1)

In [29]:
# marketer_synthetic

In [30]:
# # stap 1, 2
# marketer_synthetic.rename(columns = {0:'f0'}, inplace = True)
# adversary_synthetic.rename(columns = {0:'f0'}, inplace = True)
# params = {"bandwidth": np.logspace(-1, 1, 20)}
# grid_marketer = GridSearchCV(KernelDensity(), params, n_jobs = -1)
# grid_marketer.fit(marketer_synthetic)
# marketer_kde = grid_marketer.best_estimator_

In [31]:
# marketer_synthetic

In [32]:
# grid_adversary = GridSearchCV(KernelDensity(), params, n_jobs = -1)
# grid_adversary.fit(adversary_synthetic)
# adversary_kde = grid_adversary.best_estimator_
# external_data = external_data[marketer_synthetic.columns]
# marketer_train = marketer_train[marketer_synthetic.columns]

In [33]:
# # stap 3
# density_marketer = marketer_kde.score_samples(marketer_train) # f1
# density_adversary = adversary_kde.score_samples(marketer_train) # f2
# # print(density_train > density_adversary)  # f1 > f2
# TPR = sum(density_marketer > density_adversary)/len(density_marketer) # all training!

In [34]:
# # stap 4
# density_marketer_new = marketer_kde.score_samples(external_data) # f1
# density_adversary_new = adversary_kde.score_samples(external_data) # f2
# FPR = sum(density_marketer_new > density_adversary_new)/len(density_marketer_new) # random!

In [35]:
# TNR = 1 - FPR
# FNR = 1 - TPR

# risk_vals = [(1 - (1/N) - FPR)/FNR, (1 - (1/N) - FNR)/FPR]

# math.log(risk_vals[np.argmax(risk_vals)])

Perform the privacy simulation: 100 iterations for each sample size (300, 3,000 and 30,000).

In [36]:
# Results container: one list of empirical epsilons per sample size.
epsilons = dict()
# Name of the column that is replaced by GAN-generated values.
var_to_synthesize = "f0"

In [37]:
import warnings
# Silence all warnings for the simulation. This includes NumPy's
# divide-by-zero warning raised when an error rate below is exactly 0.
warnings.filterwarnings('ignore')

# GAN training settings, identical for every sample size and every run
iterations = 1000
batch_size = 100

# loop over sample sizes
for data_size in [300, 3000, 30000]:

    epsilons[str(data_size)] = []

    # Draw three times the nominal size: about 67% becomes internal data, and
    # half of that (roughly data_size rows) is the marketer's training set.
    samples = int(data_size*3)

    # perform 100 simulations
    for sim_run in range(100):

        # sample from train_data according to current training data size
        current_data_sample, _ = train_test_split(train_data,
                                                  train_size=samples,
                                                  stratify=train_data['conversion'],
                                                  random_state=seed+sim_run
                                                  )

        # set seeds
        random.seed(sim_run)
        np.random.seed(sim_run)
        tf.random.set_seed(sim_run)

        # Split data into internal and external, and split internal into marketer and adversary
        internal_data, external_data = train_test_split_even(current_data_sample, train_size=0.67)
        marketer_train, adversary_train = train_test_split(internal_data, train_size=0.5)
        N = len(marketer_train)/10

        # select the variable to synthesize
        marketer_train_outcome = marketer_train[[var_to_synthesize]]
        marketer_train_covariates = marketer_train.drop(var_to_synthesize, axis=1)

        adversary_train_outcome = adversary_train[[var_to_synthesize]]
        adversary_train_covariates = adversary_train.drop(var_to_synthesize, axis=1)

        # min-max scale the variable to synthesize to (-1, 1), the range of the generator's tanh output
        scaler0 = MinMaxScaler(feature_range=(-1, 1))
        marketer_train_outcome = pd.DataFrame(scaler0.fit_transform(marketer_train_outcome))

        # train GAN on marketer train data
        gan_train = GAN(privacy=False)
        gan_train.train(data=np.array(marketer_train_outcome), iterations=iterations, batch_size=batch_size, model_name="train_anand.h5")

        # generate one synthetic value per marketer training row, back on the original scale
        generator = load_model('train_anand.h5', compile=True)
        noise = np.random.normal(0, 1, (len(marketer_train_outcome), 1))
        gen_imgs = generator.predict(noise, verbose=False)
        gen_imgs = scaler0.inverse_transform(gen_imgs)
        gen_imgs = gen_imgs.reshape(len(marketer_train_outcome), 1)
        marketer_train_GAN = pd.DataFrame(gen_imgs)

        # repeat scaling, training and generation on the adversary's data
        scaler1 = MinMaxScaler(feature_range=(-1, 1))
        adversary_train_outcome = pd.DataFrame(scaler1.fit_transform(adversary_train_outcome))

        gan_adv = GAN(privacy=False)
        gan_adv.train(data=np.array(adversary_train_outcome), iterations=iterations, batch_size=batch_size, model_name="adversary_anand.h5")

        generator = load_model('adversary_anand.h5', compile=True)
        noise = np.random.normal(0, 1, (len(adversary_train_outcome), 1))
        gen_imgs = generator.predict(noise, verbose=False)
        gen_imgs = scaler1.inverse_transform(gen_imgs)
        gen_imgs = gen_imgs.reshape(len(adversary_train_outcome), 1)
        adversary_train_GAN = pd.DataFrame(gen_imgs)

        # combine synthetic variables with other unprotected covariates
        marketer_synthetic = pd.concat([marketer_train_covariates.reset_index(drop=True), marketer_train_GAN], axis=1)
        adversary_synthetic = pd.concat([adversary_train_covariates.reset_index(drop=True), adversary_train_GAN], axis=1)

        # The generated column arrives named 0; give it the name of the
        # variable it replaces (previously hard-coded as 'f0').
        marketer_synthetic.rename(columns={0: var_to_synthesize}, inplace=True)
        adversary_synthetic.rename(columns={0: var_to_synthesize}, inplace=True)

        # fit marketer kernel density estimate to marketer synthetic data
        params = {"bandwidth": np.logspace(-1, 1, 20)}
        grid_marketer = GridSearchCV(KernelDensity(), params, n_jobs=-1)
        grid_marketer.fit(marketer_synthetic)
        marketer_kde = grid_marketer.best_estimator_

        # fit adversary kernel density estimate to adversary synthetic data
        grid_adversary = GridSearchCV(KernelDensity(), params, n_jobs=-1)
        grid_adversary.fit(adversary_synthetic)
        adversary_kde = grid_adversary.best_estimator_

        # reorder columns in external data and marketer train to match order in synthetic data sets
        external_data = external_data[marketer_synthetic.columns]
        marketer_train = marketer_train[marketer_synthetic.columns]

        # TPR: share of marketer training rows scored higher by the marketer kde than by the adversary kde
        density_marketer = marketer_kde.score_samples(marketer_train)
        density_adversary = adversary_kde.score_samples(marketer_train)
        TPR = np.mean(density_marketer > density_adversary)

        # FPR: the same share, computed on the external data
        density_marketer_new = marketer_kde.score_samples(external_data)
        density_adversary_new = adversary_kde.score_samples(external_data)
        FPR = np.mean(density_marketer_new > density_adversary_new)

        # compute empirical epsilon
        FNR = 1 - TPR
        risk_vals = [(1 - (1/N) - FPR)/FNR, (1 - (1/N) - FNR)/FPR]
        worst_case = risk_vals[np.argmax(risk_vals)]
        # A rate of exactly 0 gives an infinite ratio and hence epsilon = inf.
        # A non-positive ratio has no logarithm; record NaN instead of letting
        # math.log raise and abort the remaining simulations.
        eps_val = math.log(worst_case) if worst_case > 0 else float('nan')

        epsilons[str(data_size)].append(eps_val)



In [38]:
# Tabulate the results (one column per sample size, one row per simulation
# run) and write them to disk without the row index.
epsilon_results = pd.DataFrame(epsilons)
epsilon_results.to_csv('anand_empirical_epsilon_results.csv', index=False)

In [39]:
epsilon_results

Unnamed: 0,300,3000,30000
0,-0.003120,0.499265,0.077857
1,0.101209,1.521037,0.091245
2,-0.027053,0.131938,0.050818
3,inf,0.353071,0.076570
4,0.375646,0.182313,0.184459
...,...,...,...
95,0.303278,0.193810,0.144463
96,inf,0.174953,0.071056
97,0.230347,0.349426,0.102287
98,-0.010017,0.082261,0.069750


In [40]:
# Largest empirical epsilon observed for each sample size (column-wise maximum).
epsilon_results.max(axis="index")

300           inf
3000     2.812081
30000    0.961609
dtype: float64

In [None]:
# for iter in range(0,100):
#   random.seed(iter)
#   np.random.seed(iter)
#   tf.random.set_seed(iter)
#   # Split data
#   internal_data, external_data = train_test_split_even(current_data_sample, train_size=0.67)
#   marketer_train, adversary_train = train_test_split(internal_data, train_size=0.5)
#   N = len(marketer_train)/10

  # train_outcome = train_original[['Tenure']]
  # train_covariates = train_original.drop('Tenure', axis=1)

  # adversary_training_outcome = adversary_training[['Tenure']]
  # adversary_training_covariates = adversary_training.drop('Tenure', axis=1)

  
  # scaler0 = MinMaxScaler(feature_range= (-1, 1))
  # scaler0 = scaler0.fit(train_outcome)
  # train_outcome = scaler0.transform(train_outcome)
  # train_outcome = pd.DataFrame(train_outcome)

  # print("start train set training")
  # gan_train = GAN(privacy = False)
  # gan_train.train(data = np.array(train_outcome), iterations=iterations, batch_size=batch_size, model_name = "train_anand.h5")

  # # Generate a batch of new customers
  # generator = load_model('train_anand.h5', compile = True)
  # noise = np.random.normal(0, 1, (len(train_outcome), 1))
  # gen_imgs = generator.predict(noise, verbose = False)
  # gen_imgs = scaler0.inverse_transform(gen_imgs)
  # gen_imgs = gen_imgs.reshape(len(train_outcome), 1)
  # train_GAN = pd.DataFrame(gen_imgs)

  # # adversary has access to the model and samples another adversary_sample
  # print("start adversary set training")

  # scaler1 = MinMaxScaler(feature_range= (-1, 1))
  # scaler1 = scaler1.fit(adversary_training_outcome)
  # adversary_training_outcome = scaler1.transform(adversary_training_outcome)
  # adversary_training_outcome = pd.DataFrame(adversary_training_outcome)

  # gan_adv = GAN(privacy = False)
  # gan_adv.train(data = np.array(adversary_training_outcome), iterations=iterations, batch_size=batch_size, model_name = "adversary_anand.h5")

  # generator = load_model('adversary_anand.h5', compile = True)
  # generated_data = []

  # noise = np.random.normal(0, 1, (len(adversary_training_outcome), 1))
  # # Generate a batch of new images
  # gen_imgs = generator.predict(noise, verbose = False)
  # gen_imgs = scaler1.inverse_transform(gen_imgs)
  # gen_imgs = gen_imgs.reshape(len(adversary_training_outcome), 1)
  # adversary_training_GAN = pd.DataFrame(gen_imgs)

  # # combine one protected variable with other
  # train = pd.concat([train_covariates.reset_index(drop = True), train_GAN], axis=1)
  # adversary = pd.concat([adversary_training_covariates.reset_index(drop = True), adversary_training_GAN], axis=1)

  # # stap 1, 2
  # train.rename(columns = {0:'Tenure'}, inplace = True)
  # adversary.rename(columns = {0:'Tenure'}, inplace = True)
  # params = {"bandwidth": np.logspace(-1, 1, 20)}
  # grid_train = GridSearchCV(KernelDensity(), params, n_jobs = -1)
  # grid_train.fit(train)
  # kde_train = grid_train.best_estimator_

  # params = {"bandwidth": np.logspace(-1, 1, 20)}
  # grid = GridSearchCV(KernelDensity(), params, n_jobs = -1)
  # grid.fit(adversary)
  # kde_adversary = grid.best_estimator_
  # evaluation_outside_training = evaluation_outside_training[['Churn','Sex', 'Age', 'Contact', 'Household_size', 'Social_class', 'Income', 'Ethnicity', 'Tenure']]

  # # stap 3
  # density_train = kde_train.score_samples(train) # f1
  # density_adversary = kde_adversary.score_samples(train) # f2
  # #print(density_train > density_adversary)  # f1 > f2
  # TPR = sum(density_train > density_adversary)/len(density_train) # all training!

  # # stap 4
  # density_train_new = kde_train.score_samples(evaluation_outside_training) # f1
  # density_adversary_new = kde_adversary.score_samples(evaluation_outside_training) # f2
  # #density_train_new > density_adversary_new  # f1 > f2
  # #print(density_train_new > density_adversary_new)  # f1 > f2
  # FPR = sum(density_train_new > density_adversary_new)/len(density_train_new) # random!

  # TNR = 1 - FPR
  # FNR = 1 - TPR
  # print("FPR is " + str(FPR))
  # print("FNR is " + str(FNR))
  # print("TPR is " + str(TPR))
  # print("TNR is " + str(TNR))
  # try:
  #   epsilons = np.append(epsilons,max(math.log((1 - (1/N) - FPR)/FNR), math.log((1 - (1/N) - FNR)/FPR)))
  #   print("empirical epsilon = " + str(max(math.log((1 - (1/N) - FPR)/FNR), math.log((1 - (1/N) - FNR)/FPR))))
  # except:
  #   epsilons = np.append(epsilons, math.log((1 - (1/N) - FPR)/FNR))
  #   print("empirical epsilon = " + str(math.log((1 - (1/N) - FPR)/FNR)))

  # # utility
  # MAPD_train, MAE_train, MSE_train = utility(real_data = train, protected_data = train_GAN)
  # MAPD_adv, MAE_adv, MSE_adv = utility(real_data = train, protected_data = adversary_training_GAN)
  # MAPD_col = np.append(MAPD_col, ((MAPD_train+MAPD_adv)/2))
  # MAE_col = np.append(MAE_col, ((MAE_train+MAE_adv)/2))
  # MSE_col = np.append(MSE_col, ((MSE_train+MSE_adv)/2))
  # print("MAPD train = " + str(MAPD_train))
  # print("MAPD adversary = " + str(MAPD_adv))

# np.savetxt("epsilons_anand_30000.csv", epsilons, delimiter=",")
# np.savetxt("MAPD_anand_30000.csv", MAPD_col, delimiter=",")
# np.savetxt("MAE_anand_30000.csv", MAE_col, delimiter=",")
# np.savetxt("MSE_anand_30000.csv", MSE_col, delimiter=",")