Perform the privacy attack outlined by [Ponte et al. (2024)](https://github.com/GilianPonte/likelihood_based_privacy_attack).

In [6]:
# first we need to import some dependencies.
import pandas as pd
import numpy as np

# import privacy attack
# !pip install git+https://github.com/GilianPonte/likelihood_based_privacy_attack.git
from likelihood_based_privacy_attack import attacks

# Read the three unprotected CSV files (external data, train data and adversary
# training data) in one pass; the file names are listed in the same order as
# the variables they are assigned to.
evaluation_outside_training, train, adversary_training = (
    pd.read_csv(csv_path)
    for csv_path in ("external_data.csv", "train_data.csv", "adv_data.csv")
)

In [None]:
# Load the protected versions of the train data and the adversary training data.
synth_train = pd.read_csv("synthetic_train/sd_0.csv")
synth_adversary_training = pd.read_csv("synthetic_adv/sd_0.csv")

# Run the privacy attack. The call is the last expression of the cell, so its
# return value is shown as the cell output.
attacks.privacy_attack(
    seed=1,
    simulations=10,
    train=train,
    adversary=adversary_training,
    outside_training=evaluation_outside_training,
    protected_training=synth_train,
    protected_adversary=synth_adversary_training,
)

iteration is 0
FPR is 0.9534988713318284
FNR is 0.04804270462633453
TPR is 0.9519572953736655
TNR is 0.046501128668171576
empirical epsilon = -0.006301914657026955
iteration is 1
FPR is 0.9534988713318284
FNR is 0.04804270462633453
TPR is 0.9519572953736655
TNR is 0.046501128668171576
empirical epsilon = -0.006301914657026955
iteration is 2
FPR is 0.9534988713318284
FNR is 0.04804270462633453
TPR is 0.9519572953736655
TNR is 0.046501128668171576
empirical epsilon = -0.006301914657026955
iteration is 3
FPR is 0.9534988713318284
FNR is 0.04804270462633453
TPR is 0.9519572953736655
TNR is 0.046501128668171576
empirical epsilon = -0.006301914657026955
iteration is 4
FPR is 0.9534988713318284
FNR is 0.04804270462633453
TPR is 0.9519572953736655
TNR is 0.046501128668171576
empirical epsilon = -0.006301914657026955
iteration is 5
FPR is 0.9534988713318284
FNR is 0.04804270462633453
TPR is 0.9519572953736655
TNR is 0.046501128668171576
empirical epsilon = -0.006301914657026955
iteration is 6
F

(array([-0.00630191, -0.00630191, -0.00630191, -0.00630191, -0.00630191,
        -0.00630191, -0.00630191, -0.00630191, -0.00630191, -0.00630191]),
 0.9534988713318284,
 0.046501128668171576,
 0.04804270462633453,
 0.9519572953736655)

In [11]:
# Run the attack once per protected data set sd_0 ... sd_19 and keep, for each
# data set, the mean of the first element returned by the attack.
# NOTE(review): per the recorded output, result[0] looks like the array of
# per-simulation empirical epsilons -- confirm against the attack package.
results = []
for i in range(20):
    # Protected train / adversary data for data set i.
    synth_train = pd.read_csv(f"synthetic_train/sd_{i}.csv")
    synth_adversary_training = pd.read_csv(f"synthetic_adv/sd_{i}.csv")

    result = attacks.privacy_attack(
        seed=1, simulations=10,
        train=train, adversary=adversary_training,
        outside_training=evaluation_outside_training,
        protected_training=synth_train,
        protected_adversary=synth_adversary_training,
    )

    results.append(np.mean(result[0]))

iteration is 0
FPR is 0.9534988713318284
FNR is 0.04804270462633453
TPR is 0.9519572953736655
TNR is 0.046501128668171576
empirical epsilon = -0.006301914657026955
iteration is 1
FPR is 0.9534988713318284
FNR is 0.04804270462633453
TPR is 0.9519572953736655
TNR is 0.046501128668171576
empirical epsilon = -0.006301914657026955
iteration is 2
FPR is 0.9534988713318284
FNR is 0.04804270462633453
TPR is 0.9519572953736655
TNR is 0.046501128668171576
empirical epsilon = -0.006301914657026955
iteration is 3
FPR is 0.9534988713318284
FNR is 0.04804270462633453
TPR is 0.9519572953736655
TNR is 0.046501128668171576
empirical epsilon = -0.006301914657026955
iteration is 4
FPR is 0.9534988713318284
FNR is 0.04804270462633453
TPR is 0.9519572953736655
TNR is 0.046501128668171576
empirical epsilon = -0.006301914657026955
iteration is 5
FPR is 0.9534988713318284
FNR is 0.04804270462633453
TPR is 0.9519572953736655
TNR is 0.046501128668171576
empirical epsilon = -0.006301914657026955
iteration is 6
F

In [12]:
# Display the per-data-set means of result[0] collected by the loop over sd_0 ... sd_19.
results

[-0.006301914657026955,
 0.057459346734184215,
 0.11042341032251335,
 0.023185507032990913,
 -0.0013203114834791226,
 0.15240463651665953,
 0.17260694200364485,
 0.008400986126911921,
 0.17004233768332846,
 0.023707984414492688,
 0.0385259068025922,
 0.1383314155479634,
 0.04716964383108578,
 0.27828136479447213,
 -0.0012682839544010311,
 0.14383829299339287,
 0.10221974134456754,
 0.02454248869081174,
 0.1853216799753475,
 -0.0034086048715830848]

Repeat for data generated using synthpop.

In [2]:
# Same procedure for the three synthpop-generated data sets: run the attack on
# each and keep the mean of the first element of the returned tuple.
# NOTE(review): per the recorded output, result[0] looks like the array of
# per-simulation empirical epsilons -- confirm against the attack package.
results_synthpop = []
for i in range(3):
    # synthpop-generated train / adversary data for data set i.
    synth_train = pd.read_csv(f"synthetic_train/synthpop_train_data_{i}.csv")
    synth_adversary_training = pd.read_csv(f"synthetic_adv/synthpop_adv_data_{i}.csv")

    result = attacks.privacy_attack(
        seed=1, simulations=10,
        train=train, adversary=adversary_training,
        outside_training=evaluation_outside_training,
        protected_training=synth_train,
        protected_adversary=synth_adversary_training,
    )

    results_synthpop.append(np.mean(result[0]))

iteration is 0
FPR is 0.4690744920993228
FNR is 0.47820284697508897
TPR is 0.521797153024911
TNR is 0.5309255079006772
empirical epsilon = 0.09795563288509145
iteration is 1
FPR is 0.4690744920993228
FNR is 0.47820284697508897
TPR is 0.521797153024911
TNR is 0.5309255079006772
empirical epsilon = 0.09795563288509145
iteration is 2
FPR is 0.4690744920993228
FNR is 0.47820284697508897
TPR is 0.521797153024911
TNR is 0.5309255079006772
empirical epsilon = 0.09795563288509145
iteration is 3
FPR is 0.4690744920993228
FNR is 0.47820284697508897
TPR is 0.521797153024911
TNR is 0.5309255079006772
empirical epsilon = 0.09795563288509145
iteration is 4
FPR is 0.4690744920993228
FNR is 0.47820284697508897
TPR is 0.521797153024911
TNR is 0.5309255079006772
empirical epsilon = 0.09795563288509145
iteration is 5
FPR is 0.4690744920993228
FNR is 0.47820284697508897
TPR is 0.521797153024911
TNR is 0.5309255079006772
empirical epsilon = 0.09795563288509145
iteration is 6
FPR is 0.4690744920993228
FNR i

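What are the results for the unprotected (merely sampled) data, i.e. when the original train and adversary data are passed to the attack in place of the protected data?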



In [7]:
# Baseline run: the unprotected train / adversary data are passed both as the
# original data and in place of the protected data.
attacks.privacy_attack(
    seed=1,
    simulations=10,
    train=train,
    adversary=adversary_training,
    outside_training=evaluation_outside_training,
    protected_training=train,
    protected_adversary=adversary_training,
)

iteration is 0
FPR is 0.5103503184713376
FNR is 0.3252351097178683
TPR is 0.6747648902821317
TNR is 0.4896496815286624
empirical epsilon = 0.39300709519211946
iteration is 1
FPR is 0.5103503184713376
FNR is 0.3252351097178683
TPR is 0.6747648902821317
TNR is 0.4896496815286624
empirical epsilon = 0.39300709519211946
iteration is 2
FPR is 0.5103503184713376
FNR is 0.3252351097178683
TPR is 0.6747648902821317
TNR is 0.4896496815286624
empirical epsilon = 0.39300709519211946
iteration is 3
FPR is 0.5103503184713376
FNR is 0.3252351097178683
TPR is 0.6747648902821317
TNR is 0.4896496815286624
empirical epsilon = 0.39300709519211946
iteration is 4
FPR is 0.5103503184713376
FNR is 0.3252351097178683
TPR is 0.6747648902821317
TNR is 0.4896496815286624
empirical epsilon = 0.39300709519211946
iteration is 5
FPR is 0.5103503184713376
FNR is 0.3252351097178683
TPR is 0.6747648902821317
TNR is 0.4896496815286624
empirical epsilon = 0.39300709519211946
iteration is 6
FPR is 0.5103503184713376
FNR i

(array([0.3930071, 0.3930071, 0.3930071, 0.3930071, 0.3930071, 0.3930071,
        0.3930071, 0.3930071, 0.3930071, 0.3930071]),
 0.5103503184713376,
 0.4896496815286624,
 0.3252351097178683,
 0.6747648902821317)