In [1]:
import sys
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, Dense, Flatten, Dropout, MaxPooling2D, BatchNormalization, Reshape
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import Callback, EarlyStopping
from sklearn.model_selection import train_test_split
import time
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, LabelEncoder

from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp
from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent
from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras import DPKerasAdamOptimizer
from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras_vectorized import VectorizedDPKerasAdamOptimizer
from tensorflow_privacy.privacy.analysis.compute_noise_from_budget_lib import compute_noise


from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import membership_inference_attack as mia
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackInputData
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import SlicingSpec
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackType

import tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.plotting as plotting

import numpy as np
import scipy as scp
from scipy import special

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
headers = ['age','workclass','fnlwgt','education','education-num','marital-status',
           'occupation','relationship','race','sex','capital-gain','capital-loss',
           'hours-per-week','native-country','class']
adult = pd.read_csv('../../datasets/adult.data', 
                    sep=', ', names=headers, na_values='?', engine='python')

# Drop all records with missing values
adult.dropna(inplace=True)
adult.reset_index(drop=True, inplace=True)
# Drop fnlwgt, not interesting for ML
adult.drop('fnlwgt', axis=1, inplace=True)
adult.drop('education', axis=1, inplace=True)

# Convert objects to categories
obj_columns = adult.select_dtypes(['object']).columns
adult[obj_columns] = adult[obj_columns].astype('category')

num_columns = adult.select_dtypes(['int64']).columns
adult[num_columns] = adult[num_columns].astype('float64')
for c in num_columns:
    adult[c] /= (adult[c].max()-adult[c].min())
adult['class'] = adult['class'].cat.codes

obj_columns = adult.select_dtypes(['category']).columns

adult.replace(['Divorced', 
               'Married-AF-spouse', 
               'Married-civ-spouse', 
               'Married-spouse-absent',
               'Never-married',
               'Separated',
               'Widowed'
              ],
              ['not married',
               'married',
               'married',
               'married',
               'not married',
               'not married',
               'not married'
              ], inplace = True)

adult = pd.get_dummies(adult, columns=obj_columns)
X = np.array(adult.drop('class', axis=1))
y = np.array(adult['class'])
y = np.eye(2)[y]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

In [3]:
learning_rate = 15e-5

In [4]:
def membership_inference_attack(model, X_train, X_test, y_train, y_test):
    print('Predict on train...')
    logits_train = model.predict(X_train, batch_size=batch_size)
    print('Predict on test...')
    logits_test = model.predict(X_test, batch_size=batch_size)

    print('Apply softmax to get probabilities from logits...')
    prob_train = special.softmax(logits_train, axis=1)
    prob_test = special.softmax(logits_test, axis=1)

    print('Compute losses...')
    cce = tf.keras.backend.categorical_crossentropy
    constant = tf.keras.backend.constant

    loss_train = cce(constant(y_train), constant(prob_train), from_logits=False).numpy()
    loss_test = cce(constant(y_test), constant(prob_test), from_logits=False).numpy()
    
    labels_train = np.argmax(y_train, axis=1)
    labels_test = np.argmax(y_test, axis=1)

    input = AttackInputData(
      logits_train = logits_train,
      logits_test = logits_test,
      loss_train = loss_train,
      loss_test = loss_test,
      labels_train = labels_train,
      labels_test = labels_test
    )

    # Run several attacks for different data slices
    attacks_result = mia.run_attacks(input,
                                     SlicingSpec(
                                         entire_dataset = True,
                                         by_class = True,
                                         by_classification_correctness = True
                                     ),
                                     attack_types = [
                                         AttackType.THRESHOLD_ATTACK,
                                         AttackType.LOGISTIC_REGRESSION,
                                         AttackType.MULTI_LAYERED_PERCEPTRON,
                                         AttackType.RANDOM_FOREST, 
                                         AttackType.K_NEAREST_NEIGHBORS,
                                         AttackType.THRESHOLD_ENTROPY_ATTACK
                                     ])

    # Plot the ROC curve of the best classifier
#     fig = plotting.plot_roc_curve(
#         attacks_result.get_result_with_max_auc().roc_curve)

    # Print a user-friendly summary of the attacks
    print(attacks_result.summary(by_slices = True))
    time.sleep(5)
    return attacks_result.get_result_with_max_auc().get_auc(), attacks_result.get_result_with_max_attacker_advantage().get_attacker_advantage()

callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)

In [5]:
def create_nn(dropout=None, regularizer=None):
    input_data = Input(shape = X_train[0].shape)
    x = Dense(40, activation='relu', kernel_regularizer=regularizer)(input_data)
    if dropout is not None:
        x = Dropout(dropout)(x)
    x = Dense(40, activation='relu', kernel_regularizer=regularizer)(x)
    if dropout is not None:
        x = Dropout(dropout)(x)
    output = Dense(2, kernel_regularizer=regularizer)(x)

    model = Model(input_data, output)
    
    loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
            
    model.compile(optimizer='adam', loss=loss, metrics=['accuracy'])
    
    return model

def create_dp_nn(noise_multiplier, l2_norm_clip, microbatches):
    input_data = Input(shape = X_train[0].shape)
    x = Dense(40, activation='relu')(input_data)
    x = Dense(40, activation='relu')(x)
    output = Dense(2)(x)

    model = Model(input_data, output)
    
    optimizer = DPKerasAdamOptimizer(
                            l2_norm_clip=l2_norm_clip,
                            noise_multiplier=noise_multiplier,
                            num_microbatches=microbatches,
                            learning_rate=learning_rate)
    
    loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True, reduction=tf.losses.Reduction.NONE)
    
    model.compile(optimizer=optimizer,
              loss=loss,
              metrics=['accuracy'])
    
    return model

In [6]:
epochs = 50
batch_size = 48
attacks = 1
settings = [
    (None,None),
    (0.25,None),
    (0.50,None),
    (0.75,None),
    (None,'l2'),
    (0.25,'l2'),
    (0.50,'l2'),
    (0.75,'l2'),
]
results_summary = []

for drop, reg in settings:
    # Instantiate network
    model = create_nn(dropout=drop, regularizer=reg)
    
    # Train network until convergence
    start_time = time.time()
    r = model.fit(X_train, 
                y_train, 
                validation_data=(X_test, y_test),
                epochs=epochs, 
                batch_size=batch_size
               )
    end_time = time.time()
    time_elapsed = (end_time - start_time)

    # MIA 
    aauc = []
    aadv = []
    for _ in range(attacks):
        auc, adv = membership_inference_attack(model, X_train, X_test, y_train, y_test)
        time.sleep(5)
        aauc.append(auc)
        aadv.append(adv)
    mauc = sum(aauc) / attacks
    madv = sum(aadv) / attacks

    # Write result summary
    summ = ', '.join(map(str,[
        len(r.history['loss']), #epochs
        drop,
        reg,
        r.history['loss'][-1], 
        r.history['val_loss'][-1],
        r.history['accuracy'][-1],
        r.history['val_accuracy'][-1],
        time_elapsed,
        mauc,
        madv
    ]))

    results_summary.append(summ)
    print('='*40)
    
    
print('Epochs, Dropout, Regularizer, Loss, Val loss, Accuracy, Val accuracy, Time, AUC, Advantage')
for r in results_summary:
    print(r)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50

KeyboardInterrupt: 

In [17]:
# TEST FOR DIFFERENT EPS
results_summary = []

n = X_train.shape[0]
epochs = 50
batch_size = 48
microbatches = 48
epsilons = [0.1,0.5,1,2,4,8,16,100,1000]
delta = 1e-6
min_noise = 1e-100
l2_norm_clip = 2.5
sampling_rate = batch_size / n
attacks = 1

for e in epsilons:
    # Compute noise multiplier from target epsilon
    noise_multiplier = compute_noise(n, batch_size, e, epochs, delta, min_noise)
    
    # Compute epsilon
    orders = [1 + x / 10. for x in range(1, 100)] + list(range(11, 101))
    sampling_probability = batch_size / n
    rdp = compute_rdp(q=sampling_probability,
                    noise_multiplier=noise_multiplier,
                    steps=epochs * n // batch_size,
                    orders=orders)
    eps = get_privacy_spent(orders, rdp, target_delta=delta)

    # Instantiate network
    model = create_dp_nn(noise_multiplier, l2_norm_clip, microbatches)

    # Train network
    start_time = time.time()
    r = model.fit(X_train, 
                 y_train, 
                 validation_data=(X_test, y_test), 
                 epochs=epochs, 
                 batch_size=batch_size,
                 #callbacks=[callback]
                )
    end_time = time.time()
    time_elapsed = (end_time - start_time)

    # MIA 
    aauc = []
    aadv = []
    for _ in range(attacks):
        auc, adv = membership_inference_attack(model, X_train, X_test, y_train, y_test)
        aauc.append(auc)
        aadv.append(adv)
    mauc = sum(aauc) / attacks
    madv = sum(aadv) / attacks

    # Write result summary
    summ = ', '.join(map(str,[
          len(r.history['loss']),
          e,
          delta,
          l2_norm_clip,
          noise_multiplier,
          sampling_rate,
          eps[0],
          r.history['loss'][-1], 
          r.history['val_loss'][-1],
          r.history['accuracy'][-1],
          r.history['val_accuracy'][-1],
          time_elapsed,
          mauc,
          madv
    ]))
    results_summary.append(summ)
    print('='*40)

    
print('Epochs, Target epsilon, delta, C, Sigma, Sampling rate, Epsilon, Loss, Val loss, Accuracy, Val accuracy, Time, AUC, Advantage')
for r in results_summary:
    print(r)

DP-SGD with sampling rate = 0.238% and noise_multiplier = 17994.001391472444 iterated over 21050 steps satisfies differential privacy with eps = 0.01 and delta = 1e-06.
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Predict on train...
Predict on test...
Apply softmax to get probabilities from logits...
Compute losses...
Best-performing attacks over all slices
  RANDOM_FOREST (with 7501 training and 7501 test examples) achieved an AUC of 0.53 on slice 

DP-SGD with sampling rate = 0.238% and noise_multiplier = 14.309830337352679 iterated over 21050 steps satisfies differential privacy with eps = 0.1 and delta = 1e-06.
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Predict on train...
Predict on test...
Apply softmax to get probabilities from logits...
Compute losses...
Best-performing attacks over all slices
  MULTI_LAYERED_PERCEPTRON (with 2453 training and 2453 test examples) achieved an AUC of 0.54

DP-SGD with sampling rate = 0.238% and noise_multiplier = 3.0864306316599435 iterated over 21050 steps satisfies differential privacy with eps = 0.5 and delta = 1e-06.
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Predict on train...
Predict on test...
Apply softmax to get probabilities from logits...
Compute losses...
Best-performing attacks over all slices
  K_NEAREST_NEIGHBORS (with 1974 training and 1974 test examples) achieved an AUC of 0.54 on s

DP-SGD with sampling rate = 0.238% and noise_multiplier = 1.7217127690788603 iterated over 21050 steps satisfies differential privacy with eps = 1 and delta = 1e-06.
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Predict on train...
Predict on test...
Apply softmax to get probabilities from logits...
Compute losses...
Best-performing attacks over all slices
  LOGISTIC_REGRESSION (with 2453 training and 2453 test examples) achieved an AUC of 0.52 on sli

DP-SGD with sampling rate = 0.238% and noise_multiplier = 1.0674083630051678 iterated over 21050 steps satisfies differential privacy with eps = 2 and delta = 1e-06.
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Predict on train...
Predict on test...
Apply softmax to get probabilities from logits...
Compute losses...
Best-performing attacks over all slices
  MULTI_LAYERED_PERCEPTRON (with 2453 training and 2453 test examples) achieved an AUC of 0.53 o

DP-SGD with sampling rate = 0.238% and noise_multiplier = 0.7767693318298682 iterated over 21050 steps satisfies differential privacy with eps = 4 and delta = 1e-06.
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Predict on train...
Predict on test...
Apply softmax to get probabilities from logits...
Compute losses...
Best-performing attacks over all slices
  LOGISTIC_REGRESSION (with 2453 training and 2453 test examples) achieved an AUC of 0.53 on sli

DP-SGD with sampling rate = 0.238% and noise_multiplier = 0.6223068197048801 iterated over 21050 steps satisfies differential privacy with eps = 8 and delta = 1e-06.
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Predict on train...
Predict on test...
Apply softmax to get probabilities from logits...
Compute losses...
Best-performing attacks over all slices
  K_NEAREST_NEIGHBORS (with 1790 training and 1790 test examples) achieved an AUC of 0.55 on sli

DP-SGD with sampling rate = 0.238% and noise_multiplier = 0.5172600554247768 iterated over 21050 steps satisfies differential privacy with eps = 16 and delta = 1e-06.
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Predict on train...
Predict on test...
Apply softmax to get probabilities from logits...
Compute losses...
Best-performing attacks over all slices
  LOGISTIC_REGRESSION (with 2453 training and 2453 test examples) achieved an AUC of 0.52 on sl

DP-SGD with sampling rate = 0.238% and noise_multiplier = 0.3320461699705749 iterated over 21050 steps satisfies differential privacy with eps = 100 and delta = 1e-06.
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Predict on train...
Predict on test...
Apply softmax to get probabilities from logits...
Compute losses...
Best-performing attacks over all slices
  LOGISTIC_REGRESSION (with 8197 training and 8197 test examples) achieved an AUC of 0.52 on s

DP-SGD with sampling rate = 0.238% and noise_multiplier = 0.21836654222821442 iterated over 21050 steps satisfies differential privacy with eps = 1e+03 and delta = 1e-06.
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Predict on train...
Predict on test...
Apply softmax to get probabilities from logits...
Compute losses...
Best-performing attacks over all slices
  RANDOM_FOREST (with 1709 training and 1709 test examples) achieved an AUC of 0.52 on slic

Epochs, Target epsilon, delta, C, Sigma, Sampling rate, Epsilon, Loss, Val loss, Accuracy, Val accuracy, Time, AUC, Advantage
50, 0.01, 1e-06, 2.5, 17994.001391472444, 0.0023752969121140144, 0, 0.7798415422439575, 0.7803100943565369, 0.2673198878765106, 0.26029735803604126, 759.0008347034454, 0.5253518123667378, 0.07423043609949465
50, 0.1, 1e-06, 2.5, 14.309830337352679, 0.0023752969121140144, 0.11208687744399892, 0.4863673448562622, 0.4875877797603607, 0.7498515248298645, 0.7535663843154907, 774.4385318756104, 0.5437999160427438, 0.08373142180019238
50, 0.5, 1e-06, 2.5, 3.0864306316599435, 0.0023752969121140144, 0.5000000000000254, 0.44302308559417725, 0.4543653428554535, 0.8076504468917847, 0.8016877770423889, 784.9998731613159, 0.5400670110288985, 0.07836430677254846
50, 1, 1e-06, 2.5, 1.7217127690788603, 0.0023752969121140144, 0.9999999999996306, 0.4501935541629791, 0.46728575229644775, 0.8186856508255005, 0.8090214729309082, 741.9407825469971, 0.5186605629387165, 0.08210010042988

In [73]:
noise_multiplier = compute_noise(n, batch_size, 1000, epochs, delta, min_noise)
orders = np.arange(1.1,10,0.1) + np.arange()
sampling_probability = batch_size / n
rdp = compute_rdp(q=sampling_probability,
                    noise_multiplier=noise_multiplier,
                    steps=epochs * n // batch_size,
                    orders=orders)
get_privacy_spent(orders, rdp, target_delta=delta)

DP-SGD with sampling rate = 0.238% and noise_multiplier = 0.21836654222821442 iterated over 21050 steps satisfies differential privacy with eps = 1e+03 and delta = 1e-06.


ValueError: math domain error

In [81]:
orders = list(np.arange(1.1,10,0.1))
orders

[1.1,
 1.2000000000000002,
 1.3000000000000003,
 1.4000000000000004,
 1.5000000000000004,
 1.6000000000000005,
 1.7000000000000006,
 1.8000000000000007,
 1.9000000000000008,
 2.000000000000001,
 2.100000000000001,
 2.200000000000001,
 2.300000000000001,
 2.4000000000000012,
 2.5000000000000013,
 2.6000000000000014,
 2.7000000000000015,
 2.8000000000000016,
 2.9000000000000017,
 3.0000000000000018,
 3.100000000000002,
 3.200000000000002,
 3.300000000000002,
 3.400000000000002,
 3.500000000000002,
 3.6000000000000023,
 3.7000000000000024,
 3.8000000000000025,
 3.9000000000000026,
 4.000000000000003,
 4.100000000000003,
 4.200000000000003,
 4.3000000000000025,
 4.400000000000003,
 4.5000000000000036,
 4.600000000000003,
 4.700000000000003,
 4.800000000000003,
 4.900000000000004,
 5.0000000000000036,
 5.100000000000003,
 5.200000000000003,
 5.300000000000004,
 5.400000000000004,
 5.5000000000000036,
 5.600000000000003,
 5.700000000000005,
 5.800000000000004,
 5.900000000000004,
 6.00000000

In [None]:
i = 10
1ei