# Requirements:
- numpy 1.19.5
- tensorflow 2.5.0
- tensorflow_privacy 0.6.1
- scikit-learn 0.24.2 (imported as `sklearn`)
- tqdm (used for the progress bars; no version is pinned here)

Before running the experiments, set **pickle_file** (the path to the tabular QMNIST data) to the correct location on your own machine.

In [1]:
import numpy as np

import pickle
import tensorflow as tf
from tensorflow.keras import layers
import random as python_random

from sklearn.model_selection import train_test_split

from sklearn import metrics
from sklearn.metrics import log_loss
from sklearn.metrics import accuracy_score
from sklearn.metrics.pairwise import euclidean_distances

# Load QMNIST data.

In [2]:
# NOTE: this path is machine-specific; point it at your local copy of the
# tabular QMNIST pickle. Only unpickle files you trust.
pickle_file = '/home/jiangnan/Desktop/ppml-workshop/data/QMNIST_tabular_ppml.pickle'

# Only the deserialisation itself needs the open file handle.
with open(pickle_file, 'rb') as f:
    pickle_data = pickle.load(f)

# Pull the defender / reserve arrays out of the loaded dictionary.
x_defender, x_reserve, y_defender, y_reserve = (
    pickle_data[key]
    for key in ('x_defender', 'x_reserve', 'y_defender', 'y_reserve')
)
del pickle_data  # drop the reference to the container once it is unpacked
print('Data loaded.')

Data loaded.


In [3]:
# Number of label classes.
NUM_CLASSES = 10

# Keep only column 0 of each label array, which yields 1-D label vectors.
y_defender, y_reserve = y_defender[:, 0], y_reserve[:, 0]

# Alternative label layouts (disabled): a column vector via
# np.expand_dims(y, axis=1), or one-hot vectors via
# tf.keras.utils.to_categorical(y, num_classes=NUM_CLASSES).

# Defender model $M_D$

Import the defender model that needs to be tested.

In [4]:
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KDTree
from sklearn.svm import SVC

def defender_model_fn(data_in):
    """Build the defender (victim) model: a KD-tree over the given records.

    The attack is white-box, hence the attacker is assumed to know this
    architecture too.

    Args:
        data_in: Array of feature vectors, passed unchanged to ``KDTree``
            (which expects shape ``(n_samples, n_features)``).

    Returns:
        The ``KDTree`` built from ``data_in``.

    Note:
        As a side effect, the global NumPy and Python random generators are
        re-seeded with two distinct values drawn from ``range(0, 100)``.
    """
    # Use the `python_random` alias imported at the top of the file. The bare
    # name `random` is only imported further down, so relying on it here
    # raises NameError if this function runs before that import.
    random_seeds = python_random.sample(range(0, 100), 2)
    np.random.seed(random_seeds[0])
    python_random.seed(random_seeds[1])

    return KDTree(data_in)

# --- Attack configuration ---------------------------------------------------

# Number of rounds performed when `random_select` is enabled.
n_extract = 100
# When True, each round leaves out the record at one randomly drawn index from
# both the defender and the reserve data, for `n_extract` rounds. When False,
# every index is visited once instead.
random_select = True

# ( 7 for 48 - 3200, 8 for 48 - 6400, etc.)
n_sample = 6
# When True, a candidate point is inserted back at the position of the
# left-out record; when False it is placed in front of the remaining records.
given_index = False

# Oracle attack model $M_A$

In [5]:
import random
from tqdm import tqdm

In [6]:
def create_mock_defender_models(defender, data_in, data_out, n_records = 48, random_select = True, n_extract = 100, given_index = False):
    """Score mock defender models that each add one candidate record.

    Every round leaves out the record at one index of the defender features
    and builds two mock models with ``defender_model_fn``:

    * "in":  the remaining records plus the left-out defender record;
    * "out": the remaining records plus the reserve record at the same index.

    ``defender`` and both mock models are queried on the remaining records,
    and the mean row-wise norm of the difference between the mock's and the
    defender's query result is recorded for each mock.

    Args:
        defender: Model exposing ``query(X, k=...)`` (presumably the model
            built from ``data_in[0]`` - confirm against the caller).
        data_in: ``(features, labels)`` tuple; only the features are read.
        data_out: ``(features, labels)`` tuple; only the features are read.
        n_records: Exclusive upper bound for the randomly drawn index. It is
            capped at the number of rows actually available.
        random_select: If true, run ``n_extract`` rounds with a random index;
            otherwise run one round per row of ``data_in[0]``.
        n_extract: Number of rounds when ``random_select`` is true.
        given_index: If true, the candidate is inserted at the left-out
            position; otherwise it is placed in front of the remaining rows.

    Returns:
        ``(similarities_in, similarities_out)``: two lists holding one value
        per round for the "in" and the "out" mock model respectively.
    """
    features_in = data_in[0]
    features_out = data_out[0]

    similarities_in = []
    similarities_out = []

    if random_select:
        number_loop = n_extract
        # Never draw an index beyond the rows that actually exist.
        index_limit = min(n_records, features_in.shape[0], features_out.shape[0])
    else:
        number_loop = features_in.shape[0]

    for i in tqdm(range(number_loop)):

        index = random.randint(0, index_limit - 1) if random_select else i

        # Row 0: left-out defender record; row 1: reserve record at that index.
        candidates = np.array([features_in[index], features_out[index]])

        # Defender records without the left-out one; these are the query points.
        attack_in = np.delete(features_in, index, axis=0)
        n_features = attack_in.shape[1]

        candidate_in = candidates[0].reshape(1, n_features)
        candidate_out = candidates[1].reshape(1, n_features)

        # Query the model that was passed in, not a module-level global.
        predict = defender.query(attack_in, k=1)

        if given_index:
            mock_data_in = np.insert(attack_in, index, candidate_in, axis=0)
            mock_data_out = np.insert(attack_in, index, candidate_out, axis=0)
        else:
            mock_data_in = np.vstack((candidate_in, attack_in))
            mock_data_out = np.vstack((candidate_out, attack_in))

        # Build the "in" mock before the "out" mock: defender_model_fn
        # re-seeds the random generators, so the call order matters.
        M_cD_in = defender_model_fn(mock_data_in)
        M_cD_out = defender_model_fn(mock_data_out)

        M_cD_in_predict = M_cD_in.query(attack_in)
        M_cD_out_predict = M_cD_out.query(attack_in)

        # NOTE(review): element [1] of a query result is the neighbour-index
        # array (element [0] holds the distances), so these scores compare
        # neighbour indices rather than distances - confirm this is intended.
        similarity_in = np.mean(np.linalg.norm(M_cD_in_predict[1] - predict[1], axis=1))
        similarity_out = np.mean(np.linalg.norm(M_cD_out_predict[1] - predict[1], axis=1))

        similarities_in.append(similarity_in)
        similarities_out.append(similarity_out)

    return similarities_in, similarities_out


# Start experiments:

In [10]:
# Per-trial results: defender accuracy on the reserve sample and the
# similarity scores returned by the attack.
accuracy_out_all = []

similarities_in_all = []
similarities_out_all = []

n_records = 1600
n_trials = 3

for trial in range(n_trials):

    # The same row indices are used for the defender and the reserve data.
    # NOTE(review): 200000 is a hard-coded upper bound for the sampled row
    # indices; presumably the number of available rows - confirm.
    random_indexes = random.sample(range(0,200000), n_records)
    data_in = x_defender[random_indexes], y_defender[random_indexes]
    data_out = x_reserve[random_indexes], y_reserve[random_indexes]
    defender_model = defender_model_fn(data_in[0])

    # 1-nearest-neighbour prediction: each reserve point receives the label
    # of its closest defender record.
    dist, ind = defender_model.query(data_out[0], k=1)

    # `ind` has one column per requested neighbour (k=1); take that column so
    # the predictions are 1-D like the true labels.
    predict = data_in[1][ind[:, 0]]

    acc = accuracy_score(data_out[1], predict)

    accuracy_out_all.append(acc)

    # Pass the `n_records` variable so the attack always matches the sample
    # size drawn above, instead of repeating the literal.
    similarities_in, similarities_out = create_mock_defender_models(defender = defender_model,
                                                                    data_in = data_in,
                                                                    data_out = data_out,
                                                                    n_records = n_records,
                                                                    random_select = random_select,
                                                                    n_extract = n_extract,
                                                                    given_index = given_index)

    similarities_in_all.append(similarities_in)
    similarities_out_all.append(similarities_out)

100%|██████████| 100/100 [01:03<00:00,  1.58it/s]
100%|██████████| 100/100 [00:59<00:00,  1.67it/s]
100%|██████████| 100/100 [00:59<00:00,  1.68it/s]


In [11]:
# Rescale accuracy so that chance level (1/NUM_CLASSES) maps to 0 and perfect
# accuracy maps to 1; uses NUM_CLASSES instead of repeating the literal 10.
utility_all = (NUM_CLASSES*np.array(accuracy_out_all)-1)/(NUM_CLASSES-1)

In [12]:
# Mean utility over all trials (shown as the cell output).
np.mean(utility_all)

0.8488425925925925

In [14]:
# Compute the privacy value of each trial by comparing every (in, out) pair
# of similarity scores.

privacy_all = []
variance_all = []
sigma_error_all = []

for similarities_in, similarities_out in zip(similarities_in_all, similarities_out_all):

    # pairwise[j, k] is True when similarities_in[j] <= similarities_out[k];
    # broadcasting replaces the explicit double loop over all pairs.
    pairwise = np.asarray(similarities_in)[:, None] <= np.asarray(similarities_out)[None, :]

    # n: number of compared pairs; p: fraction of pairs where the "in" score
    # is strictly greater than the "out" score.
    n = pairwise.size
    p = 1-pairwise.mean()

    privacy = min(2*p,1)
    # NOTE(review): sigma_error**2 equals 4*p*(1-p)/n, whereas `variance`
    # uses a factor of 2 - confirm which factor is intended.
    variance = 2*p*(1-p)/n
    sigma_error = 2*np.sqrt(p*(1-p)/n)

    privacy_all.append(privacy)
    variance_all.append(variance)
    sigma_error_all.append(sigma_error)
    

100%|██████████| 100/100 [00:00<00:00, 52791.74it/s]
100%|██████████| 100/100 [00:00<00:00, 37681.29it/s]
100%|██████████| 100/100 [00:00<00:00, 58514.29it/s]


In [15]:
# Print the across-trial mean of each metric, in the order:
# privacy, variance, sigma error.
for metric_values in (privacy_all, variance_all, sigma_error_all):
    print(np.mean(metric_values))

0.9124
4.9616226666666665e-05
0.009961548675457543


In [16]:
# Mean variance over all trials (shown as the cell output).
np.mean(variance_all)

4.9616226666666665e-05

In [17]:
# Mean sigma error over all trials (shown as the cell output).
np.mean(sigma_error_all)

0.009961548675457543