In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt

# Scikit-learn utils
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from sklearn.metrics import mean_squared_error


# Scikit-learn classifiers
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from xgboost import XGBClassifier

# Tensorflow imports
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout


# Imports for MIA
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import membership_inference_attack as mia
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackInputData
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import SlicingSpec
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackType
import tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.plotting as plotting
from scipy import special

%matplotlib inline

### Dataset loading

Generate classification data using `make_classification`.

Split the data according to Salem et al.'s Adversary 2 setting.

In [None]:
# Synthetic classification problem; every random_state is pinned to 15 so the
# notebook produces the same arrays on each run.
n_classes = 2
X, y = make_classification(n_samples=100000, n_classes=n_classes, random_state=15)

# Stage 1: halve the data. One half is the data held at the TRE (target), the
# other half is the attacker's data (shadow), drawn from the same distribution.
X_target, X_shadow, y_target, y_shadow = train_test_split(
    X, y, test_size=0.50, random_state=15
)

# Stage 2: the TRE half is split again into the target model's train/test sets.
X_target_train, X_target_test, y_target_train, y_target_test = train_test_split(
    X_target, y_target, test_size=0.33, random_state=15
)

# Roles of the resulting arrays:
#   X_target -> trains and tests the target model, and is the test data for the attack model
#   X_shadow -> trains the attack model

In [None]:
# One-hot class encoding for the tensorflow model.
# Indexing the (n_classes x n_classes) identity matrix with the integer labels
# yields one row per sample with a single 1 in the column of its class.
y_target_train_oh = np.eye(n_classes)[y_target_train]
y_target_test_oh = np.eye(n_classes)[y_target_test]
# Encoding of the full target set (train + test). The TensorFlow experiment cell
# passes `y_target_oh` to the attack function; it was previously never defined,
# which raised a NameError on a fresh kernel.
y_target_oh = np.eye(n_classes)[y_target]
y_shadow_oh = np.eye(n_classes)[y_shadow]

### Scikit-learn classifiers plus XGBoost

In [None]:
# Each display label sits next to the estimator it describes, so the two lists
# built below can never drift out of alignment. `zip(*pairs)` transposes the
# pairs into a tuple of labels and a tuple of estimators; `map(list, ...)`
# turns both into lists, matching the original `names` / `classifiers` objects.
# NOTE(review): the Gaussian Process and SVC models scale poorly with the number
# of training rows — confirm they are tractable on the target training split.
names, classifiers = map(list, zip(*[
    ("Nearest Neighbors", KNeighborsClassifier(n_neighbors=3)),
    ("Linear SVM", SVC(kernel="linear", probability=True, C=0.025)),
    ("RBF SVM", SVC(gamma=2, probability=True, C=1)),
    ("Gaussian Process", GaussianProcessClassifier(kernel=1.0 * RBF(1.0))),
    ("Decision Tree", DecisionTreeClassifier(max_depth=5)),
    ("Random Forest", RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1)),
    ("Neural Net", MLPClassifier(alpha=1, max_iter=1000)),
    ("AdaBoost", AdaBoostClassifier()),
    ("Naive Bayes", GaussianNB()),
    ("QDA", QuadraticDiscriminantAnalysis()),
    ("XGBoost", XGBClassifier()),
]))

### Small TensorFlow model

In [None]:
# Tensorflow model (MLP): two hidden ReLU layers of 40 units each, followed by a
# linear output layer that emits one raw logit per class (no softmax applied).
input_data = Input(shape = X_target_train[0].shape)
x = Dense(40, activation='relu')(input_data)
x = Dense(40, activation='relu')(x)
# The output width follows `n_classes` instead of a hard-coded 2, so it always
# matches the one-hot targets built with np.eye(n_classes).
output = Dense(n_classes)(x)

tf_clf = Model(input_data, output)

# from_logits=True because the output layer has no activation.
loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True)

tf_clf.compile(optimizer='adam', loss=loss, metrics=['accuracy'])

### MIA Function definition for sklearn and tensorflow models using TF-Privacy
**TODO:** ***Export these functions to separate python files, with docstrings and typing.***

In [None]:
def membership_inference_attack_sklearn(model, X_train, X_test, y_train, y_test):
    """Run the TF-Privacy membership inference attacks on a scikit-learn style classifier.

    Class probabilities are obtained with ``model.predict_proba``, a per-sample
    categorical cross-entropy loss is derived from them, and both are handed to
    ``mia.run_attacks``. The ROC curve of the attack with the highest AUC is
    plotted and a per-slice summary is printed.

    Parameters
    ----------
    model
        Fitted classifier exposing ``predict_proba``.
    X_train, X_test
        Feature arrays; their predictions are passed to ``AttackInputData`` as
        ``probs_train`` / ``probs_test``.
        NOTE(review): TF-Privacy presumably interprets the ``*_train`` fields as
        samples the model was trained on (members) and ``*_test`` as unseen
        samples (non-members) — confirm callers pass data accordingly.
    y_train, y_test
        Integer class labels for ``X_train`` / ``X_test``. They are used as row
        indices into an identity matrix, so they must lie in
        ``[0, number of predict_proba columns)``.

    Returns
    -------
    tuple
        ``(max AUC over all attacks, max attacker advantage over all attacks)``.
    """
    print('Compute prediction probabilities...')
    prob_train = model.predict_proba(X_train)
    prob_test = model.predict_proba(X_test)

    print('Compute losses...')
    cce = tf.keras.backend.categorical_crossentropy
    constant = tf.keras.backend.constant

    # Derive the class count from the probability matrix instead of assuming a
    # binary problem, so multi-class classifiers are one-hot encoded correctly.
    n_classes = prob_train.shape[1]
    one_hot = np.eye(n_classes)

    # This might be a bit of a stretch: using categorical crossentropy for all classifiers
    loss_train = cce(constant(one_hot[y_train]), constant(prob_train), from_logits=False).numpy()
    loss_test = cce(constant(one_hot[y_test]), constant(prob_test), from_logits=False).numpy()

    # Named `attack_input` rather than `input` to avoid shadowing the builtin.
    attack_input = AttackInputData(
      probs_train = prob_train,
      probs_test = prob_test,
      loss_train = loss_train,
      loss_test = loss_test,
      labels_train = y_train,
      labels_test = y_test
    )

    # Run several attacks for different data slices
    attacks_result = mia.run_attacks(attack_input,
                                     SlicingSpec(
                                         entire_dataset = True,
                                         by_class = True,
                                         by_classification_correctness = True
                                     ),
                                     attack_types = [
                                         AttackType.THRESHOLD_ATTACK,
                                         AttackType.LOGISTIC_REGRESSION,
                                         AttackType.MULTI_LAYERED_PERCEPTRON,
                                         AttackType.RANDOM_FOREST, 
                                         AttackType.K_NEAREST_NEIGHBORS,
                                         #AttackType.THRESHOLD_ENTROPY_ATTACK
                                     ])

    # Plot the ROC curve of the best classifier
    plotting.plot_roc_curve(attacks_result.get_result_with_max_auc().roc_curve)
    plt.show()
    # Print a user-friendly summary of the attacks
    print(attacks_result.summary(by_slices = True))
    return attacks_result.get_result_with_max_auc().get_auc(), attacks_result.get_result_with_max_attacker_advantage().get_attacker_advantage()


def membership_inference_attack_tensorflow(model, X_train, X_test, y_train, y_test):
    """Run the TF-Privacy membership inference attacks on a Keras model that outputs logits.

    Logits come from ``model.predict``; softmax turns them into probabilities,
    from which a per-sample categorical cross-entropy loss is computed. Logits,
    losses and integer labels are handed to ``mia.run_attacks``. The ROC curve
    of the attack with the highest AUC is plotted and a per-slice summary is
    printed.

    Parameters
    ----------
    model
        Model whose ``predict`` returns one row of logits per sample.
    X_train, X_test
        Feature arrays forwarded to ``model.predict``.
    y_train, y_test
        One-hot encoded labels; ``argmax`` over axis 1 recovers the integer labels.

    Returns
    -------
    tuple
        ``(max AUC over all attacks, max attacker advantage over all attacks)``.
    """
    print('Compute logits...')
    train_logits = model.predict(X_train, batch_size=32)
    test_logits = model.predict(X_test, batch_size=32)

    print('Apply softmax to get probabilities from logits...')
    train_probs = special.softmax(train_logits, axis=1)
    test_probs = special.softmax(test_logits, axis=1)

    print('Compute losses...')
    backend = tf.keras.backend

    def per_sample_loss(one_hot_labels, probabilities):
        # Cross-entropy on probabilities (not logits), returned as a numpy array
        tensor = backend.categorical_crossentropy(
            backend.constant(one_hot_labels),
            backend.constant(probabilities),
            from_logits=False,
        )
        return tensor.numpy()

    train_loss = per_sample_loss(y_train, train_probs)
    test_loss = per_sample_loss(y_test, test_probs)

    attack_data = AttackInputData(
        logits_train=train_logits,
        logits_test=test_logits,
        loss_train=train_loss,
        loss_test=test_loss,
        labels_train=np.argmax(y_train, axis=1),
        labels_test=np.argmax(y_test, axis=1),
    )

    # Attack the whole dataset as well as per-class and per-correctness slices
    slicing = SlicingSpec(
        entire_dataset=True,
        by_class=True,
        by_classification_correctness=True,
    )
    selected_attacks = [
        AttackType.THRESHOLD_ATTACK,
        AttackType.LOGISTIC_REGRESSION,
        AttackType.MULTI_LAYERED_PERCEPTRON,
        AttackType.RANDOM_FOREST,
        AttackType.K_NEAREST_NEIGHBORS,
        AttackType.THRESHOLD_ENTROPY_ATTACK,
    ]
    results = mia.run_attacks(attack_data, slicing, attack_types=selected_attacks)

    # ROC curve of the strongest attack (by AUC)
    plotting.plot_roc_curve(results.get_result_with_max_auc().roc_curve)
    plt.show()

    # Human-readable report, broken down by slice
    print(results.summary(by_slices=True))

    best_auc = results.get_result_with_max_auc().get_auc()
    best_advantage = results.get_result_with_max_attacker_advantage().get_attacker_advantage()
    return best_auc, best_advantage

### Run experiments on sklearn classifiers and xgboost

In [None]:
for name, clf in zip(names, classifiers):
    print(f'Training classifier {name}...')
    # Fit on the target training split and score on the target test split;
    # fit() returns the estimator itself, so the two calls can be chained.
    score = clf.fit(X_target_train, y_target_train).score(X_target_test, y_target_test)
    print(f'{name} obtained an accuracy of {score}')

    # The shadow half is supplied as the attack's "train" set and the whole
    # target half (train + test) as its "test" set.
    # NOTE(review): confirm this matches the member / non-member roles that
    # TF-Privacy expects for the *_train / *_test attack inputs.
    auc, adv = membership_inference_attack_sklearn(
        model=clf,
        X_train=X_shadow,
        X_test=X_target,
        y_train=y_shadow,
        y_test=y_target,
    )
    print(f'Max AUC for MIA is {auc}. Max attacker advantage is {adv}')
    print('=' * 30)

### Run experiments on TensorFlow model

In [None]:
print('Training TensorFlow neural network...')
# Fit the target model on the target training split; the target test split is
# only used to report validation metrics after each epoch.
tf_clf.fit(
    x=X_target_train,
    y=y_target_train_oh,
    batch_size=32,
    epochs=10,
    validation_data=(X_target_test, y_target_test_oh),
)

# The shadow half is supplied as the attack's "train" set and the whole target
# half (train + test) as its "test" set, both with one-hot labels.
# NOTE(review): `y_target_oh` must hold the one-hot encoding of `y_target`;
# verify it is defined before this cell runs.
auc, adv = membership_inference_attack_tensorflow(
    model=tf_clf,
    X_train=X_shadow,
    X_test=X_target,
    y_train=y_shadow_oh,
    y_test=y_target_oh,
)
print(f'Max AUC for MIA is {auc}. Max attacker advantage is {adv}')
print('=' * 30)