In [114]:
import numpy as np
import matplotlib.pyplot as plt

import math
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tqdm import tqdm
import sys
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))


Num GPUs Available:  1


## Basic Models

Create basic model functions
- __Target model__: A small and simple CNN
- __N Shadow Models__: Same architecture as the target model
- __Attack Models__ : One for every class

In [131]:
def f_target(X_train, y_train, X_test=None, y_test=None, epochs=100):
  """
  Returns a trained target model, if test data are specified we will evaluate the model and print its accuracy
  """
  model = models.Sequential()
  model.add(layers.Conv2D(64, (3, 3), activation='tanh', input_shape=(32, 32, 3)))
  model.add(layers.MaxPooling2D((2, 2)))
  model.add(layers.Conv2D(64, (3, 3), activation='tanh'))
  model.add(layers.MaxPooling2D((2, 2)))

  model.add(layers.Flatten())
  model.add(layers.Dense(128, activation='tanh'))
  model.add(layers.Dropout(0.25))
  model.add(layers.Dense(10)   )
  
  optimizer = keras.optimizers.Adam(learning_rate=0.001)
  model.compile(optimizer=optimizer,
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])
  if X_test is None or y_test is None:
    history = model.fit(X_train, y_train, epochs=epochs, 
                    validation_split=0.2)
  else:
    history = model.fit(X_train, y_train, epochs=epochs, 
                    validation_data=(X_test, y_test))
  return model
  
def f_shadow(X_train, y_train, X_test=None, y_test=None, epochs=20):
  # simply return the target model architecture
  return f_target(X_train, y_train, X_test, y_test, epochs)

def __f_attack(X_train, y_train, X_test, y_test):
  print(X_train.shape, X_test.shape)
  model = models.Sequential()
  model.add(layers.Dense(10, activation='relu', input_shape=(X_train.shape[1], )))
  model.add(layers.Dropout(0.1))
  model.add(layers.Dense(100, activation='relu'))
  model.add(layers.Dropout(0.2))
  model.add(layers.Dense(100, activation='relu'))
  model.add(layers.Dense(10, activation='softmax'))
  model.add(layers.Dense(2))
  
  optimizer = keras.optimizers.Adam(learning_rate=0.001)
  model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])
  history = model.fit(X_train, y_train, epochs=80, 
                    validation_data=(X_test, y_test), verbose=True)
  
  return model

def f_attack(X, y):
  # X_i = (class, probability vector, )
  classes = np.unique(train_labels) # all class labels
  with tf.device('/gpu:0'):
  # split to train and test datasets
    X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=True, test_size=0.25)

  # attack model architecture
  attack_model_per_class = []
  for c in classes:
    # find same class data
    class_instances_train = X_train[:, 0] == c
    class_instances_test = X_test[:, 0] == c

    # get the class dataset
    with tf.device('/gpu:0'):
      D_train = X_train[class_instances_train, 1:]
      labels_train = y_train[class_instances_train]
      D_test = X_test[class_instances_test, 1:] 
      labels_test = y_test[class_instances_test]

    #  create attack model for current class
    with tf.device('/gpu:0'):
      attack_model = __f_attack(D_train, labels_train, D_test, labels_test)

    # insert model in attack models list
    attack_model_per_class.append(attack_model)
  
  return attack_model_per_class


In [116]:
with tf.device('/gpu:0'):
  (train_images, train_labels), (test_images, test_labels) = datasets.cifar10.load_data()
  train_images = train_images[:2000] # as the paper attack train wiht only 200 records
  train_labels = train_labels[:2000]
  # use the rest as testing - 'out' records
  test_images = np.concatenate((train_images[2000:], test_images))
  test_labels = np.concatenate((train_labels[2000:], test_labels))

In [141]:
with tf.device('/gpu:0'):
  target_model = f_target(train_images, train_labels, epochs=50) 

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [118]:
# return prediction vector
def predict(model, X_i):
  prob_layer = layers.Softmax()
  return prob_layer(model(X_i.reshape((1, 32, 32, 3)))).numpy()[0]

def rand_record(X=None, k=1):
  if X is None:
    # create a whole new record
    X = np.random.randint(0, 255+1, size=32*32*3).reshape((32, 32, 3))
  else:
    X = X.reshape((32*32*3))
    # change k random features
    k_features = np.random.choice(range(X.shape[0]), size=k, replace=False)

    for i in k_features:
      X[i] += np.random.randint(-X[i], 256-X[i]) # subtract/add a number to change the feature 
  
  return X.reshape((32,32,3))

def synthesize(c, target_model, k_min, k_max, conf_min, iter_max, rej_max):
  X = rand_record()
  y_conf_star = 0.0
  j = 0
  k = k_max
  X_star = None

  for iter in range(iter_max):
    y = predict(target_model, X);
    if y[c] >= y_conf_star:
      if y[c] > conf_min and c == np.argmax(y):
        # sample to decide if we return the data
        if np.random.randint(0, 2):
          return X
      y_conf_star = y[c]
      j = 0
      X_star = X
    else:
      # reject and resample X
      j += 1

      if j > rej_max:
        k = max(k_min, math.ceil(k/2))
        j = 0
    
    X = rand_record(X_star, k)
  
  return X_star # failed. return the last successfull record

In [133]:

def divide_dataset(n_shadows, shadow_dataset_size, X, y):
  D_shadows = []
  rng = default_rng()
  for i in range(n_shadows):
    sample_i = np.random.choice(range(X.shape[0]), shadow_dataset_size, replace=False)
    assert np.unique(sample_i).shape[0] == shadow_dataset_size # sanity check
    D_shadows.append((X[sample_i, :], y[sample_i, :]))
  return D_shadows

# returns a list of 'n_shadows' datasets
def generate_shadow_dataset(target_model, n_shadows, shadow_dataset_size, n_classes, X_test=None, y_test=None):
  # in case we give test data we will just divide those to train the shadow models
  if X_test is not None and y_test is not None:
    return divide_dataset(n_shadows, shadow_dataset_size, X_test, y_test)
  
  
  # helper function to return a datapoint (for sure)
  def get_shadow_datapoint(c):
    X_i = None
    with tf.device('/gpu:0'):
      while X_i is None:
        X_i = synthesize(c, target_model, 1, 32*32*3, 0.65, 100, 5)
    return X_i
  
  D_shadows = []
  for i in range(n_shadows):
    print(f"Generating D_shadow_{i}")
    # uniformly generate X's for all of the classes (y's)
    X_shadow = np.asarray(
        [get_shadow_datapoint(i%n_classes) for i in range(shadow_dataset_size)]
    )
    y_shadow = np.asarray(
        [(i%n_classes) for i in range(shadow_dataset_size)]
    ).reshape((-1, 1))

    D_shadows.append((X_shadow, y_shadow))
  
  return D_shadows

def create_shadows(D_shadows):
  shadow_models = [] # shadow model list

  for D_shadow in D_shadows:
    # sample data to feed/evaluate the model
    X_shadow, y_shadow = D_shadow
    shadow_X_train, shadow_X_test, shadow_y_train, shadow_y_test = train_test_split(X_shadow, y_shadow, shuffle=True, test_size=0.2)

    # generate the shadow model
    shadow_model = f_shadow(shadow_X_train, shadow_y_train, shadow_X_test, shadow_y_test, 50)

    D_shadow = (shadow_X_train, shadow_y_train), (shadow_X_test, shadow_y_test)
    shadow_models.append((shadow_model, D_shadow))

  return shadow_models # return a list where every item is (model, acc), train-data, test-data

In [120]:
# generate shadow datasets
D_shadows = generate_shadow_dataset(target_model, 20, 3000, 10, test_images, test_labels)

In [121]:
# train the shadow models
shadow_models = create_shadows(D_shadows)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/5

In [134]:
# helper function to prepare each shadow dataset batch
def prepare_batch(shadow_model, X, y, in_D):
  #decide membership
  y_member = np.ones(shape=(y.shape[0], 1)) if in_D else np.zeros(shape=(y.shape[0], 1))
  
  # get probability vector
  prob_layer = layers.Softmax() # probability layer implementing softmax for mapping NN results to probabilities in [0, 1]
  prob_vec = prob_layer(shadow_model(X)).numpy()
  
  # return an instance <actual class, prob_vec from shadow model, 'in'/'out' D_target membership> 
  return np.concatenate((y.reshape(-1, 1), prob_vec, y_member), axis=1)

def generate_attack_dataset(shadow_models, n_classes):
  # input is a list where items are model, (X_train, y_train), (X_test, y_test)

  D_attack = None
  # D_attack_i format = <class, prob_vec, membership label (1 or 0)> 
  for shadow_model, ((X_train, y_train), (X_test, y_test)) in shadow_models:
    batch = np.concatenate((
        prepare_batch(shadow_model, X_train, y_train, True), # members of shadow dataset 
        prepare_batch(shadow_model, X_test, y_test, False)   # non members of shadow dataset
    ))   

    D_attack = np.concatenate((D_attack, batch)) if D_attack is not None else batch  

  return D_attack 

In [142]:
D_attack = generate_attack_dataset(shadow_models, 10)

In [143]:
attack_model_bundle = f_attack(D_attack[:, :-1], D_attack[:, -1])

(4525, 10) (1546, 10)
Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
Epoch 25/80
Epoch 26/80
Epoch 27/80
Epoch 28/80
Epoch 29/80
Epoch 30/80
Epoch 31/80
Epoch 32/80
Epoch 33/80
Epoch 34/80
Epoch 35/80
Epoch 36/80
Epoch 37/80
Epoch 38/80
Epoch 39/80
Epoch 40/80
Epoch 41/80
Epoch 42/80
Epoch 43/80
Epoch 44/80
Epoch 45/80
Epoch 46/80
Epoch 47/80
Epoch 48/80
Epoch 49/80
Epoch 50/80
Epoch 51/80
Epoch 52/80
Epoch 53/80
Epoch 54/80
Epoch 55/80
Epoch 56/80
Epoch 57/80
Epoch 58/80
Epoch 59/80
Epoch 60/80
Epoch 61/80
Epoch 62/80
Epoch 63/80
Epoch 64/80
Epoch 65/80
Epoch 66/80
Epoch 67/80
Epoch 68/80
Epoch 69/80
Epoch 70/80
Epoch 71/80
Epoch 72/80
Epoch 73/80
Epoch 74/80
Epoch 75/80
Epoch 76/80
Epoch 77/80
Epoch 78/80
Epoch 79/80
Epoch 80/80
(4476, 10) (1514, 10)
Epoch

In [144]:
def evaluate_attack(attack_model_bundle, X_attack, y_attack, n_classes):
  acc_per_class = []
  for c in range(n_classes):
    attack_model = attack_model_bundle[c]
    class_instances = X_attack[:, 0] == c # get same class samples
    test_loss, test_acc = attack_model.evaluate(X_attack[class_instances, 1:], y_attack[class_instances], verbose=0)
    acc_per_class.append(test_acc)
    print(f"class-{c+1}: {test_acc}")
  return acc_per_class



In [145]:
# create a test dataset 

D_out = prepare_batch(target_model, test_images, test_labels, False)
D_in = prepare_batch(target_model, train_images, train_labels, True)
print("Testing with 'in' data only:")
res_in = evaluate_attack(attack_model_bundle, D_in[:, :-1], D_in[:, -1], 10)

print("\nTesting with 'out' data only:")
res_out = evaluate_attack(attack_model_bundle, D_out[:2000, :-1], D_out[:2000, -1], 10)

print("\nTesting with all prev data: ")
res_all = evaluate_attack(attack_model_bundle, np.concatenate((D_out[:2000, :-1], D_in[:, :-1])), np.concatenate((D_out[:2000, -1], D_in[:, -1])), 10)


Testing with 'in' data only:
class-1: 0.9356435537338257
class-2: 0.963350772857666
class-3: 0.8965517282485962
class-4: 0.8769230842590332
class-5: 0.9065420627593994
class-6: 0.8907103538513184
class-7: 0.9613526463508606
class-8: 0.9346733689308167
class-9: 0.9162561297416687
class-10: 0.9162561297416687

Testing with 'out' data only:
class-1: 0.43877550959587097
class-2: 0.3787878751754761
class-3: 0.5487179756164551
class-4: 0.6130653023719788
class-5: 0.5353535413742065
class-6: 0.5081080794334412
class-7: 0.31481480598449707
class-8: 0.4818652868270874
class-9: 0.2626728117465973
class-10: 0.4482758641242981

Testing with all prev data: 
class-1: 0.6909547448158264
class-2: 0.6658097505569458
class-3: 0.7261306643486023
class-4: 0.7436548471450806
class-5: 0.7281553149223328
class-6: 0.698369562625885
class-7: 0.631205677986145
class-8: 0.7117347121238708
class-9: 0.5785714387893677
class-10: 0.6822659969329834
