In [1]:
!python -m pip install git+https://github.com/VissaMoutafis/opacus.git@35b88529db696444a87d166ca429232d46495658

In [2]:
!pip install hiplot 
!pip install GPUtil

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import hiplot as hip

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models, datasets
from tqdm.notebook import tqdm

# kaggle set up line
!cp ../input/mia-v2 mia_v2 -r 

from mia_v2.torch2tf import *
from mia_v2.attack_model import *
from mia_v2.label_only import *
from mia_v2.shadow_models import *
from mia_v2.utilities import *
from mia_v2.wrappers import ConfidenceVectorAttack, LabelOnlyAttack

from opacus import PrivacyEngine

from math import log, inf

# Use the first CUDA device when one is available, otherwise fall back to the CPU.
# `device` is a module-level global read by the model-building code further down.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

# Silence every Python warning from this point on (this also hides deprecation warnings).
import warnings
warnings.simplefilter("ignore")

In [4]:
# --- Experiment configuration -------------------------------------------------
# Number of leading samples of the image pool handed to the attacker.
ATTACKER_DATASET_SIZE = 10000
# Number of shadow models the attack trains (passed as `n_shadows`).
SHADOW_MODELS = 10
# Size of each shadow dataset (passed as `D_shadow_size`).
SHADOW_DATASET_SIZE = 5000
# Target dataset sizes the experiment loop iterates over.
TARGET_DATASET_SIZES = [2500, 5000, 7500]
# Per-epoch epsilon values; the experiment multiplies each by EPOCHS to get the
# total target epsilon given to the privacy engine.
EPSILON_PER_STEP = [log(50), log(20), log(15)]
# Gradient clipping bound forwarded to the privacy engine as `max_grad_norm`.
MAX_GRAD_NORMS = 1
# Epoch budget for every training call (shadow and target models).
EPOCHS = 50
# Number of independently trained DP models per (dataset size, epsilon) pair.
SAMPLE_TRAINS = 10
# Fraction of each target dataset held out as the test split.
TEST_SET_SIZE = 0.3

# --- Result accumulators --------------------------------------------------------
# Each list collects one dict per attacked model: the experiment configuration
# plus a single metric value. They are filled by `perform_attack`.
prec = []
rec = []
auc = []
model_vuln = [] # model vulnerability metric
model_acc = [] # accuracy of model

In [5]:
def create_target_model():
    """Build a fresh, untrained target model wrapped in a ``TfIgniter``.

    Returns
    -------
    TfIgniter
        Wrapper around a new ``TargetModel`` placed on the module-level
        ``device``. The two shape tuples are the ``(3, 32, 32)`` and
        ``(32, 32, 3)`` layouts passed to ``TfIgniter``.

    Notes
    -----
    No loss or optimizer is configured here: ``fit_model`` supplies them at
    training time. The previous body created a local ``criterion`` and
    ``optimizer`` (lr=0.001) that were never used, which wrongly suggested the
    target model was trained with those settings.
    """
    return TfIgniter((3, 32, 32), (32, 32, 3), TargetModel(), device)

def fit_model(igniter, train_images, train_labels, test_images, test_labels, epochs, privacy=None):
    """Train ``igniter`` on the training split, validating on the test split.

    Parameters
    ----------
    igniter
        Model wrapper exposing ``fit``.
    train_images, train_labels
        Training data.
    test_images, test_labels
        Data passed to ``fit`` as ``validation_data``.
    epochs : int
        Epoch budget for this call; a fresh ``CustomEarlyStopping`` (patience 10,
        min_delta 2e-5) is handed to ``fit`` as ``es``.
    privacy : dict, optional
        Privacy settings forwarded to ``fit``. When ``None`` the ``privacy``
        keyword is not passed at all.

    Notes
    -----
    ``optimizer`` and ``criterion`` are NOT parameters: they are looked up as
    module-level globals at call time, so they must be defined before the first
    call.
    """
    fit_kwargs = {
        'validation_data': (test_images, test_labels),
        'verbose': True,
        'epochs': epochs,
        'batch_size': 16,
        'es': CustomEarlyStopping(patience=10, min_delta=2e-5),
        'optimizer': optimizer,
        'criterion': criterion,
    }
    # Forward `privacy` only when the caller supplied one, so the non-private
    # path keeps relying on whatever default `fit` itself defines.
    if privacy is not None:
        fit_kwargs['privacy'] = privacy

    igniter.fit(train_images, train_labels, **fit_kwargs)

In [6]:
def perform_attack(attack, target_model, train_images, train_labels, test_images, test_labels, configs):
    """Attack ``target_model`` and append one record per metric to the result lists.

    Parameters
    ----------
    attack
        Attack object; its ``target_model`` and ``target_dataset`` attributes are
        overwritten before ``evaluate_attack()`` is called.
    target_model
        Trained model under attack; must expose ``predict``.
    train_images, train_labels
        Data the target model was trained on.
    test_images, test_labels
        Held-out data.
    configs : dict
        Description of the current experiment setting. A snapshot (``{**configs}``)
        is stored in every record, so later mutation of the dict by the caller
        does not alter what has already been recorded.

    Side effects
    ------------
    Appends to the module-level lists ``auc``, ``rec``, ``prec``, ``model_vuln``
    and ``model_acc``.
    """
    attack.target_model = target_model
    attack.target_dataset = (train_images, train_labels)
    # presumably score_[0] is a classification-report style dict and score_[1]
    # the AUC — verify against `evaluate_attack`.
    score_ = attack.evaluate_attack()

    auc.append({**configs, 'AUC Score': score_[1]})
    rec.append({**configs, 'Recall': score_[0]['macro avg']['recall']})
    prec.append({**configs, 'Precision': score_[0]['macro avg']['precision']})

    # Give the vulnerability metric equally sized train and test samples.
    min_d_size = min(len(test_images), len(train_images))
    vulnerability = evaluate_model_vulnerability(
        target_model,
        (train_images[:min_d_size], train_labels[:min_d_size]),
        (test_images[:min_d_size], test_labels[:min_d_size]),
        'tf',
        batch_size=64,
    )
    model_vuln.append({**configs, 'Model Vulnerability': vulnerability})

    y_pred = np.argmax(target_model.predict(test_images), axis=1)
    acc = accuracy_score(test_labels, y_pred)
    # Use the `configs` argument here as well; this line previously read the
    # global `config`, which only worked because callers passed that same object.
    model_acc.append({**configs, 'Classification Accuracy': acc})
    

In [7]:
# Load CIFAR-10 and merge its predefined train/test splits into a single pool;
# later cells carve their own attacker and target subsets out of `images`/`labels`.
(train_images, train_labels), (test_images, test_labels) = datasets.cifar10.load_data()
images = np.concatenate((train_images, test_images))
# Flatten the labels to a 1-D int64 vector.
labels = np.concatenate((train_labels, test_labels)).reshape(-1).astype(np.int64)
# Scale pixel values by 1/255 and view the data as (N, 3, 32, 32).
# NOTE(review): reshape only reinterprets the existing memory order; it does not
# move a channel axis. If load_data() returns channels-last arrays, a genuine
# channels-first layout would need np.transpose — confirm what TfIgniter expects.
images = (images / 255).reshape((-1, 3, 32, 32))

In [8]:
class TargetModel(nn.Module):
    """Small CNN: two conv + max-pool stages followed by two linear layers.

    The module is converted to double precision and moved to the module-level
    ``device`` as part of construction. ``fc1`` expects 2304 features, i.e.
    64 channels x 6 x 6, which is what the two conv/pool stages produce for
    32x32 inputs.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)
        self.fc1 = nn.Linear(in_features=2304, out_features=256)
        self.fc2 = nn.Linear(in_features=256, out_features=10)
        # float64 parameters, then placement on the selected device
        self.double().to(device)

    def forward(self, x):
        """Return the 10 raw (un-normalised) output scores for each item in ``x``."""
        stage1 = self.pool(F.relu(self.conv1(x)))
        stage2 = self.pool(F.relu(self.conv2(stage1)))
        # keep the batch dimension, flatten everything else
        flat = stage2.flatten(start_dim=1)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)

In [9]:
def f_shadow():
    """Factory for shadow models: a new ``TargetModel`` wrapped in a ``TfIgniter``."""
    shadow_net = TargetModel()
    return TfIgniter((3, 32, 32), (32, 32, 3), shadow_net, device)

In [10]:
# Shared, mutable description of the current experiment setting. The experiment
# loop fills it in and `perform_attack` stores a snapshot in every result record.
config = {}
# (not recorded) config['D_attacker'] = ATTACKER_DATASET_SIZE
# The attacker gets the first ATTACKER_DATASET_SIZE samples of the pool.
attacker_images, attacker_labels = images[:ATTACKER_DATASET_SIZE], labels[:ATTACKER_DATASET_SIZE]

# (not recorded) config['N_Shadows'] = SHADOW_MODELS
# The attack receives the images viewed as (N, 32, 32, 3).
attacker_dataset = (attacker_images.reshape(-1, 32, 32, 3), attacker_labels)
print("Setting up Confidence Vector Attack...")
# No target model yet (first argument is None); `perform_attack` assigns one
# before each evaluation. The same attacker dataset is passed for both dataset
# arguments.
attack = ConfidenceVectorAttack(None, attacker_dataset, attacker_dataset, 
                                shadow_creator=f_shadow, 
                                attack_model_creator=cifar_10_f_attack_builder,
                                n_shadows=SHADOW_MODELS, 
                                D_shadow_size=SHADOW_DATASET_SIZE, 
                                verbose=False)
# Training settings for the shadow models.
# NOTE: `criterion` and `optimizer` are module-level names that `fit_model` also
# reads as globals, so the target models are trained with these same settings.
es = CustomEarlyStopping(patience=15, min_delta=2e-5)
# The loss class itself is passed, not an instance — presumably instantiated by
# the training code; verify.
criterion = nn.CrossEntropyLoss
optimizer = {'builder':optim.Adam, 'args':{'lr':0.003}}
print("Training Confidence Vector Attack...")
attack.perform_attack(shadow={'epochs':EPOCHS, 'batch_size':32, 'es':es, 'optimizer':optimizer, 'criterion':criterion})
print("Done")

In [12]:

# Main experiment. For every target dataset size:
#   1. train and attack an undefended model;
#   2. for every per-epoch epsilon, train SAMPLE_TRAINS differentially private
#      models and attack each of them after every training round.

# Number of consecutive EPOCHS-long training rounds each DP model goes through.
DP_FIT_ROUNDS = 5

for d_target_size in TARGET_DATASET_SIZES:
    # Start each dataset size from a clean record: drop the keys left behind by
    # the DP runs of the previous size, otherwise the undefended entry would be
    # tagged with a stale 'dp_run_id' / 'epochs'.
    config.pop('dp_run_id', None)
    config.pop('epochs', None)
    config['D_target'] = d_target_size
    config['eps'] = None
    config['max_grad_norm'] = None

    # `d_target_size` is the number of TRAINING samples (it is what gets recorded
    # as 'D_target' and what delta is derived from). Size the held-out test set
    # so that it makes up TEST_SET_SIZE of the combined target dataset.
    n_test = int(np.ceil(d_target_size * TEST_SET_SIZE / (1 - TEST_SET_SIZE)))
    d_size = d_target_size + n_test
    assert d_size + ATTACKER_DATASET_SIZE <= len(images)  # sanity check
    # make sure that attacker and target dataset are disjoint
    target_slice = slice(ATTACKER_DATASET_SIZE, ATTACKER_DATASET_SIZE + d_size)
    train_images, train_labels = images[target_slice], labels[target_slice]

    # TRAIN & ATTACK UNDEFENDED MODEL
    # Explicit integer sizes guarantee exactly `d_target_size` training samples.
    train_images, test_images, train_labels, test_labels = train_test_split(
        train_images, train_labels,
        train_size=d_target_size, test_size=n_test,
        shuffle=True, random_state=0)
    print("Training undefended model...")
    target_model = create_target_model()
    fit_model(target_model, train_images, train_labels, test_images, test_labels, EPOCHS)
    print("Attacking undefended model...")
    perform_attack(attack, target_model, train_images, train_labels, test_images, test_labels, config)
    del target_model

    # TRAIN & ATTACK DEFENDED MODELS
    for eps in EPSILON_PER_STEP:
        config['eps'] = eps
        # Record the clipping bound actually used for the DP runs.
        config['max_grad_norm'] = MAX_GRAD_NORMS
        # set up privacy details
        total_epsilon = EPOCHS*eps
        delta = 1/d_target_size
        # NOTE(review): a single PrivacyEngine instance is shared by every model
        # and every fit call for this eps — confirm the training wrapper supports
        # reusing an engine across models.
        privacy = {
            'engine': PrivacyEngine(),
            'args': {
                'epochs': EPOCHS,
                'target_epsilon': total_epsilon,
                'target_delta': delta,
                'max_grad_norm': MAX_GRAD_NORMS
            }
        }
        print(f'Attacking ({eps}-{delta})-DP model: {SAMPLE_TRAINS}-attack-attempts...')
        for sample_train_id in range(SAMPLE_TRAINS):
            config['dp_run_id'] = sample_train_id
            target_model = create_target_model()
            for i in range(DP_FIT_ROUNDS):
                # The fit below runs before the attack, so by the time the record
                # is written the model has been given (i + 1) * EPOCHS epochs
                # (a nominal budget: early stopping may end a round sooner).
                config['epochs'] = (i + 1) * EPOCHS
                fit_model(target_model, train_images, train_labels, test_images, test_labels, EPOCHS, privacy=privacy)
                perform_attack(attack, target_model, train_images, train_labels, test_images, test_labels, config)
            del target_model

In [None]:
def reduce_stats(stats, y_key):
    """Average ``y_key`` over repeated runs of the same configuration.

    Parameters
    ----------
    stats : list of dict
        Result records. Each record holds configuration entries plus the metric
        stored under ``y_key``; records of repeated runs may additionally carry
        a ``'dp_run_id'`` entry identifying the repetition.
    y_key : str
        Name of the metric to average.

    Returns
    -------
    list of dict
        One record per distinct configuration, in order of first appearance.
        Each contains the configuration entries (without ``'dp_run_id'``) and
        the mean of ``y_key`` over all records sharing that configuration.
        An empty input yields an empty list.

    Raises
    ------
    KeyError
        If a record has no ``y_key`` entry.
    """
    grouped = {}
    for record in stats:
        # The run id and the metric itself are not part of the configuration.
        cfg = {k: v for k, v in record.items() if k not in (y_key, 'dp_run_id')}
        # Hashable, order-independent identity of the configuration; `repr`
        # keeps this working even if a configuration value is unhashable.
        group_id = tuple(
            (str(k), repr(v))
            for k, v in sorted(cfg.items(), key=lambda kv: str(kv[0]))
        )
        # Records of one configuration need not be adjacent in `stats`, so
        # collect them by identity rather than by position.
        grouped.setdefault(group_id, (cfg, []))[1].append(record[y_key])

    return [
        {**cfg, y_key: float(np.mean(values))}
        for cfg, values in grouped.values()
    ]
    

In [None]:
# Parallel-coordinates plot of the attack AUC per experiment configuration.
# `reduce_stats` needs the metric key under which `perform_attack` stored the value.
exp = hip.Experiment.from_iterable(reduce_stats(auc, 'AUC Score'))
exp.display_data(hip.Displays.PARALLEL_PLOT).update({'hide': ['uid'],})
exp.display()

In [None]:
# Parallel-coordinates plot of the model vulnerability metric per experiment configuration.
# `reduce_stats` needs the metric key under which `perform_attack` stored the value.
exp = hip.Experiment.from_iterable(reduce_stats(model_vuln, 'Model Vulnerability'))
exp.display_data(hip.Displays.PARALLEL_PLOT).update({'hide': ['uid'],})
exp.display()

In [None]:
# Parallel-coordinates plot of the attack's macro-averaged precision per experiment configuration.
# `reduce_stats` needs the metric key under which `perform_attack` stored the value.
exp = hip.Experiment.from_iterable(reduce_stats(prec, 'Precision'))
exp.display_data(hip.Displays.PARALLEL_PLOT).update({'hide': ['uid'],})
exp.display()

In [None]:
# Parallel-coordinates plot of the attack's macro-averaged recall per experiment configuration.
# `reduce_stats` needs the metric key under which `perform_attack` stored the value.
exp = hip.Experiment.from_iterable(reduce_stats(rec, 'Recall'))
exp.display_data(hip.Displays.PARALLEL_PLOT).update({'hide': ['uid'],})
exp.display()

In [None]:
# Parallel-coordinates plot of the target model's classification accuracy per experiment configuration.
# `reduce_stats` needs the metric key under which `perform_attack` stored the value.
exp = hip.Experiment.from_iterable(reduce_stats(model_acc, 'Classification Accuracy'))
exp.display_data(hip.Displays.PARALLEL_PLOT).update({'hide': ['uid'],})
exp.display()