In [1]:
# import packages

import numpy as np
import tensorflow as tf

from absl import app
from absl import flags
from absl import logging

from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp
from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent
from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras import DPKerasSGDOptimizer



# DP-SGD



Stochastic Gradient Descent: optimizer for neural network 
- iterative procedure where at each iteration, batch of data randomly sampled from training set
- error b/w model's prediction and training labels computed (error called loss) and this loss is then differentiated with respect to model's parameters
- the derivatives (graidents) tells us how to update each parameter to bring model closer to predicting correct label
- iterativcely recomputing gradients and applying them to update model's parameter is called descent
- algorithm works by... 
1) sampling minibatch of training points (x, y) with x as input and y as label
2) compute loss (error) by L(theta, x, y) b/w model's prediction f_theta(x) and label y where theta represents model parameters
3) compute gradient of loss L(theta, x, y) with respect to model parameters theta
4) finally multiply gradients by learning rate and apply product to update model parameters theta.

Making Stochastic Gradient Descent Differentially Private
- make 2 modifications to make DP-SGD, first, sensitivity of each gradient needs to be bounded (limit how much each individual training point sampled in minibatch can influence resulting gradeint computation) by clipping each gradient between steps 3 and 4. Second, randomize algorithm's behavior to make it statistically impossible to know if a particular point was included by comparing updates stochastic gradient descent applies when it operates with or without a certain point and is done by sampling random noise and adding it to clipped gradients

Importance of TF Privacy
- TF Privacy provides code to wrap existing TF optimizer to create variant that performs steps needed to make differentially private SGD
- Code...
- to compute loss between model's prediction and labels -> vector_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits) -> we are using cross-entropy as common loss because it is well-suited for the classification problem (MNIST)
- to create an optimizer in TF, instantiate it by passing learning rate value with optimizers.dp_optimizer module of TF privacy to implement DF -> optimizer = optimizers.dp_optimizer.DPGradientDescentGaussianOptimizer(
    l2_norm_clip=FLAGS.l2_norm_clip,
    noise_multiplier=FLAGS.noise_multiplier,
    num_microbatches=FLAGS.microbatches,
    learning_rate=FLAGS.learning_rate,
    population_size=60000) 
    train_op = optimizer.minimize(loss=vector_loss)
    
- note: TF Privacy introduces 3 new hyperparameters to optimizer object including l2_norm clip (maximum Euclidean norm of each individual gradient to bound optimizer's sensitivity to individual training points), noise_multipler/Sigma (control how much noise is sampled and added to gradients before applied by optimizer and more noise results in better privacy at expense of lower utility) and num_microbatches (clipping by microbatches to allow for parallelism)


In [2]:
# define flags

flags.DEFINE_boolean(
    'dpsgd', True, 'If True, train with DP-SGD. If False, '
    'train with vanilla SGD.')
flags.DEFINE_float('learning_rate', 0.15, 'Learning rate for training')
flags.DEFINE_float('noise_multiplier', 0.1,
                   'Ratio of the standard deviation to the clipping norm')
flags.DEFINE_float('l2_norm_clip', 1.0, 'Clipping norm')
flags.DEFINE_integer('batch_size', 250, 'Batch size')
flags.DEFINE_integer('epochs', 60, 'Number of epochs')
flags.DEFINE_integer(
    'microbatches', 250, 'Number of microbatches '
    '(must evenly divide batch_size)')
flags.DEFINE_string('model_dir', None, 'Model directory')

FLAGS = flags.FLAGS


AttributeError: module 'tensorflow' has no attribute 'app'

In [3]:
def compute_epsilon(steps):

    if FLAGS.noise_multiplier == 0.0:
        return float('inf')
    orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
    sampling_probability = FLAGS.batch_size / 60000
    rdp = compute_rdp(
      q=sampling_probability,
      noise_multiplier=FLAGS.noise_multiplier,
      steps=steps,
      orders=orders)
    # Delta is set to 1e-5 because MNIST has 60000 training points.
    return get_privacy_spent(orders, rdp, target_delta=1e-5)[0]


def load_mnist():
  
    train, test = tf.keras.datasets.mnist.load_data()
    train_data, train_labels = train
    test_data, test_labels = test

    train_data = np.array(train_data, dtype=np.float32) / 255
    test_data = np.array(test_data, dtype=np.float32) / 255

    train_data = train_data.reshape((train_data.shape[0], 28, 28, 1))
    test_data = test_data.reshape((test_data.shape[0], 28, 28, 1))

    train_labels = np.array(train_labels, dtype=np.int32)
    test_labels = np.array(test_labels, dtype=np.int32)

    train_labels = tf.keras.utils.to_categorical(train_labels, num_classes=10)
    test_labels = tf.keras.utils.to_categorical(test_labels, num_classes=10)

    assert train_data.min() == 0.
    assert train_data.max() == 1.
    assert test_data.min() == 0.
    assert test_data.max() == 1.

    return train_data, train_labels, test_data, test_labels

    
    
logging.set_verbosity(logging.INFO)
if FLAGS.dpsgd and FLAGS.batch_size % FLAGS.microbatches != 0:
    raise ValueError('Number of microbatches should divide evenly batch_size')

# Load training and test data.
train_data, train_labels, test_data, test_labels = load_mnist()

# Define a sequential Keras model
model = tf.keras.Sequential([
  tf.keras.layers.Conv2D(
      16,
      8,
      strides=2,
      padding='same',
      activation='relu',
      input_shape=(28, 28, 1)),
  tf.keras.layers.MaxPool2D(2, 1),
  tf.keras.layers.Conv2D(
      32, 4, strides=2, padding='valid', activation='relu'),
  tf.keras.layers.MaxPool2D(2, 1),
  tf.keras.layers.Flatten(),
  tf.keras.layers.Dense(32, activation='relu'),
  tf.keras.layers.Dense(10)
])

if FLAGS.dpsgd:
    optimizer = DPKerasSGDOptimizer(
        l2_norm_clip=FLAGS.l2_norm_clip,
        noise_multiplier=FLAGS.noise_multiplier,
        num_microbatches=FLAGS.microbatches,
        learning_rate=FLAGS.learning_rate)
    # Compute vector of per-example loss rather than its mean over a minibatch.
    loss = tf.keras.losses.CategoricalCrossentropy(
        from_logits=True, reduction=tf.losses.Reduction.NONE)
else:
    optimizer = tf.keras.optimizers.SGD(learning_rate=FLAGS.learning_rate)
    loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True)

# Compile model with Keras
model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])

# Train model with Keras
model.fit(
  train_data,
  train_labels,
  epochs=FLAGS.epochs,
  validation_data=(test_data, test_labels),
  batch_size=FLAGS.batch_size)

# Compute the privacy budget expended.
if FLAGS.dpsgd:
    eps = compute_epsilon(FLAGS.epochs * 60000 // FLAGS.batch_size)
    print('For delta=1e-5, the current epsilon is: %.2f' % eps)
else:
    print('Trained with vanilla non-private SGD optimizer')




UnparsedFlagAccessError: Trying to access flag --dpsgd before flags were parsed.