# Data Privacy Final Project
### Sam Clark & Josh Childs

For this project, we've decided to compare the accuracy of several normal Convolutional Neural Networks to their counter parts that will use differential privacy. We will be using the MNIST dataset with the tensflow library.  

In [1]:
import tensorflow.compat.v2 as tf
import tensorflow_datasets as tfds
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from ipywidgets import IntProgress
from sklearn.metrics import classification_report
import numpy as np
import pickle
tf.enable_v2_behavior()

In [2]:
from tensorflow.compat.v1.distributions import Laplace

## Load MNIST Data

In [3]:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

## Preprocess Data

In [4]:
x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)
input_shape = (28, 28, 1)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255

## Build Model

In [5]:
model = Sequential([
  Conv2D(28, kernel_size=(3,3), input_shape=input_shape),
  MaxPooling2D(pool_size=(2, 2)),
  Flatten(),
  Dense(128, activation=tf.nn.relu),  
  Dropout(0.3),
  Dense(10,activation=tf.nn.softmax)
])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 26, 26, 28)        280       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 13, 13, 28)        0         
_________________________________________________________________
flatten (Flatten)            (None, 4732)              0         
_________________________________________________________________
dense (Dense)                (None, 128)               605824    
_________________________________________________________________
dropout (Dropout)            (None, 128)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 10)                1290      
Total params: 607,394
Trainable params: 607,394
Non-trainable params: 0
__________________________________________________

## Train & Save

In [6]:
# callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)

# model.compile(optimizer='sgd',
#               loss='sparse_categorical_crossentropy', 
#               metrics=['accuracy'])

# model.fit(x=x_train,y=y_train, epochs=1, callbacks = [callback])
# model.save("models/original")

## Evaluate Models

In [7]:
# model.evaluate(x_test, y_test)

In [8]:
len(x_train)

60000

## Differential Privacy Optimizer Implementation

In [9]:
def tf_l2_clip(v, b):
    norm = tf.norm(v, ord=2)
    return tf.cond(norm > b, lambda: b * (v / norm), lambda: v)

def laplace_mech(v, sensitivity, epsilon):
    return v + np.random.laplace(loc=0, scale=sensitivity / epsilon)

def tf_laplace_mech(v, sensitivity, epsilon):
    return tf.numpy_function(laplace_mech, [v, sensitivity, epsilon], tf.float32)

def tf_gaussian_mech(v, sensitivity, epsilon, delta):
    return v + tf.random.normal(v.shape, mean=0.0, stddev=sensitivity * np.sqrt(2*np.log(1.25/delta)) / epsilon)

def tf_gaussian_mech_RDP(v, sensitivity, alpha, epsilon):
    sigma = np.sqrt((sensitivity**2 * alpha) / (2 * epsilon))
    return v + tf.random.normal(v.shape, mean=0.0, stddev=sigma)

def tf_gaussian_mech_zCDP(v, sensitivity, rho):
    sigma = np.sqrt((sensitivity**2) / (2 * rho))
    return v + tf.random.normal(v.shape, mean=0.0, stddev=sigma)

In [10]:
class DPOptimizer(tf.keras.optimizers.Optimizer):
    def __init__(self, epochs, b=3.0, learning_rate=0.01, name="DPOptimizer", **kwargs):
        super().__init__(name, **kwargs)
        self._set_hyper("learning_rate", learning_rate)
        self.epochs = epochs
        self.b = b
    
    def _create_slots(self, var_list):
        pass

    def get_config(self):
        base_config = super().get_config()
        return {
            **base_config,
            "learning_rate": self._serialize_hyperparameter("learning_rate"),
        }

    
class EpsilonDeltaDPGradientDescent(DPOptimizer):
    def __init__(self, epochs, epsilon, delta, b=3.0, learning_rate=0.01, name="EpsilonDeltaDPGradientDescent", **kwargs):
        DPOptimizer.__init__(self, epochs, b=b, learning_rate=learning_rate, name=name, **kwargs)        
        self.epsilon = epsilon
        self.delta = delta

    @tf.function
    def _resource_apply_dense(self, grad, var):
        var_dtype = var.dtype.base_dtype
        lr_t = self._decayed_lr(var_dtype)
        
        epsilon_i = self.epsilon / self.epochs
        delta_i = self.delta / self.epochs
        
        #clipped_grad = tf.math.reduce_mean(tf_l2_clip(grad, self.b), axis=0, keepdims=True)
        #clipped_grad = tf.numpy_function(lambda x: np.mean(x, axis=0), [tf_l2_clip(grad, self.b)], tf.float32)
        #clipped_grad = tf.math.reduce_mean(tf.clip_by_norm(grad, self.b), axis=0)
        clipped_grad = tf_l2_clip(grad, self.b)
        new_var_m = var - tf_gaussian_mech(clipped_grad, self.b/len(x_train), epsilon_i, delta_i) * lr_t
        #new_var_m = var - grad * lr_t
        
        new_var = new_var_m
        var.assign(new_var)

    
class RenyiDPGradientDescent(DPOptimizer):
    def __init__(self, epochs, alpha, epsilon_bar, b=3.0, learning_rate=0.01, name="RenyiDPGradientDescent", **kwargs):
        super().__init__(epochs,b=b, learning_rate=learning_rate, name=name, **kwargs)
        self._set_hyper("learning_rate", learning_rate)
        
        self.epsilon_bar = epsilon_bar
        self.alpha = alpha
        
    @tf.function
    def _resource_apply_dense(self, grad, var):
        var_dtype = var.dtype.base_dtype
        lr_t = self._decayed_lr(var_dtype)
        
        epsilon_bar_i = self.epsilon_bar / self.epochs
         
        clipped_grad = tf_l2_clip(grad, self.b)
        new_var_m = var - tf_gaussian_mech_RDP(clipped_grad, self.b/len(x_train), self.alpha, epsilon_bar_i) * lr_t
        
        new_var = new_var_m
        var.assign(new_var)

        
class ZeroConcentratedDPGradientDescent(DPOptimizer):
    def __init__(self, epochs, rho, b=3.0, learning_rate=0.01, name="ZeroConcentratedDPGradientDescent", **kwargs):
        super().__init__(epochs,b=b, learning_rate=learning_rate, name=name, **kwargs)
        self._set_hyper("learning_rate", learning_rate)
        
        self.rho = rho
        
    @tf.function
    def _resource_apply_dense(self, grad, var):
        var_dtype = var.dtype.base_dtype
        lr_t = self._decayed_lr(var_dtype)
        
        rho_i = self.rho / self.epochs
         
        clipped_grad = tf_l2_clip(grad, self.b)
        new_var_m = var - tf_gaussian_mech_zCDP(clipped_grad, self.b/len(x_train), rho_i) * lr_t
        
        new_var = new_var_m
        var.assign(new_var)
        
class PureDPGradientDescent(DPOptimizer):
    def __init__(self, epochs, epsilon, b=3.0, learning_rate=0.01, name="PureDPGradientDescent", **kwargs):
        super().__init__(epochs,b=b, learning_rate=learning_rate, name=name, **kwargs)
        self._set_hyper("learning_rate", learning_rate)
        
        self.epsilon = epsilon
        
    @tf.function
    def _resource_apply_dense(self, grad, var):
        var_dtype = var.dtype.base_dtype
        lr_t = self._decayed_lr(var_dtype)
        
        epsilon_i = self.epsilon / self.epochs
         
        clipped_grad = tf_l2_clip(grad, self.b)
        new_var_m = var - tf_laplace_mech(clipped_grad, self.b/len(x_train), epsilon_i) * lr_t
        
        new_var = new_var_m
        var.assign(new_var)

In [11]:
# es = callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)
# ed_dp = EpsilonDeltaDPGradientDescent(epochs=3, epsilon=0.1, delta=1e-5)
# r_dp = RenyiDPGradientDescent(epochs=1, alpha=500, epsilon_bar=0.001)
# zc_dp = ZeroConcentratedDPGradientDescent(epochs=5, rho=0.000001)
# pure_dp = PureDPGradientDescent(epochs=1, epsilon=.001)

# # we need to create new layers, otherwise scuffed
# model = Sequential([
#   Conv2D(28, kernel_size=(3,3), input_shape=input_shape),
#   MaxPooling2D(pool_size=(2, 2)),
#   Flatten(),
#   Dense(128, activation=tf.nn.relu),  
#   Dropout(0.3),
#   Dense(10,activation=tf.nn.softmax)
# ])

# model.compile(optimizer=pure_dp, 
#               loss='sparse_categorical_crossentropy', 
#               metrics=['accuracy'])

# model.fit(x=x_train,y=y_train, epochs=1, callbacks=[es], batch_size=64)


## Effect of Noise on Training 

In [18]:
rhos = np.linspace(0.0000001, 0.0001, 100)

epsilons = []
for rho in rhos:
    epsilon = rho + 2 * np.sqrt(rho * np.log(1 / 1e-5))
    epsilons.append(epsilon)

epsilon_bars = []
for epsilon in epsilons:
    epsilon_bar = epsilon - np.log(1 / 1e-5) / (5 - 1)
    epsilon_bars.append(epsilon_bar)

ep_de_opts = [EpsilonDeltaDPGradientDescent(epochs=10, epsilon=e, delta=1e-5) for e in epsilons]
renyi_opts = [RenyiDPGradientDescent(epochs=10, alpha=5, epsilon_bar=e_b) for e_b in epsilon_bars]
zeroc_opts = [ZeroConcentratedDPGradientDescent(epochs=10, rho=r) for r in rhos]
pured_opts = [PureDPGradientDescent(epochs=10, epsilon=e) for e in epsilons]

optimizer_data = {
    "EpsilonDelta": {
        "batches" : ep_de_opts,
        "accuracy": []
    },
    "Renyi": {
        "batches" : renyi_opts,
        "accuracy": []
    },
    "ZeroConc": {
        "batches" : zeroc_opts,
        "accuracy": []
    },
    "Pure": {
        "batches": pured_opts,
        "accuracy": []
    }
}    

In [None]:
for opt in optimizer_data:
    for batch in optimizer_data[opt]["batches"]:

        model = Sequential([
          Conv2D(28, kernel_size=(3,3), input_shape=input_shape),
          MaxPooling2D(pool_size=(2, 2)),
          Flatten(),
          Dense(128, activation=tf.nn.relu),  
          Dropout(0.3),
          Dense(10,activation=tf.nn.softmax)
        ])

        model.compile(optimizer=batch, 
                      loss='sparse_categorical_crossentropy', 
                      metrics=['accuracy'])

        model.fit(x=x_train,y=y_train, epochs=10, batch_size=64)
        accuracy = model.evaluate(x_test, y_test)
        optimizer_data[opt]["accuracy"].append(accuracy[1])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10


Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10

In [None]:
pickle.dump(optimizer_data, open("optimizer_data.pickle", "wb"))

In [86]:
t = tf.constant([1.0, 1.0, 1.0, 1.0])
tf_RDP_gaussian_mech(t, 0.0001, 500, 0.001)

NameError: name 'tf_RDP_gaussian_mech' is not defined

In [87]:
t = tf.constant([1.0, 1.0, 1.0, 1.0])
tf_gaussian_mech_zCDP(t, 0.0001, 0.1)

<tf.Tensor: shape=(4,), dtype=float32, numpy=array([1.0002027 , 0.99996525, 1.0000987 , 1.0000061 ], dtype=float32)>

In [13]:
t = tf.constant([1.0, 1.0, 1.0, 1.0])
tf_gaussian_mech_RDP(t, 1.0, 5, 0.0001)

<tf.Tensor: shape=(4,), dtype=float32, numpy=array([ 115.59105 ,  316.6945  , -122.92863 ,  -63.800293], dtype=float32)>

In [89]:
tf.math.reduce_mean(t, axis=0)

<tf.Tensor: shape=(), dtype=float32, numpy=1.0>

In [32]:
model = Sequential([
  Flatten(input_shape=(28, 28, 1)),
  Dense(128, activation='relu'),
  Dense(10, activation='softmax')
])

model.compile(optimizer=EpsilonDeltaDPGradientDescent(epochs=10, epsilon=10000000.0, delta=1e-5),
              loss='sparse_categorical_crossentropy', 
              metrics=['accuracy'])

model.fit(x=x_train,y=y_train, epochs=10, batch_size=64)
accuracy = model.evaluate(x_test, y_test)
optimizer_data[opt]["accuracy"].append(accuracy[1])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
