# CIFAR CLASSIFICATION

### Imports

In [79]:
import tensorflow as tf

import deel.lipdp.layers as DP_layers
import deel.lipdp.losses as DP_losses
from deel.lipdp.pipeline import bound_clip_value
from deel.lipdp.pipeline import load_and_prepare_data
from deel.lipdp.sensitivity import get_max_epochs
from deel.lipdp.model import DP_Accountant
from deel.lipdp.model import DP_Sequential
from deel.lipdp.model import DPParameters
from deel.lipdp.model import AdaptiveLossGradientClipping

### Loading the data :

It is important to import the data with the right DP parameters to account properly for the privacy guarantees of the trained model.

In [80]:
# Toggle data augmentation; when enabled, the listed functions are handed to
# the data loader through its `augmentations` argument.
augment = True
augmentations = [tf.image.flip_left_right] if augment else None

ds_train, ds_test, dataset_metadata = load_and_prepare_data(
    "cifar10",
    batch_size=2048,
    colorspace="HSV",
    augmentations=augmentations,
    # accounting assumes fixed batch size
    drop_remainder=True,
    # clipping preprocessing allows to control input bound
    bound_fct=bound_clip_value(15.0),
)

Please pay attention to the fact that the effective batch size in memory will be batch_size $\times$ len(augmentations).

Fortunately, the backpropagation time in our framework is less sensitive to the batch size than in other frameworks:

<img src="ressources/all_speed_curves.png" alt="Speed curves with concurrent frameworks" width="800"/>

### Declaring the DP parameters :

We also need to declare explicitly the parameters of the DP training process.

In [81]:
# Explicit settings of the DP training process; they are passed to the
# DP_Sequential model. `delta` is the second component of the
# (epsilon, delta)-DP guarantee printed after each training epoch.
dp_parameters = DPParameters(noisify_strategy="global", noise_multiplier=1.0, delta=1e-5)

### Defining the model :

We use a simple convolutional network to classify the CIFAR-10 dataset. We add a loss gradient clipping layer at the end of the network to tighten the upper bound on the gradient. Because the clipping constant is chosen dynamically, this gives better results and leaves one less hyperparameter to tune.

In [82]:
# Keyword arguments shared by every spectral layer of the network below.
spectral_kwargs = dict(use_bias=False, kernel_initializer="orthogonal")

layers = [
    # Input layer: shape and norm bound are read from the dataset metadata.
    DP_layers.DP_BoundedInput(
        input_shape=dataset_metadata.input_shape,
        upper_bound=dataset_metadata.max_norm,
    ),
    DP_layers.DP_SpectralConv2D(filters=32, kernel_size=3, **spectral_kwargs),
    DP_layers.DP_Flatten(),
    DP_layers.DP_SpectralDense(units=512, **spectral_kwargs),
    DP_layers.DP_GroupSort(2),
    # One output unit per class.
    DP_layers.DP_SpectralDense(units=10, **spectral_kwargs),
    # Loss gradient clipping layer placed at the very end of the network.
    DP_layers.DP_ClipGradient(epsilon=1, mode="dynamic_svt", patience=5),
]

model = DP_Sequential(
    layers=layers,
    dp_parameters=dp_parameters,
    dataset_metadata=dataset_metadata,
)

loss = DP_losses.DP_TauCategoricalCrossentropy(tau=20.0)

# Compatible with any kind of non-private optimizer :
opt = tf.keras.optimizers.SGD(learning_rate=1e-2)

model.compile(loss=loss, optimizer=opt, metrics=["accuracy"], run_eagerly=False)

  warn(_msg_not_lip.format(layer.name))
  warn(_msg_not_lip.format(layer.name))


### Define the desired DP guarantees :

We compute the maximum number of epochs that can be run while still satisfying the desired DP guarantees:

In [83]:
# Privacy budget for the whole training run; the search log printed by
# get_max_epochs reports the epsilon reached at each candidate epoch count.
target_epsilon = 8.0
num_epochs = get_max_epochs(target_epsilon, model)

epoch bounds = (0, 512.0) and epsilon = 145.43699578143378 at epoch 512.0
epoch bounds = (0, 256.0) and epsilon = 78.50396131727922 at epoch 256.0
epoch bounds = (0, 128.0) and epsilon = 44.007702875320284 at epoch 128.0
epoch bounds = (0, 64.0) and epsilon = 26.759573654340812 at epoch 64.0
epoch bounds = (0, 32.0) and epsilon = 17.233876551599256 at epoch 32.0
epoch bounds = (0, 16.0) and epsilon = 11.129175323732037 at epoch 16.0
epoch bounds = (8.0, 16.0) and epsilon = 7.653461249237745 at epoch 8.0
epoch bounds = (8.0, 12.0) and epsilon = 9.391318298430164 at epoch 12.0
epoch bounds = (8.0, 10.0) and epsilon = 8.945753193137257 at epoch 10.0
epoch bounds = (9.0, 10.0) and epsilon = 7.876243805065025 at epoch 9.0


### Train the model : 

The training process is launched through the `model.fit` method. We use the following callbacks:

- **DP_Accountant** (log_fn) : accounts for the privacy guarantees after each epoch of training (*log_fn* makes it compatible with W&B logging).
- **AdaptiveLossGradientClipping** (ds_train) : automatically updates the loss's gradient clipping constant every *patience* epochs (*patience* is set on the `DP_ClipGradient` layer).


In [84]:
# Reports the (epsilon, delta)-DP guarantees after each epoch.
privacy_accountant = DP_Accountant(log_fn="logging")

# DO NOT USE THIS CALLBACK WHEN mode != "dynamic_svt"
adaptive_clipping = AdaptiveLossGradientClipping(ds_train=ds_train)

callbacks = [privacy_accountant, adaptive_clipping]

# Train for the epoch budget computed above, validating on the test split.
hist = model.fit(
    ds_train, epochs=num_epochs, validation_data=ds_test, callbacks=callbacks
)

On train begin : 
Initial value is now equal to lipschitz constant of loss:  tf.Tensor(1.4142135, shape=(), dtype=float32)
Epoch 1/9


 (3.520469678118549, 1e-05)-DP guarantees for epoch 1 

updated_clip_value :  1.0928657358879175
Epoch 2/9
 (3.953077748875339, 1e-05)-DP guarantees for epoch 2 

Epoch 3/9
 (4.38568578363591, 1e-05)-DP guarantees for epoch 3 

Epoch 4/9
 (4.818293794804343, 1e-05)-DP guarantees for epoch 4 

Epoch 5/9
 (6.146785094069724, 1e-05)-DP guarantees for epoch 5 

Epoch 6/9
 (6.579393133892413, 1e-05)-DP guarantees for epoch 6 

updated_clip_value :  1.090034477504788
Epoch 7/9
 (7.012001173715104, 1e-05)-DP guarantees for epoch 7 

Epoch 8/9
 (7.42943319023189, 1e-05)-DP guarantees for epoch 8 

Epoch 9/9
 (7.653461249237745, 1e-05)-DP guarantees for epoch 9 



### 