This example contains the necessary bits of code to run the federated training with differential privacy (dp). The imortant parameters to choose and tune for this are in the dictionary **security_params**.

In [1]:
import os
import sys
import requests
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import jsonpickle as jpk
import time
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from tqdm.notebook import tqdm
# Federated imports
import forcast_federated_learning as ffl

# Parameters
num_clients        = 10
com_rounds         = 200
seed               = 0
batch_size         = 200
noise_multiplier   = 0.3
max_grad_norm      = 0.5
epochs             = 2
lr                 = 0.005
secure_rng         = True
device             = 'cuda'

# Metrics
df_metrics = pd.DataFrame(dict(zip(['round', 'accuracy', 'loss', 'epsilon', 'delta'], [int,[],[],[],[]])))

# Load local train data
import tensorflow as tf
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
X_train, X_test = X_train / 255.0, X_test / 255.0

# Create custom pytorch datasers for train and testing
traindata = ffl.datasets.ImageDataset(X_train, y_train, categorical=True)
testdata  = ffl.datasets.ImageDataset(X_test, y_test, categorical=True)

In [2]:
# Split the train data and use only a fraction
traindata_split = ffl.data.random_split(traindata, num_clients=num_clients, seed=seed)

# Get data loader
train_loaders = [ffl.utils.DataLoader(traindata, batch_size=batch_size, shuffle=True, seed=seed)   for traindata in traindata_split]
test_loader  = ffl.utils.DataLoader(testdata, batch_size=len(testdata), shuffle=True, seed=seed)

In [3]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 12, 5)
        self.fc1 = nn.Linear(12 * 4 * 4, 20)
        self.drop_layer = nn.Dropout(p=0.1)
        self.fc2 = nn.Linear(20, 10)
        self.fc3 = nn.Linear(10, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 12 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = self.drop_layer(x)
        x = F.relu(self.fc2(x))
        x = F.softmax(self.fc3(x), dim=1)
        
        return x
    
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d( 1, 64, kernel_size=3)
        self.conv2 = nn.Conv2d(64, 64, kernel_size=3)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.bn1   = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3)
        self.conv4 = nn.Conv2d(128, 128, kernel_size=3)
        self.pool2 = nn.MaxPool2d(2, 2)
        # flatten
        self.bn2   = nn.BatchNorm1d(128 * 4 * 4)
        self.fc1   = nn.Linear(128 * 4 * 4, 512)
        self.fc2   = nn.Linear(512, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = self.pool1(x)
        x = self.bn1(x)
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = self.pool2(x)
        x = x.view(-1, 128 * 4 * 4)
        x = self.bn2(x)
        x = F.relu(self.fc1(x))
        x = F.softmax(self.fc2(x), dim=1)
        
        return x

In [4]:
# Train params
delta              = 10**-np.ceil(np.log10(len(traindata))) # delta < 1/len(dataset)
security_params    = {'noise_multiplier': noise_multiplier, 'max_grad_norm': max_grad_norm, 'batch_size': batch_size, 'sample_size': len(traindata), 'target_delta': delta, 'secure_rng': secure_rng} 
optimizer_params   = {'lr': lr}
train_params       = {'epochs': epochs}

local_models       = []
for _ in range(num_clients):
    # Create federated model based on a pytorch model
    num_features, num_classes  = 4, 3
    model                      = CNN() # pytorch model
    model     = ffl.security.module_modification.convert_batchnorm_modules(model)
    inspector = ffl.security.DPModelInspector()
    assert inspector.validate(model) == True
    loss_fn                    = nn.CrossEntropyLoss() # classification
    local_model                = ffl.LocalModel(model, model_type = 'nn', loss_fn=loss_fn, train_params=train_params)
    local_model.optimizer      = ffl.optim.Adam(local_model.parameters(), **optimizer_params)
    local_model.privacy_engine = ffl.security.PrivacyEngine(local_model, **security_params)
    local_model.privacy_engine.attach(local_model.optimizer)
    
    local_models.append(local_model)

In [5]:
model           = local_model.model # pytorch model
fed_model       = ffl.FederatedModel(model, model_type='nn')
public_context, secret_key = ffl.encryption.get_context()
encryption      = False

In [6]:
for com_round in tqdm(range(com_rounds)):
    for local_model, train_loader in zip(local_models, train_loaders):
        local_model.step(train_loader, device=device)
    
    client_weights = []
    for local_model in local_models:
        state_dict      = local_model.state_dict()
        if encryption:
            # Each client encrypts the their model parameters (state_dict)
            # The library handles internally the encrypted data so the functions don't change much
            enc_state_dict  = ffl.encryption.EncStateDict(state_dict)
            enc_state_dict  = enc_state_dict.encrypt(public_context)
            client_weights.append(enc_state_dict)
        else:
            client_weights.append(state_dict)
    client_lens    = [len(traindata) for traindata in traindata_split]
    
    ## Server aggregate
    fed_model.server_agregate(client_weights, client_lens, secret_key=secret_key)
    weights = fed_model.state_dict()
    
    for local_model in local_models:
        local_model.load_state_dict(weights)
    
    acc, loss = local_model.test(test_loader)
    if local_model.privacy_engine: # privacy spent
        epsilon, best_alpha = local_model.privacy_engine.get_privacy_spent(delta)
        print(f'Test accuracy: {acc:.2f} - Privacy spent: (ε = {epsilon:.2f}, δ = {delta:.2f})')
        df_aux       = pd.DataFrame({'round': [com_round+1], 'accuracy': [acc], 'loss': [loss], 'epsilon': [epsilon], 'delta':[delta] })
    else: 
        print(f'Test accuracy: {acc:.2f}')
        df_aux       = pd.DataFrame({'round': [com_round+1], 'accuracy': [acc], 'loss': [loss], 'epsilon': [None], 'delta':[None] })
        
    # Save metrics
    df_metrics   = pd.concat([df_metrics, df_aux], axis=0)

HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))

Test accuracy: 15.08 - Privacy spent: (ε = 20.13, δ = 0.00)
Test accuracy: 90.69 - Privacy spent: (ε = 23.24, δ = 0.00)
Test accuracy: 94.83 - Privacy spent: (ε = 25.27, δ = 0.00)
Test accuracy: 95.82 - Privacy spent: (ε = 27.29, δ = 0.00)
Test accuracy: 96.47 - Privacy spent: (ε = 29.08, δ = 0.00)
Test accuracy: 96.79 - Privacy spent: (ε = 30.29, δ = 0.00)
Test accuracy: 97.17 - Privacy spent: (ε = 31.50, δ = 0.00)
Test accuracy: 97.24 - Privacy spent: (ε = 32.71, δ = 0.00)
Test accuracy: 97.47 - Privacy spent: (ε = 33.92, δ = 0.00)
Test accuracy: 97.56 - Privacy spent: (ε = 35.13, δ = 0.00)
Test accuracy: 97.63 - Privacy spent: (ε = 36.35, δ = 0.00)
Test accuracy: 97.73 - Privacy spent: (ε = 37.56, δ = 0.00)
Test accuracy: 97.82 - Privacy spent: (ε = 38.77, δ = 0.00)
Test accuracy: 97.83 - Privacy spent: (ε = 39.67, δ = 0.00)
Test accuracy: 97.90 - Privacy spent: (ε = 40.45, δ = 0.00)
Test accuracy: 98.10 - Privacy spent: (ε = 41.22, δ = 0.00)
Test accuracy: 98.00 - Privacy spent: (ε

In [7]:
# Save metrics onto csv file
df_metrics.to_csv('./sim_mnist_dp.csv', index=False)