<a href="https://colab.research.google.com/github/xavoliva6/oml_project/blob/main/src/index.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Federated Machine Learning with Differential Privacy

**Clone Repo**<br/>
Run the following cells in Google Colab to clone the code from GitHub to your Google Drive, or to pull the latest changes.

In [None]:
# Mount Google Drive at /content/gdrive so the repository can be stored on the Drive.
from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
# Switch to the root folder of the mounted Google Drive.
%cd /content/gdrive/My Drive

In [None]:
# Run this cell if you did not clone the repo yet.
# The repository is cloned into a folder named OPT4ML inside the current directory.
! git clone https://github.com/xavoliva6/dpfl_pytorch.git OPT4ML

In [None]:
# Run this cell to pull the latest changes
%cd /content/gdrive/My Drive/OPT4ML
# Configure a git identity; git requires one to create the stash below.
! git config --global user.email "student@epfl.ch"
! git config --global user.name "Student"
# Stash local modifications so they cannot block the pull.
# Note that the stash is not re-applied in this cell.
! git stash
! git pull origin main

**Colab Support**<br/>
Only run the following cells if you want to run the code on Google Colab.

In [None]:
# Enable access to files stored in Google Drive (mounted at /content/gdrive/)
from google.colab import drive
drive.mount('/content/gdrive/')

In [None]:
# Work from the src folder of the cloned repository; later cells use paths
# relative to it (e.g. ../requirements.txt).
%cd /content/gdrive/My Drive/OPT4ML/src

# Main

In [None]:
# Install necessary requirements
!pip install -r ../requirements.txt

In [None]:
# Select the compute device: the first CUDA GPU when PyTorch can see one,
# otherwise the CPU.
import torch

device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
print("device_name: {}".format(device_name))
device = torch.device(device_name)

In [None]:
# Reload imported modules automatically before each cell execution, so edits
# to the project's .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2


In [None]:
import numpy as np
import warnings
import matplotlib.pyplot as plt
from utils import *

from model import CNN
from server import Server


In [None]:
# Silence all Python warnings for the rest of the session.
# Inside a notebook kernel __name__ is "__main__", so this runs when the cell is executed.
if __name__ == "__main__":
    warnings.filterwarnings("ignore")

In [None]:
# --- TRAINING PARAMETERS ---
# NR_CLIENTS ... Number of clients participating in the training process.
# LR ... Learning rate used for the stochastic gradient descent.
# CLIENT_EPOCHS ... Number of epochs that every client is trained for during every step.
# NR_TRAINING_ROUNDS ... Number of times that the server performs a global model update.
# IS_PARALLEL ... Whether the training of the clients is performed in parallel.
# DATA ... Dataset to be used. Either 'MNIST', 'MED' or 'FEMNIST'.
# BATCH_SIZE ... Batch size used by the SGD algorithm.

In [None]:
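# --- DIFFERENTIAL PRIVACY PARAMETERS ---
# IS_PRIVATE ... Whether training uses differential privacy.
# MAX_GRAD_NORM ... Maximum gradient norm (presumably the gradient clipping threshold; confirm in the client/server code).
# EPSILON ... Privacy budget (epsilon) of the differential privacy mechanism.
# EPSILON_TRAINING_ITERATION ... EPSILON divided by NR_TRAINING_ROUNDS.
# VIRTUAL_BATCH_SIZE ... Virtual batch size; must be divisible by BATCH_SIZE.
# N_ACCUMULATION_STEPS ... VIRTUAL_BATCH_SIZE divided by BATCH_SIZE.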

## 1. Experiments on Medical Dataset
In the first experiment, we examine the accuracy of a federated machine learning approach that includes differential privacy.

In [None]:
# Dataset identifier and SGD batch size shared by all experiments in this section.
data, batch_size = "MED", 8

### Experiment 1.1

In [None]:
# --- TRAINING PARAMETERS ---
nr_clients = 3
nr_training_rounds = 100
client_epochs = 10
lr = 0.01
is_parallel = False

# --- DIFFERENTIAL PRIVACY PARAMETERS ---
is_private = False
epsilon = 10
max_grad_norm = 1.2

# A virtual batch consists of a whole number of batches of size `batch_size`
# (defined in the dataset cell above); that number is `n_accumulation_steps`.
virtual_batch_size = batch_size * 2
assert not virtual_batch_size % batch_size  # VIRTUAL_BATCH_SIZE should be divisible by BATCH_SIZE
n_accumulation_steps = virtual_batch_size // batch_size

In [None]:
# Build the server from the parameters of the cells above.
server = Server(
    nr_clients=nr_clients,
    nr_training_rounds=nr_training_rounds,
    lr=lr,
    epochs=client_epochs,
    data=data,
    batch_size=batch_size,
    max_grad_norm=max_grad_norm,
    epsilon=epsilon,
    n_accumulation_steps=n_accumulation_steps,
    is_parallel=is_parallel,
    is_private=is_private,
    device=device,
    verbose="server",
)

# Run the training; the call returns the test losses and test accuracies.
# early/patience/delta presumably configure early stopping — confirm in server.py.
test_losses, test_accs = server(early=True, patience=10, delta=0.05)

In [None]:
# Visualize the test losses and accuracies returned by the run above.
plot_metrics(test_losses, test_accs)

In [None]:
# Second run on the same dataset: a single client, without differential privacy.
# The client count must be assigned to `nr_clients`, because that is the name
# passed to Server below.
nr_clients = 1
is_private = False
server = Server(nr_clients=nr_clients, nr_training_rounds=nr_training_rounds, lr=lr, epochs=client_epochs, data=data,
                batch_size=batch_size, max_grad_norm=max_grad_norm, epsilon=epsilon,
                n_accumulation_steps=n_accumulation_steps, is_parallel=is_parallel, is_private=is_private,
                device=device, verbose="server")
# Early stopping is switched off for this run (early=False).
test_losses, test_accs = server(early=False, patience=3, delta=0.05)

In [None]:
# Visualize the test losses and accuracies returned by the run above.
plot_metrics(test_losses, test_accs)

## 2. Experiments on MNIST Dataset

In [None]:
# Dataset identifier and SGD batch size shared by all experiments in this section.
data, batch_size = "MNIST", 64

In [None]:
# --- TRAINING PARAMETERS ---
is_parallel = True        # train the clients in parallel
nr_training_rounds = 20   # global model updates performed by the server
client_epochs = 10        # epochs per client in every step
nr_clients = 3            # participating clients
lr = 0.01                 # SGD learning rate

In [None]:
# --- DIFFERENTIAL PRIVACY PARAMETERS ---
is_private = False
max_grad_norm = 1.2

# The total epsilon is split evenly across the training rounds.
epsilon = 5
epsilon_training_iteration = epsilon / nr_training_rounds

# A virtual batch consists of a whole number of batches of size `batch_size`;
# that number is `n_accumulation_steps`.
virtual_batch_size = batch_size * 2
assert not virtual_batch_size % batch_size  # VIRTUAL_BATCH_SIZE should be divisible by BATCH_SIZE
n_accumulation_steps = virtual_batch_size // batch_size

In [None]:
# Build the server from the parameters of the cells above.
# `device` is passed explicitly, as in every other experiment of this notebook,
# so the run uses the device selected at the top instead of Server's default.
server = Server(
    nr_clients=nr_clients,
    nr_training_rounds=nr_training_rounds,
    lr=lr,
    epochs=client_epochs,
    data=data,
    batch_size=batch_size,
    max_grad_norm=max_grad_norm,
    epsilon=epsilon,
    n_accumulation_steps=n_accumulation_steps,
    epsilon_training_iteration=epsilon_training_iteration,
    is_parallel=is_parallel,
    is_private=is_private,
    device=device,
    verbose="server",
)

# Run the training; the call returns the test losses and test accuracies.
test_losses, test_accs = server(early=True, patience=5, delta=0.05)

In [None]:
# Visualize the test losses and accuracies returned by the run above.
plot_metrics(test_losses, test_accs)

In [None]:
# Second MNIST run, with Server's default verbosity.
# NOTE(review): is_private is still False here, so the settings match the
# previous run — confirm whether a private run (is_private = True) was intended.
server = Server(
    nr_clients=nr_clients,
    nr_training_rounds=nr_training_rounds,
    lr=lr,
    epochs=client_epochs,
    data=data,
    batch_size=batch_size,
    max_grad_norm=max_grad_norm,
    epsilon=epsilon,
    n_accumulation_steps=n_accumulation_steps,
    epsilon_training_iteration=epsilon_training_iteration,
    is_parallel=is_parallel,
    device=device,
    is_private=is_private,
)

# Run the training; the call returns the test losses and test accuracies.
test_losses, test_accs = server(early=True, patience=5, delta=0.05)

## 3. Experiments on FEMNIST Dataset

In [None]:
# Dataset identifier and SGD batch size shared by all experiments in this section.
data, batch_size = "FEMNIST", 64

In [None]:
# --- TRAINING PARAMETERS ---
is_parallel = True        # train the clients in parallel
nr_training_rounds = 20   # global model updates performed by the server
client_epochs = 10        # epochs per client in every step
nr_clients = 3            # participating clients
lr = 0.01                 # SGD learning rate

In [None]:
# --- DIFFERENTIAL PRIVACY PARAMETERS ---
is_private = False
max_grad_norm = 1.2

# The total epsilon is split evenly across the training rounds.
epsilon = 5
epsilon_training_iteration = epsilon / nr_training_rounds

# A virtual batch consists of a whole number of batches of size `batch_size`;
# that number is `n_accumulation_steps`.
virtual_batch_size = batch_size * 2
assert not virtual_batch_size % batch_size  # VIRTUAL_BATCH_SIZE should be divisible by BATCH_SIZE
n_accumulation_steps = virtual_batch_size // batch_size

In [None]:
# Release cached GPU memory held by PyTorch before starting a new run.
torch.cuda.empty_cache()

# Build the server from the parameters of the cells above.
server = Server(
    nr_clients=nr_clients,
    nr_training_rounds=nr_training_rounds,
    lr=lr,
    epochs=client_epochs,
    data=data,
    batch_size=batch_size,
    max_grad_norm=max_grad_norm,
    epsilon=epsilon,
    n_accumulation_steps=n_accumulation_steps,
    epsilon_training_iteration=epsilon_training_iteration,
    is_parallel=is_parallel,
    is_private=is_private,
    device=device,
    verbose="server",
)

# Run the training; the call returns the test losses and test accuracies.
test_losses, test_accs = server(early=True, patience=3, delta=0.05)

In [None]:
# Visualize the test losses and accuracies returned by the run above.
plot_metrics(test_losses, test_accs)

In [None]:
# Repeat the FEMNIST experiment with differential privacy switched on and
# the most detailed verbosity setting used in this notebook ("all").
is_private = True
server = Server(
    nr_clients=nr_clients,
    nr_training_rounds=nr_training_rounds,
    lr=lr,
    epochs=client_epochs,
    data=data,
    batch_size=batch_size,
    max_grad_norm=max_grad_norm,
    epsilon=epsilon,
    n_accumulation_steps=n_accumulation_steps,
    epsilon_training_iteration=epsilon_training_iteration,
    is_parallel=is_parallel,
    is_private=is_private,
    device=device,
    verbose="all",
)

# Run the training; the call returns the test losses and test accuracies.
test_losses, test_accs = server(early=True, patience=3, delta=0.05)

In [None]:
# Visualize the test losses and accuracies returned by the run above.
plot_metrics(test_losses, test_accs)