In [None]:
# Install the pinned syft release this notebook is written against; pip's stdout is redirected to /dev/null.
!pip install syft==0.2.9 >/dev/null

[31mERROR: tensorflow 2.4.1 has requirement numpy~=1.19.2, but you'll have numpy 1.18.5 which is incompatible.[0m
[31mERROR: google-colab 1.0.0 has requirement notebook~=5.3.0; python_version >= "3.0", but you'll have notebook 5.7.8 which is incompatible.[0m
[31mERROR: google-colab 1.0.0 has requirement requests~=2.23.0, but you'll have requests 2.22.0 which is incompatible.[0m
[31mERROR: google-colab 1.0.0 has requirement tornado~=5.1.0; python_version >= "3.0", but you'll have tornado 4.5.3 which is incompatible.[0m
[31mERROR: datascience 0.10.6 has requirement folium==0.2.1, but you'll have folium 0.8.3 which is incompatible.[0m
[31mERROR: bokeh 2.1.1 has requirement tornado>=5.1, but you'll have tornado 4.5.3 which is incompatible.[0m
[31mERROR: albumentations 0.1.12 has requirement imgaug<0.2.7,>=0.2.5, but you'll have imgaug 0.2.9 which is incompatible.[0m


Models are uploaded to a trusted third party - let's call it `secure_worker`.

In [None]:
import syft
import torch
from torch import nn, optim

In [None]:
# The hook object is handed to every worker created below
hook = syft.TorchHook(torch)

# Two data-holding workers, followed by the worker that will act as the
# trusted aggregator
bob = syft.VirtualWorker(hook, id='bob')
alice = syft.VirtualWorker(hook, id='alice')
secure_worker = syft.VirtualWorker(hook, id='secure_worker')

# Toy dataset: four 2-feature rows with one target value per row
data = torch.tensor(
    [[0, 0], [0, 1], [1, 0], [1., 1]],
    requires_grad=True,
)
target = torch.tensor(
    [[0], [0], [1], [1.]],
    requires_grad=True,
)

# Rows 0-1 are sent to bob and rows 2-3 to alice; the names bound here are
# what .send() returns for each slice
bobs_data, bobs_target = data[:2].send(bob), target[:2].send(bob)
alices_data, alices_target = data[2:].send(alice), target[2:].send(alice)

# Global model: one linear layer mapping 2 inputs to 1 output
model = nn.Linear(2, 1)

In [None]:
# Each client receives its own copy of the global model
bobs_model, alices_model = model.copy().send(bob), model.copy().send(alice)

# One SGD optimizer per copy, each bound to that copy's parameters
bobs_opt = optim.SGD(bobs_model.parameters(), lr=0.1)
alices_opt = optim.SGD(alices_model.parameters(), lr=0.1)

In [None]:
# A single training step for Bob, written out line by line.
# bobs_model, bobs_data and bobs_target are the objects obtained from .send(bob)
# in the cells above; bobs_opt is the SGD optimizer over bobs_model's parameters.

# Clear the gradients left over from any previous step
bobs_opt.zero_grad()
# Forward pass, summed squared-error loss, then backpropagation
bobs_pred = bobs_model(bobs_data)
bobs_loss = ((bobs_pred-bobs_target)**2).sum()
bobs_loss.backward()
# Apply the SGD update to Bob's copy of the model
bobs_opt.step()
# Retrieve the loss with .get() and rebind bobs_loss to the retrieved value
bobs_loss = bobs_loss.get().data
bobs_loss

tensor(1.3651)

In [None]:
# The same procedure as for Bob, now for Alice: zero the gradients, run the
# forward pass, compute the summed squared-error loss, backpropagate, step the
# optimizer, then retrieve the loss value with .get().
alices_opt.zero_grad()
alices_pred = alices_model(alices_data)
alices_loss = ((alices_pred-alices_target)**2).sum()
alices_loss.backward()
alices_opt.step()
alices_loss = alices_loss.get().data
alices_loss

tensor(0.8685)

In [None]:
# Repeat the per-client training step for a few rounds, Bob first, then Alice


def _sgd_round(local_model, local_opt, inputs, targets):
    """Run one SGD step for a single client.

    Zeroes the optimizer's gradients, computes the summed squared error
    between the model's predictions and ``targets``, backpropagates and
    steps ``local_opt``.

    Returns:
        A pair ``(predictions, loss_value)`` where ``loss_value`` is the
        loss after ``.get().data``.
    """
    local_opt.zero_grad()
    predictions = local_model(inputs)
    loss = ((predictions - targets) ** 2).sum()
    loss.backward()
    local_opt.step()
    return predictions, loss.get().data


epochs = 20
for i in range(epochs):
    # For Bob
    bobs_pred, bobs_loss = _sgd_round(bobs_model, bobs_opt, bobs_data, bobs_target)
    print('Bobs Loss:', bobs_loss)

    # For Alice
    alices_pred, alices_loss = _sgd_round(alices_model, alices_opt, alices_data, alices_target)
    print('Alices loss:',alices_loss)

Bobs Loss: tensor(0.3120)
Alices loss: tensor(0.1593)
Bobs Loss: tensor(0.0727)
Alices loss: tensor(0.1282)
Bobs Loss: tensor(0.0181)
Alices loss: tensor(0.1067)
Bobs Loss: tensor(0.0055)
Alices loss: tensor(0.0888)
Bobs Loss: tensor(0.0024)
Alices loss: tensor(0.0739)
Bobs Loss: tensor(0.0015)
Alices loss: tensor(0.0615)
Bobs Loss: tensor(0.0012)
Alices loss: tensor(0.0512)
Bobs Loss: tensor(0.0010)
Alices loss: tensor(0.0426)
Bobs Loss: tensor(0.0008)
Alices loss: tensor(0.0355)
Bobs Loss: tensor(0.0007)
Alices loss: tensor(0.0295)
Bobs Loss: tensor(0.0006)
Alices loss: tensor(0.0246)
Bobs Loss: tensor(0.0005)
Alices loss: tensor(0.0204)
Bobs Loss: tensor(0.0004)
Alices loss: tensor(0.0170)
Bobs Loss: tensor(0.0004)
Alices loss: tensor(0.0142)
Bobs Loss: tensor(0.0003)
Alices loss: tensor(0.0118)
Bobs Loss: tensor(0.0003)
Alices loss: tensor(0.0098)
Bobs Loss: tensor(0.0002)
Alices loss: tensor(0.0082)
Bobs Loss: tensor(0.0002)
Alices loss: tensor(0.0068)
Bobs Loss: tensor(0.0002)
Al

Now we have a trained model for each of Bob and Alice. How do we aggregate them?

In [None]:
# Move both trained client models to secure_worker, the trusted aggregator
alices_model.move(secure_worker)
bobs_model.move(secure_worker)
# Inspect what secure_worker now holds. _objects is a private attribute,
# read here only to show that both models' parameters have arrived.
secure_worker._objects

{20907566982: Parameter containing:
 tensor([0.9622], requires_grad=True), 50686509638: Parameter containing:
 tensor([[-0.4107,  0.0154]], requires_grad=True), 59568901628: Parameter containing:
 tensor([-0.0095], requires_grad=True), 95708304698: Parameter containing:
 tensor([[-0.0115,  0.0879]], requires_grad=True)}

In [None]:
# Simple averaging on secure_worker.
# `model` is the global model and resides in 'me'; Alice's and Bob's models
# reside in secure_worker. The secure worker averages them and only the
# result is brought back to 'me' via .get().
#
# Note for syft v0.2.9:
#   model.weight.data.set_() might not work, so model.weight.set_() is used
#   instead. Calling set_() directly on the parameter would create a race
#   condition/error with autograd, hence the torch.no_grad() scope.
with torch.no_grad():
    model.weight.set_(
        ((alices_model.weight.data + bobs_model.weight.data) / 2).get()
    )

In [None]:
# Show the global model's weight after the averaging step
model.weight

Parameter containing:
tensor([[-0.2111,  0.0516]], requires_grad=True)

In [None]:
# Same averaging as for the weight, applied to the bias: the two client biases
# are added and halved, and only the result is fetched with .get().
# torch.no_grad() is used because set_() modifies the parameter in place.
with torch.no_grad():
    model.bias.set_(((alices_model.bias.data + bobs_model.bias.data) / 2).get())

In [None]:
# Show the global model's bias after the averaging step
model.bias

Parameter containing:
tensor([0.4763], requires_grad=True)

In [None]:
# Display the global model's repr
model

Linear(in_features=2, out_features=1, bias=True)

In [None]:
# This is the central (global) model
model.location # Does not return anything here; 'me' would have been nice, since PyTorch is overridden by syft anyway

This is the basic trusted aggregation workflow. 
* Create a secure worker object from VirtualWorker()
* Move the clients' models to secure worker
* Average the weights and biases (implicitly on the secure worker) and then take just the result to model (Global model)

In [None]:
# Federated averaging with a trusted aggregator, repeated for several rounds.
# Each round: copy the global model to both clients, train locally for
# `epochs` steps, move both client models to secure_worker, average them
# there, and write the averaged parameters back into the global `model`.
federated_rounds = 10
epochs = 20
lr = 0.1


def _local_step(local_model, local_opt, inputs, targets):
    """Run one SGD step on a client's copy of the model.

    Zeroes the gradients, computes the summed squared error between the
    predictions and ``targets``, backpropagates and steps ``local_opt``.

    Returns:
        A pair ``(predictions, loss)``. The loss is returned as-is, without
        calling ``.get()``, so the caller decides when to fetch it.
    """
    local_opt.zero_grad()
    predictions = local_model(inputs)
    loss = ((predictions - targets) ** 2).sum()
    loss.backward()
    local_opt.step()
    return predictions, loss


def _average_into(global_model, first_model, second_model):
    """Overwrite ``global_model``'s weight and bias with the clients' mean.

    The sum and the division are applied to ``first_model`` and
    ``second_model`` (both expected to have been moved to the aggregator
    already); only the averaged tensors are fetched with ``.get()``.
    ``torch.no_grad()`` is required because ``set_()`` modifies the
    parameters in place.
    """
    with torch.no_grad():
        global_model.weight.set_(
            ((first_model.weight.data + second_model.weight.data) / 2).get()
        )
        global_model.bias.set_(
            ((first_model.bias.data + second_model.bias.data) / 2).get()
        )


for federated_round_number in range(federated_rounds):

    # Every round starts from the current global model
    bobs_model = model.copy().send(bob)
    alices_model = model.copy().send(alice)

    bobs_opt = optim.SGD(params=bobs_model.parameters(), lr=lr)
    alices_opt = optim.SGD(params=alices_model.parameters(), lr=lr)

    for epoch in range(epochs):
        bobs_pred, bobs_loss = _local_step(bobs_model, bobs_opt, bobs_data, bobs_target)
        alices_pred, alices_loss = _local_step(alices_model, alices_opt, alices_data, alices_target)

    # Only the loss of the last local epoch is reported, so it is fetched once
    # per round instead of once per epoch.
    bobs_loss = bobs_loss.get().data
    alices_loss = alices_loss.get().data

    bobs_model.move(secure_worker)
    alices_model.move(secure_worker)

    # Secure Aggregation Logic
    _average_into(model, alices_model, bobs_model)

    print('At Federated Round {}, Bob\'s Loss = {}; Alice\'s Loss = {}'.format(federated_round_number, bobs_loss, alices_loss))

At Federated Round 0, Bob's Loss = 0.0011702917981892824; Alice's Loss = 0.0002818136417772621
At Federated Round 1, Bob's Loss = 0.0008468828164041042; Alice's Loss = 0.00020137940009590238
At Federated Round 2, Bob's Loss = 0.000613740470726043; Alice's Loss = 0.00015224932576529682
At Federated Round 3, Bob's Loss = 0.00045033235801383853; Alice's Loss = 0.00011353669833624735
At Federated Round 4, Bob's Loss = 0.00033197057200595737; Alice's Loss = 8.410410373471677e-05
At Federated Round 5, Bob's Loss = 0.00024505818146280944; Alice's Loss = 6.216921610757709e-05
At Federated Round 6, Bob's Loss = 0.00018097036809194833; Alice's Loss = 4.592759069055319e-05
At Federated Round 7, Bob's Loss = 0.00013365710037760437; Alice's Loss = 3.3924199669854715e-05
At Federated Round 8, Bob's Loss = 9.871630754787475e-05; Alice's Loss = 2.5056135200429708e-05
At Federated Round 9, Bob's Loss = 7.291034853551537e-05; Alice's Loss = 1.850617627496831e-05


Here the models are sent to a third-party secure aggregator. The averaging, (alice + bob) / 2, is performed on the aggregator itself, and you receive only the averaged model parameters when you call `get()` on the result.

Hence, the root server ('me') does not actually see Alice's and Bob's individual models; it only receives their aggregate.

Shortcomings:
1. Aggregator requires trust
2. It fails under the curious-aggregator threat model, because the aggregator holds each client's individual model parameters.
3. In this code, model training at Bob and Alice is sequential. On different physical machines, it should be done in parallel.
4. Asynchronous model training and aggregation will be better for cross device federated learning.

Fix:

Use Secure Multiparty Computation.