In [None]:
!pip install syft==0.2.9 >/dev/null

[31mERROR: tensorflow 2.4.1 has requirement numpy~=1.19.2, but you'll have numpy 1.18.5 which is incompatible.[0m
[31mERROR: google-colab 1.0.0 has requirement notebook~=5.3.0; python_version >= "3.0", but you'll have notebook 5.7.8 which is incompatible.[0m
[31mERROR: google-colab 1.0.0 has requirement requests~=2.23.0, but you'll have requests 2.22.0 which is incompatible.[0m
[31mERROR: google-colab 1.0.0 has requirement tornado~=5.1.0; python_version >= "3.0", but you'll have tornado 4.5.3 which is incompatible.[0m
[31mERROR: datascience 0.10.6 has requirement folium==0.2.1, but you'll have folium 0.8.3 which is incompatible.[0m
[31mERROR: bokeh 2.1.1 has requirement tornado>=5.1, but you'll have tornado 4.5.3 which is incompatible.[0m
[31mERROR: albumentations 0.1.12 has requirement imgaug<0.2.7,>=0.2.5, but you'll have imgaug 0.2.9 which is incompatible.[0m


Note: using syft version 0.1.2a1 specifically resulted in `AttributeError: 'str' object has no attribute 'decode'` (pytorch).

## Centralized Example

In [None]:
import torch
from torch import nn, optim

# Toy dataset: four two-feature samples with one scalar label each
data = torch.tensor(
    [[1., 1], [0, 1], [1, 0], [0, 0]],
    requires_grad=True,
)
target = torch.tensor(
    [[1.], [1], [0], [0]],
    requires_grad=True,
)

# Toy model: a single linear layer mapping 2 inputs to 1 output
model = nn.Linear(2, 1)

# Plain SGD over the model's parameters
opt = optim.SGD(model.parameters(), lr=0.1)

# One training step: clear gradients, forward pass, summed squared error,
# backward pass, parameter update
opt.zero_grad()
pred = model(data)
loss = (pred - target).pow(2).sum()
loss.backward()
opt.step()
print(loss.data)

tensor(4.8502)


In [None]:
# Second optimisation step on the same model and data: clear the old
# gradients, recompute the summed squared error, backpropagate and update
opt.zero_grad()
pred = model(data)
loss = (pred - target).pow(2).sum()
loss.backward()
opt.step()
print(loss.data)

tensor(1.2803)


Loss went down.

This is regular, centralized deep learning. A training loop for it looks as follows:

In [None]:
def train(iterations=20):
    """Run `iterations` SGD steps on the notebook-level toy problem.

    Operates on the global `model`, `opt`, `data` and `target`, and prints
    the summed squared-error loss computed in each step.
    """
    for _ in range(iterations):
        opt.zero_grad()
        residual = target - model(data)
        loss = residual.pow(2).sum()
        loss.backward()
        opt.step()
        print(loss.data)


train(10)

tensor(0.7584)
tensor(0.5392)
tensor(0.3938)
tensor(0.2899)
tensor(0.2145)
tensor(0.1594)
tensor(0.1190)
tensor(0.0891)
tensor(0.0669)
tensor(0.0504)


## Making this Federated

### First thing to do is to split the data and send them to two different workers

In [None]:
import syft

# Hook torch so that its methods are overridden by syft versions that act
# on pointers to tensors
hook = syft.TorchHook(torch)

# Virtual workers for the two parties, created in the order bob, alice
bob, alice = (syft.VirtualWorker(hook, id=name) for name in ("bob", "alice"))



In [None]:
# bob receives the first two rows of the features and of the labels
data_bob, target_bob = (t[0:2].send(bob) for t in (data, target))

# alice receives the last two rows
data_alice, target_alice = (t[2:4].send(alice) for t in (data, target))

# One (data, target) pair per worker - iterated over by the training loop
datasets = [
    (data_bob, target_bob),
    (data_alice, target_alice),
]

# Model
# model = nn.Linear(2, 1)
# opt = optim.SGD(params=model.parameters(), lr=0.1)

In [None]:
# datasets[0] # data with bob

In [None]:
# datasets[1] # data with alice

In [None]:
# _data, _target = datasets[0]

In [None]:
# _data

In [None]:
# _data.location

We will use this location to send the model to their respective workers

In [None]:
# model = model.send(_data.location)

In [None]:
# model

In [None]:
# opt.zero_grad()

In [None]:
def train(iterations=20):
    """Train a fresh linear model by visiting each entry of `datasets` in turn.

    A new `nn.Linear(2, 1)` model and SGD optimizer are created on every
    call. For each of the `iterations` rounds and each (data, target) pair
    in the global `datasets`, the model is sent to the pair's location,
    takes one SGD step there, is fetched back, and the retrieved loss is
    printed.
    """
    # Fresh model and optimizer for this run
    model = nn.Linear(2, 1)
    opt = optim.SGD(model.parameters(), lr=0.1)

    for _ in range(iterations):
        # Workers are visited one after the other: training on the next
        # worker only starts once the current one has finished its step
        for remote_data, remote_target in datasets:
            # Ship the model to the worker that holds this shard
            model = model.send(remote_data.location)

            # Ordinary training step, expressed against the remote tensors
            opt.zero_grad()
            residual = model(remote_data) - remote_target
            loss = (residual ** 2).sum()
            loss.backward()
            opt.step()

            # Bring the updated model back from the worker
            model = model.get()

            print(loss.get())

In [None]:
train()

tensor(10.8281, requires_grad=True)
tensor(0.1081, requires_grad=True)
tensor(0.1975, requires_grad=True)
tensor(0.1577, requires_grad=True)
tensor(0.0577, requires_grad=True)
tensor(0.1072, requires_grad=True)
tensor(0.0355, requires_grad=True)
tensor(0.0707, requires_grad=True)
tensor(0.0236, requires_grad=True)
tensor(0.0471, requires_grad=True)
tensor(0.0161, requires_grad=True)
tensor(0.0318, requires_grad=True)
tensor(0.0111, requires_grad=True)
tensor(0.0217, requires_grad=True)
tensor(0.0078, requires_grad=True)
tensor(0.0150, requires_grad=True)
tensor(0.0056, requires_grad=True)
tensor(0.0104, requires_grad=True)
tensor(0.0040, requires_grad=True)
tensor(0.0074, requires_grad=True)
tensor(0.0030, requires_grad=True)
tensor(0.0052, requires_grad=True)
tensor(0.0022, requires_grad=True)
tensor(0.0037, requires_grad=True)
tensor(0.0016, requires_grad=True)
tensor(0.0027, requires_grad=True)
tensor(0.0012, requires_grad=True)
tensor(0.0019, requires_grad=True)
tensor(0.0009, requ

Shortcoming: Model can be used to reverse engineer the data

Shortcoming: Model training on alice only happens after bob. This means alice has a model that was trained on bob's data, and she can reverse engineer the model to learn the characteristics of bob's data.

Shortcoming: Training is sequential (one worker after another). It can be parallelized - first synchronously, then asynchronously further down the line.

The trusted third party 'me', which is created when PySyft hooks torch (LOL), is able to see the private models passed around by alice and bob.

#### Note: The library is very unstable and can throw errors at random (get_msg_pack, pytorch has no attribute..., etc.). If that happens, try restarting the kernel and re-running (without factory-resetting the runtime).

## How to average the gradients before calling get()?

This is to make sure that we don't see anyone's gradient/model. This can be done by using pointers to pointers.

In [None]:
# Empty both workers' object stores before the demo
bob.clear_objects()
alice.clear_objects()

# Send a tensor to bob; x is what send() returns, then inspect bob's store
x = torch.tensor([1,2,3,4,5]).send(bob)
bob._objects

{86522025535: tensor([1, 2, 3, 4, 5])}

In [None]:
# Send x (the handle to bob's tensor) on to alice; the outputs of the
# following cells show the resulting chain me -> alice -> bob
x = x.send(alice)

In [None]:
x

(Wrapper)>[PointerTensor | me:77807438018 -> alice:83172385140]

In [None]:
alice._objects

{83172385140: (Wrapper)>[PointerTensor | alice:83172385140 -> bob:86522025535]}

In [None]:
x.location

<VirtualWorker id:alice #objects:1>

In [None]:
bob._objects

{86522025535: tensor([1, 2, 3, 4, 5])}

In [None]:
x

(Wrapper)>[PointerTensor | me:77807438018 -> alice:83172385140]

Alice now holds a pointer from alice to bob, i.e. the object that x refers to on alice's machine points to bob.

And our x pointer no longer points to bob, but at alice.

So, now when we call x, it points to alice's machine first which then points to bob's machine and then execution happens over there.

In [None]:
# Add the remote tensor to itself through the pointer chain, then display y
y = x + x
y

(Wrapper)>[PointerTensor | me:83415556672 -> alice:47654710747]

In [None]:
y.location

<VirtualWorker id:alice #objects:2>

In [None]:
alice._objects

{47654710747: (Wrapper)>[PointerTensor | alice:47654710747 -> bob:99760747990],
 83172385140: (Wrapper)>[PointerTensor | alice:83172385140 -> bob:86522025535]}

In [None]:
bob._objects

{86522025535: tensor([1, 2, 3, 4, 5]),
 99760747990: tensor([ 2,  4,  6,  8, 10])}

y still points to alice, but execution happened at bob.

Alice has now got another pointer to bob.

x = torch.tensor([1,1,1,1]).send(bob).send(alice) means that the owner of the tensor is bob, and alice has a pointer to that tensor, which resides on bob.

In [None]:
x.location

<VirtualWorker id:alice #objects:2>

### how do we get this data back?


In [None]:
x.location # pointer to alice

<VirtualWorker id:alice #objects:2>

In [None]:
# First get(): fetch whatever x refers to at its current location, then
# display the result
x = x.get()
x

(Wrapper)>[PointerTensor | me:83172385140 -> bob:86522025535]

In [None]:
x.location # Now it is a pointer to bob

<VirtualWorker id:bob #objects:2>

In [None]:
# Second get(): fetch again from x's new location, then display the result
x = x.get()
x

tensor([1, 2, 3, 4, 5])

In [None]:
bob._objects

{99760747990: tensor([ 2,  4,  6,  8, 10])}

Note that if you delete the pointer, e.g. with `del x`, the deletion is chained: all the objects pointed to by that pointer (including further pointers in the chain) are deleted as well.

### Pointer Chain Operations

In [None]:
# Empty both workers' object stores before the demo
bob.clear_objects()
alice.clear_objects()

# Send a tensor to bob, then send the result on to alice in one expression;
# afterwards inspect what bob holds
x = torch.tensor([1,2,3,4,5]).send(bob).send(alice)
bob._objects

{32923310645: tensor([1, 2, 3, 4, 5])}

In [None]:
alice._objects

{6412344893: (Wrapper)>[PointerTensor | alice:6412344893 -> bob:32923310645]}

In [None]:
x.location

<VirtualWorker id:alice #objects:1>

In [None]:
# remote_get() is called on the pointer x; per the outputs of the following
# cells, the tensor ends up in alice's store and bob's store becomes empty
x.remote_get()

(Wrapper)>[PointerTensor | me:65059115163 -> alice:6412344893]

In [None]:
bob._objects

{}

In [None]:
alice._objects

{6412344893: tensor([1, 2, 3, 4, 5])}

Did you see that?

We were able to get bob's data to alice's machine without ever touching our machine 'me'. This forms the basis for better privacy. Honest but curious threat model is satisfied.

In [None]:
x.location

<VirtualWorker id:alice #objects:1>

In [None]:
# Move the object behind x over to bob, then display x
x.move(bob)
x

(Wrapper)>[PointerTensor | me:65059115163 -> bob:6412344893]

In [None]:
bob._objects

{6412344893: tensor([1, 2, 3, 4, 5])}

In [None]:
alice._objects

{}

In [None]:
x.location

<VirtualWorker id:bob #objects:1>