<a href="https://colab.research.google.com/github/ibacaraujo/pysyft-learning/blob/master/Part_02_Intro_to_Federated_Learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Part 02. Intro to Federated Learning

In [1]:
# Environment setup for a fresh runtime: install tf-encrypted, build PySyft
# from source, and make the checkout importable.
!pip install tf-encrypted

# Clone the `dev` branch of PySyft if no ./PySyft folder exists yet;
# otherwise pull the latest changes into the existing checkout.
! URL="https://github.com/openmined/PySyft.git" && FOLDER="PySyft" && if [ ! -d $FOLDER ]; then git clone -b dev --single-branch $URL; else (cd $FOLDER && git pull $URL && cd ..); fi;

# Install PySyft from the checkout. Only stdout is discarded, so errors
# written to stderr still show up in the cell output.
!cd PySyft; python setup.py install  > /dev/null

import os
import sys
# Append the absolute path of the checkout to sys.path (once) so that
# `import syft` can resolve against the cloned sources.
module_path = os.path.abspath(os.path.join('./PySyft'))
if module_path not in sys.path:
    sys.path.append(module_path)
    
# Force-reinstall these packages at their latest versions.
# NOTE(review): both `websocket` and `websockets` are installed here -- confirm
# that both distributions are actually required.
!pip install --upgrade --force-reinstall lz4
!pip install --upgrade --force-reinstall websocket
!pip install --upgrade --force-reinstall websockets
!pip install --upgrade --force-reinstall zstd

Collecting tf-encrypted
[?25l  Downloading https://files.pythonhosted.org/packages/15/be/a4c0af9fdc5e5cee28495460538acf2766382bd572e01d4847abc7608dba/tf_encrypted-0.5.9-py3-none-manylinux1_x86_64.whl (2.7MB)
[K     |████████████████████████████████| 2.7MB 3.4MB/s 
[?25hCollecting pyyaml>=5.1
[?25l  Downloading https://files.pythonhosted.org/packages/3d/d9/ea9816aea31beeadccd03f1f8b625ecf8f645bd66744484d162d84803ce5/PyYAML-5.3.tar.gz (268kB)
[K     |████████████████████████████████| 276kB 21.2MB/s 
Building wheels for collected packages: pyyaml
  Building wheel for pyyaml (setup.py) ... [?25l[?25hdone
  Created wheel for pyyaml: filename=PyYAML-5.3-cp36-cp36m-linux_x86_64.whl size=44229 sha256=8d077e3166085a0b825bd9c6213e5081f1f36865572a49588da6e75117f9071b
  Stored in directory: /root/.cache/pip/wheels/e4/76/4d/a95b8dd7b452b69e8ed4f68b69e1b55e12c9c9624dd962b191
Successfully built pyyaml
Installing collected packages: pyyaml, tf-encrypted
  Found existing installation: PyYAML 3.1

In [0]:
import torch
from torch import nn
from torch import optim

In [0]:
# Toy dataset: four 2-feature samples; the label equals the first feature.
data = torch.tensor(
    [[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]],
    requires_grad=True,
)
target = torch.tensor(
    [[0.0], [0.0], [1.0], [1.0]],
    requires_grad=True,
)

# Toy model: a single linear layer mapping 2 inputs to 1 output.
model = nn.Linear(in_features=2, out_features=1)

def train(iterations=20, lr=0.1):
  """Fit the notebook-level ``model`` to ``data``/``target`` with plain SGD.

  Reads the globals ``model``, ``data`` and ``target`` defined in the
  notebook and updates ``model`` in place. The loss of every iteration is
  printed.

  Args:
    iterations: Number of full-batch gradient steps to take (default 20).
    lr: Learning rate handed to ``optim.SGD`` (default 0.1).

  Returns:
    None.
  """
  # Training Logic
  opt = optim.SGD(params=model.parameters(), lr=lr)
  # `_` instead of `iter`: the index is unused and `iter` would shadow the builtin.
  for _ in range(iterations):
    # 1) Erase previous gradients (if they exist)
    opt.zero_grad()

    # 2) Make a prediction
    pred = model(data)

    # 3) Calculate how much we missed (sum of squared errors)
    loss = ((pred - target)**2).sum()

    # 4) Figure out which weights caused us to miss
    loss.backward()

    # 5) Change those weights
    opt.step()

    # 6) Print our progress
    print(loss.data)

In [4]:
# Run the local (non-federated) training loop; it prints the loss after each step.
train()

tensor(0.9193)
tensor(0.2797)
tensor(0.1576)
tensor(0.1007)
tensor(0.0656)
tensor(0.0429)
tensor(0.0282)
tensor(0.0186)
tensor(0.0123)
tensor(0.0082)
tensor(0.0055)
tensor(0.0037)
tensor(0.0025)
tensor(0.0017)
tensor(0.0012)
tensor(0.0008)
tensor(0.0006)
tensor(0.0004)
tensor(0.0003)
tensor(0.0002)


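Let's modify this example to do it the Federated Learning way: instead of training on data held locally, the data will live on remote workers and the model will be sent to each worker in turn to train on that worker's data.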

In [5]:
import syft as sy
# Hook PySyft into the imported torch module.
# NOTE(review): presumably this is what makes .send()/.get() available on the
# tensors and the model used in the cells below -- confirm against the PySyft docs.
hook = sy.TorchHook(torch)

Falling back to insecure randomness since the required custom op could not be found for the installed version of TensorFlow. Fix this by compiling custom ops. Missing file was '/usr/local/lib/python3.6/dist-packages/tf_encrypted/operations/secure_random/secure_random_module_tf_1.15.0.so'





In [0]:
# create a couple workers
bob = sy.VirtualWorker(hook, id="bob")
alice = sy.VirtualWorker(hook, id="alice")

In [0]:
# Toy dataset: four 2-feature samples; the label equals the first feature.
data = torch.tensor(
    [[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]],
    requires_grad=True,
)
target = torch.tensor(
    [[0.0], [0.0], [1.0], [1.0]],
    requires_grad=True,
)

# Toy model: a single linear layer mapping 2 inputs to 1 output.
model = nn.Linear(in_features=2, out_features=1)

# Split the samples in half and ship each half to its worker: rows 0-1 go to
# bob, rows 2-3 go to alice. Each name below ends up holding the value
# returned by .send() for that shard.
data_bob, data_alice = data[0:2].send(bob), data[2:].send(alice)
target_bob, target_alice = target[0:2].send(bob), target[2:].send(alice)

# One (inputs, labels) pair per worker, in the order bob, alice.
datasets = [
    (data_bob, target_bob),
    (data_alice, target_alice),
]

In [0]:
def train(iterations=10, lr=0.1):
  """Train the notebook-level ``model`` on each worker's shard in turn.

  Reads the globals ``model`` and ``datasets``. For every iteration and every
  ``(data, target)`` pair in ``datasets``, the model is sent to
  ``data.location``, takes one SGD step there, and is fetched back with
  ``model.get()``. The loss of every step is fetched and printed.

  Args:
    iterations: Number of passes over all entries of ``datasets`` (default 10).
    lr: Learning rate handed to ``optim.SGD`` (default 0.1).

  Returns:
    None.
  """
  # Training Logic
  opt = optim.SGD(params=model.parameters(), lr=lr)
  # `_` instead of `iter`: the index is unused and `iter` would shadow the builtin.
  for _ in range(iterations):
    # NEW) Iterate through each worker's dataset
    for data, target in datasets:

      # NEW) Send model to correct worker
      model.send(data.location)

      # 1) Erase previous gradients (if they exist)
      opt.zero_grad()

      # 2) Make a prediction
      pred = model(data)

      # 3) Calculate how much we missed (sum of squared errors)
      loss = ((pred - target)**2).sum()

      # 4) Figure out which weights caused us to miss
      loss.backward()

      # 5) Change those weights
      opt.step()

      # NEW) Get model (with gradients)
      model.get()

      # 6) Print our progress
      print(loss.get()) # NEW) Slight edit. Need to call .get() on loss.

In [11]:
# Run the federated training loop; it prints one loss per worker dataset per iteration.
train()

tensor(0.3017, requires_grad=True)
tensor(0.7528, requires_grad=True)
tensor(0.1045, requires_grad=True)
tensor(0.1058, requires_grad=True)
tensor(0.0742, requires_grad=True)
tensor(0.0620, requires_grad=True)
tensor(0.0473, requires_grad=True)
tensor(0.0402, requires_grad=True)
tensor(0.0307, requires_grad=True)
tensor(0.0265, requires_grad=True)
tensor(0.0204, requires_grad=True)
tensor(0.0178, requires_grad=True)
tensor(0.0138, requires_grad=True)
tensor(0.0121, requires_grad=True)
tensor(0.0096, requires_grad=True)
tensor(0.0083, requires_grad=True)
tensor(0.0068, requires_grad=True)
tensor(0.0058, requires_grad=True)
tensor(0.0049, requires_grad=True)
tensor(0.0041, requires_grad=True)
