# Federated Learning with Model Averaging

In [1]:
import torch
import copy
import syft as sy
from torch import nn
from torch import optim
# Hook PyTorch so tensors and modules gain the PySyft remote methods used below
# (e.g. .send(), .get(), .move()); `hook` is also handed to every VirtualWorker.
hook = sy.TorchHook(torch)

In [2]:
# Three in-process workers: the two data owners, plus a third worker that
# later hosts the model-averaging step.
toby, julie, secure_machine = (
    sy.VirtualWorker(hook, id=worker_id)
    for worker_id in ("toby", "julie", "secure_worker")
)

# Toy dataset (taken from the tutorial): four 2-feature rows whose label
# equals the first feature.
data = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1.0]], requires_grad=True)
target = torch.tensor([[0], [0], [1], [1.0]], requires_grad=True)

# Split by row, as in example1: the first two rows belong to Toby,
# the last two to Julie.
toby_data, toby_target = data[:2], target[:2]
julie_data, julie_target = data[2:], target[2:]


In [3]:
# Toby's feature rows, still a local tensor at this point (not yet sent).
toby_data

tensor([[0., 0.],
        [0., 1.]], grad_fn=<SliceBackward>)

In [4]:
# Toby's labels, still local: both of his rows are labelled 0.
toby_target

tensor([[0.],
        [0.]], grad_fn=<SliceBackward>)

In [5]:
# Julie's feature rows, still a local tensor at this point (not yet sent).
julie_data

tensor([[1., 0.],
        [1., 1.]], grad_fn=<SliceBackward>)

In [6]:
# Julie's labels, still local: both of her rows are labelled 1.
julie_target

tensor([[1.],
        [1.]], grad_fn=<SliceBackward>)

In [7]:
# Hand each owner their share. The local names are rebound to what .send()
# returns, so re-running this cell without re-creating the tensors would call
# .send() on those returned objects rather than on the raw data.
toby_data, toby_target = toby_data.send(toby), toby_target.send(toby)
julie_data, julie_target = julie_data.send(julie), julie_target.send(julie)

In [8]:
# Peek at the object store of the worker that toby_data points to.
# `_objects` is a private attribute, used here for inspection only.
toby_data.location._objects

{14821136741: tensor([[0., 0.],
         [0., 1.]], requires_grad=True),
 65833685749: tensor([[0.],
         [0.]], requires_grad=True)}

In [9]:
# After .send(), the local name displays as a pointer to the tensor held on toby.
toby_target

(Wrapper)>[PointerTensor | me:53583708348 -> toby:65833685749]

In [10]:
# After .send(), the local name displays as a pointer to the tensor held on julie.
julie_data

(Wrapper)>[PointerTensor | me:69408708435 -> julie:42773609519]

In [11]:
# After .send(), the local name displays as a pointer to the tensor held on julie.
julie_target

(Wrapper)>[PointerTensor | me:18226774777 -> julie:44233108398]

In [12]:
# Global model: a single linear layer mapping the 2 input features to 1 output.
model = nn.Linear(in_features=2, out_features=1)

In [13]:
# Show the layer configuration of the global model.
model

Linear(in_features=2, out_features=1, bias=True)

In [14]:
# Sanity check: Toby's worker should still hold exactly the two tensors that
# were sent to it (his data and his targets) before training starts.
toby_data.location._objects

{14821136741: tensor([[0., 0.],
         [0., 1.]], requires_grad=True),
 65833685749: tensor([[0.],
         [0.]], requires_grad=True)}

In [15]:
# Federated averaging: in every round, send a copy of the global model to each
# data owner, run a few local SGD steps there, then average the two copies on
# the secure worker and write the result back into the global model.
# Note: within a round the owners are trained one after the other (Toby's step,
# then Julie's step), not concurrently.

iters = 8              # number of global averaging rounds
worker_iters = 4       # local SGD steps per owner in each round
learning_rate = 0.15   # step size shared by both owners' optimizers
n_workers = 2          # number of owner models combined in the average

for counter in range(iters):

    # Send a fresh copy of the current global model to each owner.
    toby_model = model.copy().send(toby)
    julie_model = model.copy().send(julie)

    # The copies are new objects every round, so the optimizers are rebuilt too.
    toby_opt = optim.SGD(params=toby_model.parameters(), lr=learning_rate)
    julie_opt = optim.SGD(params=julie_model.parameters(), lr=learning_rate)

    for worker_ctr in range(worker_iters):

        # One SGD step on Toby's copy (sum-of-squared-errors loss).
        toby_opt.zero_grad()
        toby_pred = toby_model(toby_data)
        toby_loss = ((toby_pred - toby_target) ** 2).sum()
        toby_loss.backward()

        toby_opt.step()
        # Fetch the loss for logging; it was computed before this step's update.
        toby_loss = toby_loss.get().data

        # One SGD step on Julie's copy (same loss).
        julie_opt.zero_grad()
        julie_pred = julie_model(julie_data)
        julie_loss = ((julie_pred - julie_target) ** 2).sum()
        julie_loss.backward()

        julie_opt.step()
        # Fetch the loss for logging; it was computed before this step's update.
        julie_loss = julie_loss.get().data

    # Move both trained copies to the secure worker for aggregation.
    toby_model.move(secure_machine)
    julie_model.move(secure_machine)

    # Average weights and biases over the owner models. Only the averaged
    # tensors are fetched with .get(); the individual copies are not.
    with torch.no_grad():
        model.weight.set_(((julie_model.weight.data + toby_model.weight.data) / n_workers).get())
        model.bias.set_(((julie_model.bias.data + toby_model.bias.data) / n_workers).get())

    # Report each owner's last fetched loss for this round.
    print("Toby Loss: " + str(toby_loss) + "\n" + "Julie Loss: " + str(julie_loss))

Toby Loss: tensor(0.0269)
Julie Loss: tensor(0.0585)
Toby Loss: tensor(0.0068)
Julie Loss: tensor(0.0260)
Toby Loss: tensor(0.0011)
Julie Loss: tensor(0.0106)
Toby Loss: tensor(3.5893e-05)
Julie Loss: tensor(0.0044)
Toby Loss: tensor(8.9421e-05)
Julie Loss: tensor(0.0019)
Toby Loss: tensor(0.0003)
Julie Loss: tensor(0.0009)
Toby Loss: tensor(0.0004)
Julie Loss: tensor(0.0005)
Toby Loss: tensor(0.0004)
Julie Loss: tensor(0.0003)


In [16]:
# Evaluate the averaged global model on the full local dataset using the same
# sum-of-squared-errors measure as during training.
predictions = model(data)
squared_error = (predictions - target) ** 2
loss = squared_error.sum()

In [17]:
# Show predictions, ground truth and total loss, one per line.
print(predictions, target, loss.data, sep="\n")

tensor([[0.0798],
        [0.0755],
        [0.8967],
        [0.8923]], grad_fn=<AddmmBackward>)
tensor([[0.],
        [0.],
        [1.],
        [1.]], requires_grad=True)
tensor(0.0343)
