In [1]:
import torch
from torch import nn
from torch import optim


# A Toy Dataset
data = torch.tensor([[0,0,0,0],[1,0,0,0],[0,1,0,0],[0,0,1,0],[1,1,0,0],[1,0,1,0],[0,1,1,0],[1,1,1,0],[0,0,0,1],[1,0,0,1],[0,1,0,1],[0,0,1,1],[1,1,0,1],[1,0,1,1],[0,1,1,1],[1,1,1,1.]])
target = torch.tensor([[0],[0],[0],[0],[0],[0],[0],[0],[1],[1],[1],[1],[1],[1],[1],[1.]])


# A Toy Model
model = nn.Sequential(
        nn.Linear(4, 3),
        nn.Tanh(),
        nn.Linear(3, 1),
        nn.Sigmoid()
    )

def train():
    # Training Logic
    opt = optim.SGD(params=model.parameters(),lr=0.1)
    for iter in range(20):

        # 1) erase previous gradients (if they exist)
        opt.zero_grad()

        # 2) make a prediction
        pred = model(data)

        # 3) calculate how much we missed
        loss = ((pred - target)**2).sum()

        # 4) figure out which weights caused us to miss
        loss.backward()

        # 5) change those weights
        opt.step()

        # 6) print our progress
        print(loss.data)
        
train()

tensor(4.1422)
tensor(4.0383)
tensor(3.9704)
tensor(3.9137)
tensor(3.8563)
tensor(3.7910)
tensor(3.7117)
tensor(3.6119)
tensor(3.4836)
tensor(3.3187)
tensor(3.1109)
tensor(2.8600)
tensor(2.5750)
tensor(2.2722)
tensor(1.9710)
tensor(1.6896)
tensor(1.4403)
tensor(1.2278)
tensor(1.0513)
tensor(0.9064)


In [2]:
import torch
from torch import nn
from torch import optim
import syft as sy
import time
hook = sy.TorchHook(torch)

# from torchviz import make_dot, make_dot_from_trace
from torch.autograd import Variable

# A Toy Dataset
x = torch.tensor([[0,0,0,0],[1,0,0,0],[0,1,0,0],[0,0,1,0],[1,1,0,0],[1,0,1,0],[0,1,1,0],[1,1,1,0],[0,0,0,1],[1,0,0,1],[0,1,0,1],[0,0,1,1],[1,1,0,1],[1,0,1,1],[0,1,1,1],[1,1,1,1.]])
y = torch.tensor([[0],[0],[0],[0],[0],[0],[0],[0],[1],[1],[1],[1],[1],[1],[1],[1.]])

#   Variables for performance metrics
start_time = time.time()
epochs = 300
lr = 0.2
counter = 0


# Define 2 chained models
models = [
    nn.Sequential(
        nn.Linear(4, 3),
        nn.Tanh()
    ),
    nn.Sequential(
        nn.Linear(3, 1),
        nn.Sigmoid()
    )
]

# Create optimisers for each segment and link to their segment
optimizers = [
    optim.SGD(params=model.parameters(),lr=lr)
    for model in models
]

# create some workers
alice = sy.VirtualWorker(hook, id="alice")
bob = sy.VirtualWorker(hook, id="bob")
workers = alice, bob

# Send Model Segments to starting locations
model_locations = [alice, bob]

for model, location in zip(models, model_locations):
    model.send(location)

# Create a remote copy of the dataset for each worker
datasets = [
    sy.BaseDataset(x.send(alice), y.send(bob))
    for worker in workers
]

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])








In [3]:
def forward(models, x):

    inputs = []
    outputs = []
    
    # First: provide x as input
    inputs.append(x)
    outputs.append(models[0](inputs[-1]))
    
    # Move a copy of the inputs from the previous layer to the layer in front
    inputs.append(outputs[-1].copy().move(bob))
    outputs.append(models[1](inputs[-1]))
    
#     print((model[1](inputs[-1])).get())
    
    
    return inputs, outputs

In [5]:
def backward(models, optimizers, inputs, outputs, dataset):
    data, targets = dataset.data, dataset.targets
        
    # Destroy pre-existing gradient of final layer
    optimizers[-1].zero_grad()
    loss = (((outputs[-1] - targets)**2).sum())
    loss.backward()
    # End layer sends the gradient of the activation signal back to the layer behind
    input_gradient = inputs[-1].grad.clone().move(alice)
    # End layer updates weights
    optimizers[-1].step()

    # Compute Final Layer, same but now input is the real input data
    optimizers[0].zero_grad()
    segment_output = outputs[0]
    # Dot join the gradient of the input to the layer in front to the output of this segment
#     intermediate_loss = torch.matmul(torch.t(segment_output), input_gradient).sum()
    intermediate_loss = ((segment_output.grad + (input_gradient*0.1))**2).sum()
    intermediate_loss.backward()
    optimizers[0].step()
        
    return outputs[-1], loss

In [6]:
for i in range(20):
    inputs, outputs = forward(models, datasets[0].data)
    prediction, loss = backward(models, optimizers, inputs, outputs, datasets[0])
    print(loss.get())

tensor(4.2158, requires_grad=True)
tensor(4.1433, requires_grad=True)
tensor(4.1673, requires_grad=True)
tensor(3.9964, requires_grad=True)
tensor(4.0480, requires_grad=True)
tensor(4.0211, requires_grad=True)
tensor(4.0349, requires_grad=True)
tensor(4.0162, requires_grad=True)
tensor(3.9814, requires_grad=True)
tensor(3.9557, requires_grad=True)
tensor(3.8668, requires_grad=True)
tensor(4.2269, requires_grad=True)
tensor(3.9221, requires_grad=True)
tensor(3.9806, requires_grad=True)
tensor(4.2295, requires_grad=True)
tensor(4.0474, requires_grad=True)
tensor(3.9399, requires_grad=True)
tensor(3.6308, requires_grad=True)
tensor(4.1025, requires_grad=True)
tensor(4.4105, requires_grad=True)


#### I am looking train this model while it is split across two hosts. I attempt to transfer loss backward for the backpropogation by sending the gradients to join to the output of the previous layer. This works but not as well as the other model which is centralised. I am wondering how i can improve this loss transfer formula.