In [6]:
import torch
from torch import nn
from torch import optim

# A Toy Dataset
data = torch.tensor([[0,0,0,0],[1,0,0,0],[0,1,0,0],[0,0,1,0],[1,1,0,0],[1,0,1,0],[0,1,1,0],[1,1,1,0],[0,0,0,1],[1,0,0,1],[0,1,0,1],[0,0,1,1],[1,1,0,1],[1,0,1,1],[0,1,1,1],[1,1,1,1.]])
target = torch.tensor([[0],[0],[0],[0],[0],[0],[0],[0],[1],[1],[1],[1],[1],[1],[1],[1.]])


# A Toy Model
model = nn.Sequential(
        nn.Linear(4, 3),
        nn.Tanh(),
        nn.Linear(3, 1),
        nn.Sigmoid()
    )

def train():
    # Training Logic
    opt = optim.SGD(params=model.parameters(),lr=0.1)
    for iter in range(20):

        # 1) erase previous gradients (if they exist)
        opt.zero_grad()

        # 2) make a prediction
        pred = model(data)

        # 3) calculate how much we missed
        loss = ((pred - target)**2).sum()

        # 4) figure out which weights caused us to miss
        loss.backward()

        # 5) change those weights
        opt.step()

        # 6) print our progress
        print(loss.data)
        
train()

tensor(4.8574)
tensor(4.4600)
tensor(4.2028)
tensor(4.0372)
tensor(3.9175)
tensor(3.8152)
tensor(3.7132)
tensor(3.5996)
tensor(3.4641)
tensor(3.2974)
tensor(3.0917)
tensor(2.8441)
tensor(2.5604)
tensor(2.2561)
tensor(1.9520)
tensor(1.6680)
tensor(1.4170)
tensor(1.2042)
tensor(1.0281)
tensor(0.8844)


In [1]:

import syft as sy
import time
hook = sy.TorchHook(torch)

# from torchviz import make_dot, make_dot_from_trace
from torch.autograd import Variable

# A Toy Dataset
x = torch.tensor([[0,0,0,0],[1,0,0,0],[0,1,0,0],[0,0,1,0],[1,1,0,0],[1,0,1,0],[0,1,1,0],[1,1,1,0],[0,0,0,1],[1,0,0,1],[0,1,0,1],[0,0,1,1],[1,1,0,1],[1,0,1,1],[0,1,1,1],[1,1,1,1.]])
y = torch.tensor([[0],[0],[0],[0],[0],[0],[0],[0],[1],[1],[1],[1],[1],[1],[1],[1.]])

#   Variables for performance metrics
start_time = time.time()
epochs = 300
lr = 0.2
counter = 0


# Define 2 chained models
models = [
    nn.Sequential(
        nn.Linear(4, 3),
        nn.Tanh()
    ),
    nn.Sequential(
        nn.Linear(3, 1),
        nn.Sigmoid()
    )
]

# Create optimisers for each segment and link to their segment
optimizers = [
    optim.SGD(params=model.parameters(),lr=lr)
    for model in models
]

# create some workers
alice = sy.VirtualWorker(hook, id="alice")
bob = sy.VirtualWorker(hook, id="bob")
workers = alice, bob

# Send Model Segments to starting locations
model_locations = [alice, bob]

for model, location in zip(models, model_locations):
    model.send(location)

# Create a remote copy of the dataset for each worker
datasets = [
    sy.BaseDataset(x.send(alice), y.send(bob))
    for worker in workers
]

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])








In [2]:
def forward(models, x):

    inputs = []
    outputs = []
    
    # First: provide x as input
    inputs.append(x)
    outputs.append(models[0](inputs[-1]))
    
    # MOve a copy of the inputs from the previous layer to the layer in front
    inputs.append(outputs[-1].copy().move(bob))
    outputs.append(model[1](inputs[-1]))
    
    return inputs, outputs

In [3]:
def backward(models, optimizers, inputs, outputs, dataset):
    data, targets = dataset.data, dataset.targets
        
    # Destroy pre-existing gradient of final layer
    optimizers[-1].zero_grad()
    loss = (((outputs[-1] - targets)**2).sum())
    loss.backward()
    # End layer sends the gradient of the activation signal back to the layer behind
    input_gradient = segment_inputs[-1].grad.clone().move(alice)
    # End layer updates weights
    optimizers[1].step()

    # Compute Final Layer, same but now input is the real input data
    optimizers[0].zero_grad()
    segment_output = segment_outputs[0]
    # Dot join the gradient of the input to the layer in front to the output of this segment
    intermediate_loss = torch.matmul(torch.t(segment_output), input_gradient).sum()
    intermediate_loss.backward()
    optimizers[0].step()
        
    return segment_outputs[-1], loss

In [5]:
for i in range(20):
    inputs, outputs = forward(models, datasets[0].data)
    prediction, loss = backward(models, optimizers, inputs, outputs, datasets[0])
    print(loss.get())

tensor(12.7654, requires_grad=True)
tensor(8.9069, requires_grad=True)
tensor(7.8442, requires_grad=True)
tensor(8.5770, requires_grad=True)
tensor(6.9032, requires_grad=True)
tensor(5.5687, requires_grad=True)
tensor(4.1533, requires_grad=True)
tensor(3.9864, requires_grad=True)
tensor(3.9412, requires_grad=True)
tensor(3.9043, requires_grad=True)
tensor(3.8731, requires_grad=True)
tensor(3.8464, requires_grad=True)
tensor(3.8232, requires_grad=True)
tensor(3.8029, requires_grad=True)
tensor(3.7849, requires_grad=True)
tensor(3.7688, requires_grad=True)
tensor(3.7544, requires_grad=True)
tensor(3.7414, requires_grad=True)
tensor(3.7295, requires_grad=True)
tensor(3.7187, requires_grad=True)


#### I am looking train this model while it is split across two hosts. I attempt to transfer loss backward for the backpropogation by sending 