<a href="https://colab.research.google.com/github/inspire-lab/SecurePrivateAI/blob/master/8_fl_and_sl.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Privacy Preserving Machine Learning

First things first. Let's run the package installations. They take quite a while.

At the end of the installation you need to hit the restart button. 

# installation

Because of a version conflict, you may instead stop this session and go to

<a href="https://colab.research.google.com/github/inspire-lab/SecurePrivateAI/blob/master/9_sl.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Otherwise, you need to restart the runtime after the installation.

In [None]:
# Install PySyft first.
# It conflicts with tensorflow-federated==0.19.0.
# You need to restart the runtime after the installation.

!pip install syft==0.2.5

# Split Learning and Pysyft

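Split learning approaches the problem from another perspective: the model itself is split into segments that live with different parties, and only the intermediate activations (and their gradients) travel between them.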

![pipeline](https://github.com/inspire-lab/SecurePrivateAI/raw/main/images/SL.png)


In [None]:
# You need to import the PyTorch library first;
# otherwise, errors are easily raised.
import torch
from torchvision import datasets, transforms
from torch import nn, optim

In [None]:
import torch
import syft as sy

# Hook torch so that its tensors gain the PySyft methods used below.
hook = sy.TorchHook(torch)

# A virtual worker stands in for a remote party; in an actual setting it
# would run on a different machine.
client = sy.VirtualWorker(hook, id='client')

# Build a local tensor and send it to the client. What we keep on our side
# is a pointer to the tensor.
x = torch.tensor([1, 2, 3, 4, 5])
x_pointer = x.send(client)

# Inspect the pointer and the objects currently stored on the client.
print(x_pointer)
print(client._objects)

# Pointers can be used like normal tensors.
result = x_pointer + x_pointer
print(result)

# get() sends the tensor behind the pointer back to us. Once it has been
# called, the tensor is removed from the other side and our pointer becomes
# invalid, so `result` should not be used (e.g. printed) after this point.
result_local = result.get()
print(result_local)
print(client._objects)

In [None]:
import torch
from torchvision import datasets, transforms
from torch import nn, optim
import syft as sy
# Hook torch so tensors and modules gain the PySyft methods used below.
hook = sy.TorchHook(torch)

# Number of full passes over the training data.
epochs = 5

# Preprocessing pipeline: convert each image to a tensor, then normalise it
# with mean 0.5 and standard deviation 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ]
)

# MNIST training split (downloaded into ./mnist when missing), served in
# shuffled batches of 64.
train_set = datasets.MNIST(root='mnist', train=True, transform=transform, download=True)
train_loader = torch.utils.data.DataLoader(dataset=train_set, batch_size=64, shuffle=True)

# Seed torch's random number generator so that the random operations that
# follow are reproducible.
torch.manual_seed(0)

# Layer widths of the split network: input -> 128 -> 640 -> output.

input_size = 784
hidden_sizes = [128, 640]
output_size = 10

first_width, second_width = hidden_sizes

# The network is cut into two segments:
#   models[0]: input -> two hidden layers (ReLU after each)
#   models[1]: last hidden layer -> log-probabilities over the classes
models = [
    nn.Sequential(
        nn.Linear(in_features=input_size, out_features=first_width),
        nn.ReLU(),
        nn.Linear(in_features=first_width, out_features=second_width),
        nn.ReLU(),
    ),
    nn.Sequential(
        nn.Linear(in_features=second_width, out_features=output_size),
        nn.LogSoftmax(dim=1),
    ),
]

# One SGD optimizer per segment, each bound to that segment's parameters only.
optimizers = [optim.SGD(segment.parameters(), lr=0.03) for segment in models]

# Two virtual workers play the parties that each hold one model segment.
alice, bob = (sy.VirtualWorker(hook, id=name) for name in ("alice", "bob"))
workers = (alice, bob)

# Starting location of each segment: models[i] is sent to model_locations[i].
model_locations = [alice, bob]

for segment, worker in zip(models, model_locations):
    segment.send(worker)

def train(x, target, models, optimizers):
    """Run one split-learning training step on a single batch.

    Args:
        x: Input batch, fed to ``models[0]``.
        target: Labels, compared against the output of ``models[1]``.
        models: The two model segments; ``models[1]`` must expose a
            ``location`` attribute naming where it lives.
        optimizers: The optimizers to reset before and step after the
            backward pass.

    Returns:
        The detached loss of this batch, fetched with ``.get()``.
    """
    # Start from clean gradients on every segment.
    for optimizer in optimizers:
        optimizer.zero_grad()

    # Forward through the first segment, then hand the activation over to
    # the location of the second segment.
    activation = models[0](x)
    remote_activation = activation.move(models[1].location, requires_grad=True)

    # Finish the forward pass and measure how far off the prediction is.
    log_probs = models[1](remote_activation)
    loss_fn = nn.NLLLoss()
    batch_loss = loss_fn(log_probs, target)

    # Work out which weights caused the miss.
    batch_loss.backward()

    # NOTE(review): the gradient of the moved activation is not explicitly
    # sent back to models[0] here. Presumably requires_grad=True on move()
    # makes PySyft route it back to the first segment -- confirm against the
    # PySyft version in use.

    # Apply the weight updates on every segment.
    for optimizer in optimizers:
        optimizer.step()

    return batch_loss.detach().get()

# Train for `epochs` passes over the training data and report the mean
# batch loss after each pass.
for epoch in range(epochs):
    running_loss = 0
    for images, labels in train_loader:
        # The images go to alice (where they are flattened to one row per
        # sample); the labels go to bob.
        images = images.send(alice)
        images = images.view(images.shape[0], -1)
        labels = labels.send(bob)

        running_loss += train(images, labels, models, optimizers)

    print("Epoch {} - Training loss: {}".format(epoch, running_loss/len(train_loader)))


def test(x, target, models, optimizers):
    """Run a forward pass through both model segments and return the output.

    Args:
        x: Input batch, fed to ``models[0]``.
        target: Labels for the batch; accepted for symmetry with ``train``
            but not used by this function.
        models: The two model segments; ``models[1]`` must expose a
            ``location`` attribute naming where it lives.
        optimizers: Optimizers whose gradients are reset before the pass.

    Returns:
        The detached output of ``models[1]``, fetched with ``.get()``.
    """
    # Clear any gradients left over from earlier steps.
    for optimizer in optimizers:
        optimizer.zero_grad()

    # Forward through the first segment, move the activation to where the
    # second segment lives, and finish the forward pass there.
    activation = models[0](x)
    remote_activation = activation.move(models[1].location, requires_grad=True)
    output = models[1](remote_activation)

    # Bring the predictions back to the caller.
    return output.detach().get()

# Evaluate the trained segments on the MNIST test split.
num_correct = 0
total = 0
test_set = datasets.MNIST('mnist', download=True, train=False, transform=transform)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=64, shuffle=True)

for images, labels in test_loader:
    # Only the images are sent to alice; the labels stay local because
    # test() returns the predictions to this side.
    images = images.send(alice)
    images = images.view(images.shape[0], -1)
    predictions = test(images, labels, models, optimizers)

    # The predicted class is the index of the largest output per sample.
    _, predicted_classes = predictions.max(dim=1)

    # .item() keeps the running count a plain Python number, not a tensor.
    num_correct += (predicted_classes == labels).sum().item()
    total += labels.size(0)

# Report the accuracy once, over the whole test set, instead of after every
# batch.
print(f"Test Accuracy of the model: {num_correct / total * 100:.2f}")

More references:

https://learnopencv.com/federated-learning-using-pytorch-and-pysyft/

https://github.com/OpenMined/PySyft/tree/syft_0.2.x/examples/tutorials/advanced/split_neural_network