<a href="https://colab.research.google.com/github/bloomingstars/Helpchain/blob/master/Copy_of_federated_learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
!pip install syft

Collecting syft
[?25l  Downloading https://files.pythonhosted.org/packages/ed/92/1d41de2cbb196dc315e083228ad41308107e7a298e7f547106daa756ee0c/syft-0.2.0a2-py3-none-any.whl (337kB)
[K     |████████████████████████████████| 337kB 2.7MB/s 
[?25hCollecting msgpack>=0.6.1
[?25l  Downloading https://files.pythonhosted.org/packages/3d/a8/e01fea81691749044a7bfd44536483a296d9c0a7ed4ec8810a229435547c/msgpack-0.6.2-cp36-cp36m-manylinux1_x86_64.whl (249kB)
[K     |████████████████████████████████| 256kB 77.0MB/s 
[?25hCollecting lz4>=2.1.6
[?25l  Downloading https://files.pythonhosted.org/packages/5d/5e/cedd32c203ce0303188b0c7ff8388bba3c33e4bf6da21ae789962c4fb2e7/lz4-2.2.1-cp36-cp36m-manylinux1_x86_64.whl (395kB)
[K     |████████████████████████████████| 399kB 76.7MB/s 
Collecting websockets>=7.0
[?25l  Downloading https://files.pythonhosted.org/packages/cf/cb/c35513c4a0ff24ca13e33f7336ba8c1a864449fad9fea8e37abdad11c38d/websockets-8.1-cp36-cp36m-manylinux1_x86_64.whl (73kB)
[K     |█████

In [0]:
import torch

In [0]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import syft as sy 
import random 

class Arguments():
    def __init__(self):
        self.batch_size = 128
        self.test_batch_size = 1000
        self.epochs = 1
        self.lr = 0.01
        self.momentum = 0.5
        self.no_cuda = True
        self.seed = 200316905 ## TODO change seed to your studentID inside the class Arguments (line 17)
        self.log_interval = 30
        self.save_model = False

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 20, 5, 1)
        self.conv2 = nn.Conv2d(20, 50, 5, 1)
        self.fc1 = nn.Linear(4*4*50, 500)
        self.fc2 = nn.Linear(500, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2, 2)
        x = x.view(-1, 4*4*50)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)


def train(args, model, device, federated_train_loader, optimizer, epoch, participates):
    model.train()  # <-- initial training
    for batch_idx, (data, target) in enumerate(federated_train_loader): # <-- now it is a distributed dataset
        if target.location.id in participates:
            model.send(data.location) # <-- NEW: send the model to the right location
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = F.nll_loss(output, target)
            loss.backward()
            optimizer.step()
            model.get() # <-- NEW: get the model back
            if batch_idx % args.log_interval == 0:
                loss = loss.get() # <-- NEW: get the loss back
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * args.batch_size, len(federated_train_loader) * args.batch_size,
                    100. * batch_idx / len(federated_train_loader), loss.item()))


            
def test(args, model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss
            pred = output.argmax(1, keepdim=True) # get the index of the max log-probability 
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))


### main function

args = Arguments()
use_cuda = not args.no_cuda and torch.cuda.is_available()
torch.manual_seed(args.seed) 
device = torch.device("cuda" if use_cuda else "cpu")
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

hook = sy.TorchHook(torch)  # <-- NEW: hook PyTorch ie add extra functionalities to support Federated Learning

node1 = sy.VirtualWorker(hook, id="node1")
node2 = sy.VirtualWorker(hook, id="node2")
node3 = sy.VirtualWorker(hook, id="node3")
node4 = sy.VirtualWorker(hook, id="node4")
node5 = sy.VirtualWorker(hook, id="node5")
node6 = sy.VirtualWorker(hook, id="node6")
node7 = sy.VirtualWorker(hook, id="node7")
node8 = sy.VirtualWorker(hook, id="node8")
node9 = sy.VirtualWorker(hook, id="node9")
node10 = sy.VirtualWorker(hook, id="node10")

##-------------------------------------------

## distribute data across nodes
federated_train_loader = sy.FederatedDataLoader( # <-- this is now a FederatedDataLoader 
    datasets.MNIST('./data', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ]))
    .federate((node1,node2,node3,node4,node5,node6,node7,node8,node9,node10)), 
    batch_size=args.batch_size, shuffle=True, **kwargs)

## test dataset is always same at the central server
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('./data', train=False, transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=args.test_batch_size, shuffle=True, **kwargs)

## training models in a federated appraoch
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=args.lr) 

## TODO: select a random set of node ids that will be passed to the training function; these nodes will particiapte in the federated learning
full_node_list=['node1','node2','node3','node4','node5','node6','node7','node8','node9','node10']

#for varying k
for k_val in [3,5,7,10]:
    select_nodes=random.sample(full_node_list,k=k_val) #change this
    train(args, model, device, federated_train_loader, optimizer, 3,select_nodes ) ## TODO: pass the node_id list like ['node1','node2' ...]
    test(args, model, device, test_loader)

for n_val in [3,5,10]:
    select_nodes=random.sample(full_node_list,k=5) #change this
    train(args, model, device, federated_train_loader, optimizer, n_val,select_nodes ) ## TODO: pass the node_id list like ['node1','node2' ...]
    test(args, model, device, test_loader)
#create node_list 

##-------------------------------------------

for epoch in range(1, args.epochs + 1):
    train(args, model, device, federated_train_loader, optimizer, 3,select_nodes ) ## TODO: pass the node_id list like ['node1','node2' ...]
    test(args, model, device, test_loader)

if (args.save_model):
    torch.save(model.state_dict(), "mnist_cnn.pt")








0it [00:00, ?it/s]

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


9920512it [00:02, 3975028.77it/s]                             


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw


0it [00:00, ?it/s]

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


32768it [00:00, 57217.74it/s]                           
0it [00:00, ?it/s]

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


1654784it [00:01, 944710.38it/s]                             
0it [00:00, ?it/s]

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


8192it [00:00, 21713.96it/s]            


Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw
Processing...
Done!

Test set: Average loss: 0.5875, Accuracy: 8410/10000 (84%)


Test set: Average loss: 0.2811, Accuracy: 9157/10000 (92%)


Test set: Average loss: 0.1832, Accuracy: 9453/10000 (95%)


Test set: Average loss: 0.1288, Accuracy: 9628/10000 (96%)


Test set: Average loss: 0.1184, Accuracy: 9658/10000 (97%)


Test set: Average loss: 0.0963, Accuracy: 9712/10000 (97%)


Test set: Average loss: 0.0866, Accuracy: 9748/10000 (97%)


Test set: Average loss: 0.0842, Accuracy: 9751/10000 (98%)

