<a href="https://colab.research.google.com/github/XinyiYS/FairAndPrivateFederatedLearning/blob/master/Federated_Dataset_with_Shapley.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Federated Dataset with Shapley.ipynb


In [0]:
# Install PySyft together with its "udacity" extra.
# NOTE(review): no version is pinned here, so a fresh runtime may resolve to a
# different release than the one this notebook was written against.
!pip install 'syft[udacity]'

Collecting tensorflow<2,>=1.12.0
[?25l  Downloading https://files.pythonhosted.org/packages/9a/d9/fd234c7bf68638423fb8e7f44af7fcfce3bcaf416b51e6d902391e47ec43/tensorflow-1.15.2-cp36-cp36m-manylinux2010_x86_64.whl (110.5MB)
[K     |████████████████████████████████| 110.5MB 56kB/s 
Collecting tensorboard<1.16.0,>=1.15.0
[?25l  Downloading https://files.pythonhosted.org/packages/1e/e9/d3d747a97f7188f48aa5eda486907f3b345cd409f0a0850468ba867db246/tensorboard-1.15.0-py3-none-any.whl (3.8MB)
[K     |████████████████████████████████| 3.8MB 29.4MB/s 
Collecting tensorflow-estimator==1.15.1
[?25l  Downloading https://files.pythonhosted.org/packages/de/62/2ee9cd74c9fa2fa450877847ba560b260f5d0fb70ee0595203082dafcc9d/tensorflow_estimator-1.15.1-py2.py3-none-any.whl (503kB)
[K     |████████████████████████████████| 512kB 59.2MB/s 
Installing collected packages: tensorboard, tensorflow-estimator, tensorflow
  Found existing installation: tensorboard 2.2.0
    Uninstalling tensorboard-2.2.0:
   

In [0]:
# Colab-only magic: select the TensorFlow 1.x runtime for this session.
%tensorflow_version 1.x

TensorFlow 1.x selected.


In [0]:
import random
from itertools import permutations
import copy
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

import syft as sy  # <-- NEW: import the Pysyft library


Falling back to insecure randomness since the required custom op could not be found for the installed version of TensorFlow. Fix this by compiling custom ops. Missing file was '/usr/local/lib/python3.6/dist-packages/tf_encrypted/operations/secure_random/secure_random_module_tf_1.15.2.so'





In [0]:
class Arguments():
    """Hyper-parameters and runtime switches for the training run.

    Args:
        epochs: number of training epochs. When omitted, the notebook-level
            ``epochs`` variable is used if it is already defined; otherwise
            the value falls back to 15.
    """
    def __init__(self, epochs=None):
        if epochs is None:
            # The class used to read the module-level `epochs` unconditionally,
            # which raised NameError whenever it was instantiated before that
            # variable had been assigned. Still honour it when it exists.
            epochs = globals().get('epochs', 15)
        self.batch_size = 64 #@param
        self.test_batch_size = 1000 #@param
        self.epochs = epochs
        self.lr = 0.01 #@param
        self.momentum = 0.5
        self.no_cuda = False
        self.seed = 1
        self.log_interval = 150 #@param
        self.save_model = False

epochs = 15 #@param

args = Arguments()

use_cuda = not args.no_cuda and torch.cuda.is_available()

# Seed every RNG the notebook draws from: torch, and Python's `random`
# (train_shapley shuffles the worker orderings with random.shuffle).
torch.manual_seed(args.seed)
random.seed(args.seed)

device = torch.device("cuda" if use_cuda else "cpu")

# Extra DataLoader keyword arguments, only supplied when running on CUDA.
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}


hook = sy.TorchHook(torch)  # <-- NEW: hook PyTorch ie add extra functionalities to support Federated Learning

num_workers = 3#@param

# One virtual worker per participant; ids are the strings "0", "1", ...
workers = [ sy.VirtualWorker(hook, id=str(i)) for i in range(num_workers) ]
workerIds = [worker.id for worker in workers]

# bob = sy.VirtualWorker(hook, id="bob")  # <-- NEW: define remote worker bob
# alice = sy.VirtualWorker(hook, id="alice")  # <-- NEW: and alice



In [0]:
# Preprocessing shared by the train and test splits: convert to a tensor,
# then normalise with the fixed mean/std constants below.
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Training split: downloaded if needed, then distributed across the virtual
# workers so the loader yields batches that live on a single worker each.
federated_train_loader = sy.FederatedDataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=mnist_transform).federate(workers),
    batch_size=args.batch_size, shuffle=True, **kwargs)

# Test split stays local and is served by a regular DataLoader.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=mnist_transform),
    batch_size=args.test_batch_size, shuffle=True, **kwargs)


from collections import defaultdict

# Walk the federated loader once, grouping the batches by the id of the worker
# that holds them. `count` ends up as the total number of batches seen.
worker_counts = defaultdict(int)
worker_data_loader = defaultdict(list)
count = 0
for count, (data, target) in enumerate(federated_train_loader, start=1):
    owner_id = data.location.id
    worker_counts[owner_id] += 1
    worker_data_loader[owner_id].append((data, target))



In [0]:
class Net(nn.Module):
    """Convolutional classifier: two conv + max-pool stages, then two linear layers.

    ``forward`` returns log-probabilities over 10 classes (log-softmax on dim 1).
    """

    def __init__(self):
        super().__init__()
        # Layers are created in the same order as before so that parameter
        # order and initialisation draws are unchanged.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=20, kernel_size=5, stride=1)
        self.conv2 = nn.Conv2d(in_channels=20, out_channels=50, kernel_size=5, stride=1)
        self.fc1 = nn.Linear(4 * 4 * 50, 500)
        self.fc2 = nn.Linear(500, 10)

    def forward(self, x):
        # Stage 1 and 2: convolution -> ReLU -> 2x2 max-pool with stride 2.
        features = F.max_pool2d(F.relu(self.conv1(x)), 2, 2)
        features = F.max_pool2d(F.relu(self.conv2(features)), 2, 2)
        # Flatten to rows of 4*4*50 values, the width fc1 expects.
        flat = features.view(-1, 4 * 4 * 50)
        hidden = F.relu(self.fc1(flat))
        return F.log_softmax(self.fc2(hidden), dim=1)

In [0]:
def train(args, model, device, federated_train_loader, optimizer, epoch):
    """Run one pass over ``federated_train_loader``, one optimiser step per batch.

    For every batch the model is sent to the location that holds the batch,
    a forward/backward pass and an optimiser step are performed, and the model
    is fetched back. A progress line is printed whenever the batch index is a
    multiple of ``args.log_interval``.
    """
    model.train()
    for step, (inputs, labels) in enumerate(federated_train_loader):
        model.send(inputs.location)  # move the model to where this batch lives
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        batch_loss = F.nll_loss(model(inputs), labels)
        batch_loss.backward()
        optimizer.step()
        model.get()  # bring the updated model back

        if step % args.log_interval != 0:
            continue

        # The loss has to be fetched as well before its value can be printed.
        batch_loss = batch_loss.get()
        n_batches = len(federated_train_loader)
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, step * args.batch_size, n_batches * args.batch_size,
            100. * step / n_batches, batch_loss.item()))

def test(args, model, device, test_loader, verbose=True):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss
            pred = output.argmax(1, keepdim=True) # get the index of the max log-probability 
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    if verbose:
        print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
            test_loss, correct, len(test_loader.dataset),
            100. * correct / len(test_loader.dataset)))
    test_acc = 1.* correct / len(test_loader.dataset)
    return test_acc

def averge_parameters(redundant_models):
    """Build a new ``Net`` whose parameters are the mean of the given models'.

    Only tensors yielded by ``.parameters()`` are averaged. If
    ``redundant_models`` is empty, the returned model keeps its freshly
    initialised parameters.
    """
    averaged = Net().to(device)
    n_models = len(redundant_models)
    for position, candidate in enumerate(redundant_models):
        pairs = zip(averaged.parameters(), candidate.parameters())
        for target_param, source_param in pairs:
            scaled = source_param.data * 1. / n_models
            if position:
                target_param.data += scaled
            else:
                # The first model overwrites the random initialisation.
                target_param.data = scaled
    return averaged

def train_shapley(args, model, device, worker_data_loader, optimizer, epoch, past_contributions):
    """Train for one epoch while estimating each worker's marginal contribution.

    Every ordering (permutation) of ``workerIds`` is replayed from the same
    starting weights. Within an ordering, the model is trained on one worker's
    batches at a time, and the change in test accuracy after each worker is
    credited to that worker. The per-worker credits are averaged over all
    orderings and added to ``past_contributions``. Finally ``model`` is
    overwritten with the parameter average of the models obtained at the end
    of each ordering.

    Args:
        args: run configuration, forwarded to ``test``.
        model: the model to train; updated in place.
        device: device passed to ``.to()`` for batches and helper models.
        worker_data_loader: mapping from worker id to that worker's list of
            ``(data, target)`` batches.
        optimizer: optimiser bound to ``model``'s parameters.
        epoch: current epoch number (currently unused).
        past_contributions: running per-worker totals, indexed in the order of
            ``workerIds``; updated in place.

    Returns:
        ``past_contributions`` after adding this epoch's averaged contributions.

    Note:
        The number of orderings grows factorially with the number of workers.
    """
    all_sequences = list(permutations(workerIds))
    random.shuffle(all_sequences)

    # Position of each worker in the contribution tensors. Looking this up by
    # id avoids assuming that ids are the strings "0".."n-1".
    worker_slot = {worker_id: slot for slot, worker_id in enumerate(workerIds)}

    # Accuracy before any training this epoch: the baseline for whichever
    # worker comes first in an ordering.
    baseline_acc = test(args, model, device, test_loader, verbose=False)

    # Snapshot of the starting weights so that every ordering begins from the
    # same model.
    model_prev_epoch = Net().to(device)
    model_prev_epoch.load_state_dict(model.state_dict())

    redundant_models = []
    marginal_contributions = torch.zeros(len(workerIds))
    for sequence in all_sequences:

        prev_acc = baseline_acc
        for workerId in sequence:

            # test() leaves the model in eval mode, so switch back to train
            # mode before every worker's batches, not just once up front.
            model.train()
            for data, target in worker_data_loader[workerId]:
                model.send(data.location) # <-- NEW: send the model to the right location
                data, target = data.to(device), target.to(device)
                optimizer.zero_grad()
                output = model(data)
                loss = F.nll_loss(output, target)
                loss.backward()
                optimizer.step()

                model.get() # <-- NEW: get the model back

            # Credit this worker with the accuracy change it produced.
            test_acc = test(args, model, device, test_loader, verbose=False)
            marginal_contributions[worker_slot[workerId]] += test_acc - prev_acc
            prev_acc = test_acc

        # Keep a copy of the model reached by this ordering, then rewind.
        redundant_model = Net().to(device)
        redundant_model.load_state_dict(model.state_dict())
        redundant_models.append(redundant_model)

        model.load_state_dict(model_prev_epoch.state_dict())

    num_sequences = len(all_sequences)
    past_contributions += marginal_contributions/ num_sequences
    print("Marginal contributions this epoch:", marginal_contributions/ num_sequences)

    # The epoch's result is the parameter average over all orderings.
    final_model = averge_parameters(redundant_models)
    model.load_state_dict(final_model.state_dict())

    return past_contributions


In [0]:
# Main loop. Each epoch, train_shapley replays the orderings of workers,
# credits every worker with the test-accuracy change it produced, and averages
# those credits over the orderings. `past_contributions` accumulates the
# per-epoch averages, one entry per worker.

workerIds = [w.id for w in workers]

model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=args.lr) # TODO momentum is not supported at the moment
past_contributions = torch.zeros(len(workerIds))

for epoch in range(1, args.epochs + 1):
    past_contributions = train_shapley(
        args, model, device, worker_data_loader, optimizer, epoch, past_contributions)
    test(args, model, device, test_loader)
    print(past_contributions)

if args.save_model:
    torch.save(model.state_dict(), "mnist_cnn.pt")

Marginal contributions this epoch: tensor([0.2584, 0.3046, 0.2923])

Test set: Average loss: 0.1528, Accuracy: 9567/10000 (96%)

tensor([0.2584, 0.3046, 0.2923])
Marginal contributions this epoch: tensor([0.0052, 0.0020, 0.0067])

Test set: Average loss: 0.0873, Accuracy: 9739/10000 (97%)

tensor([0.2636, 0.3067, 0.2990])
Marginal contributions this epoch: tensor([ 0.0046, -0.0022,  0.0006])

Test set: Average loss: 0.0657, Accuracy: 9802/10000 (98%)

tensor([0.2682, 0.3044, 0.2996])
Marginal contributions this epoch: tensor([ 0.0031, -0.0036, -0.0003])

Test set: Average loss: 0.0550, Accuracy: 9827/10000 (98%)

tensor([0.2714, 0.3008, 0.2993])


Traceback (most recent call last):
Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f9be7e05860>>
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 961, in __del__
    self._shutdown_workers()
  File "/usr/lib/python3.6/multiprocessing/queues.py", line 240, in _feed
    send_bytes(obj)
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 930, in _shutdown_workers
    self._worker_result_queue.close()
  File "/usr/lib/python3.6/multiprocessing/queues.py", line 134, in close
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
    self._reader.close()
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 177, in close
    self._close()
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 404,

In [0]:
# Display the per-worker contribution totals accumulated by the training loop.
past_contributions