In [None]:
pip install syft

In [None]:
pip install matplotlib
#pip install --upgrade tensorflow

In [None]:
pip install jupyter-tensorboard

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import syft as sy
from syft.frameworks.torch.fl import utils
import matplotlib.pyplot as plt 
from collections import Counter 
#from torch.utils.tensorboard import SummaryWriter

In [None]:
#import wandb
#wandb.init()

In [2]:
class Parser:
    """Plain container for the notebook's hyper-parameters.

    Attributes:
        epochs: number of training rounds run by the training loop.
        lr: learning rate handed to the optimizers.
        batch_size: mini-batch size of the training DataLoader.
        test_batch_size: mini-batch size of the test DataLoader.
        log_interval: logging interval setting.
        seed: value used to seed torch's random number generator.
    """

    def __init__(self):
        # Training schedule.
        self.epochs, self.lr = 20, 0.001
        # Loader batch sizes (evaluation, then training).
        self.test_batch_size, self.batch_size = 1000, 64
        # Bookkeeping.
        self.log_interval, self.seed = 10, 1


args = Parser()
# Seed torch's RNG so weight initialisation and shuffling are repeatable.
torch.manual_seed(args.seed)

<torch._C.Generator at 0x12173e2f0>

In [3]:
# Create the PySyft hook around torch, then the two virtual workers that will
# hold the training batches. `nodes` fixes the worker order used everywhere
# else in the notebook: index 0 is bob, index 1 is alice.
hook = sy.TorchHook(torch)
bob, alice = (sy.VirtualWorker(hook, id=worker_id) for worker_id in ("bob", "alice"))
nodes = [bob, alice]

In [None]:
# Preprocessing shared by both splits: convert images to tensors, then
# normalise with mean 0.1307 and std 0.3081.
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Training split, read from ../data without downloading. Calling
# `.federate((bob, alice))` on the dataset would distribute it across the
# workers as a FederatedDataset; that path is disabled and the batches are
# sent to the workers manually instead.
train_dataset = datasets.MNIST('../data', train=True, download=False,
                               transform=mnist_transform)
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=args.batch_size, shuffle=True)

# Test split, kept local for evaluating the aggregated model.
test_dataset = datasets.MNIST('../data', train=False, transform=mnist_transform)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=args.test_batch_size, shuffle=True)

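# sy.FederatedDataLoader would be used instead of DataLoader if the dataset were distributed with .federate().
# Note: each (x, y) item yielded by a loader is one batch: x holds batch_size images and y holds the batch_size matching labels.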

In [None]:
# (number of training images, number of mini-batches). A notebook cell only
# displays its last expression, so both values are returned as one tuple.
len(train_loader.dataset), len(train_loader)

In [None]:
# One list of (data, target) batch pointers per worker, in the same order as
# `nodes`. Sized from `nodes` so adding a worker does not overflow the tuple.
remote_dataset = tuple([] for _ in nodes)
for batch_idx, (data, target) in enumerate(train_loader):
    # Round-robin assignment: batch k is sent to worker k % len(nodes).
    # (Sent manually because .federate / FederatedDataLoader was giving issues.)
    owner_idx = batch_idx % len(nodes)
    owner = nodes[owner_idx]
    remote_dataset[owner_idx].append((data.send(owner), target.send(owner)))

In [None]:
len(remote_dataset[0]) # number of batches held by Bob (nodes[0]); Alice's batches are in remote_dataset[1]
#len(remote_dataset[0][468])
#x,y=remote_dataset[0][0]
#x

In [None]:
class Net(nn.Module):
    """Two-convolution CNN classifier producing log-probabilities over 10 classes.

    Layout: conv(1->32, 3x3) -> ReLU -> conv(32->64, 3x3) -> ReLU ->
    2x2 max-pool -> dropout -> flatten -> fc(9216->128) -> ReLU -> dropout ->
    fc(128->10) -> log_softmax.

    ``fc1`` expects 9216 = 64 * 12 * 12 features, which is what the two
    unpadded 3x3 convolutions followed by the 2x2 pool produce for a
    1x28x28 input.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        # Channel-wise dropout on the 4-D feature maps coming out of the pool.
        self.dropout1 = nn.Dropout2d(0.25)
        # Element-wise dropout: this layer runs on the flattened 2-D activations
        # of fc1, where Dropout2d is not the appropriate module.
        self.dropout2 = nn.Dropout(0.5)
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Return per-class log-probabilities for a batch of images.

        Args:
            x: image batch fed to ``conv1`` (single input channel).

        Returns:
            Tensor with one row per sample and 10 columns; each row is a
            log-softmax over the classes.
        """
        # Convolutional feature extractor.
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        # Classifier head on the flattened features (batch dimension kept).
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = self.dropout2(x)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

In [None]:
# One independently initialised network per worker, in the same order as
# `nodes` (bob first, then alice), each with its own Adam optimizer bound to
# that model's parameters.
models = [Net(), Net()]
optimizers = [optim.Adam(net.parameters(), lr=args.lr) for net in models]

# Named handles for the individual workers' objects.
bobs_model, alices_model = models
bobs_optimizer, alices_optimizer = optimizers

In [None]:
# Global model, starting from freshly initialised weights; the training loop
# later rebinds `global_model` to the aggregated model returned by train().
global_model = Net()
# Print the layer summary of the network.
print(global_model)


#weights=global_model.fc2.weight.data  #gives last layer weights before softmax output
#print(weights)
#plt.plot(weights)
#plt.show()
#w = list(global_model.parameters()) #gives all parameters(weights of all)
#print(w)

In [None]:
def plot_weights(model):
    """Show a 15-bin histogram of all weights of the model's ``fc2`` layer.

    Args:
        model: a module exposing an ``fc2`` layer with a ``weight`` tensor.
    """
    # Nested Python list: one inner list per row of the weight matrix.
    weight_rows = model.fc2.weight.tolist()
    # Flatten the rows into a single list of scalars for the histogram.
    flat_weights = []
    for row in weight_rows:
        flat_weights.extend(row)
    plt.hist(flat_weights, 15)
    plt.show()
        
def update(x, y, model, optimizer):
    """Run one optimisation step of ``model`` on the worker that holds ``x``.

    Args:
        x: input batch; its ``location`` is where the model is sent.
        y: targets for ``x``.
        model: network to train; sent to ``x.location`` before the step.
        optimizer: optimizer stepping ``model``'s parameters.

    Returns:
        The same ``model`` object. It is not fetched back here; the caller
        retrieves it with ``model.get()``.
    """
    # Move the model to wherever this batch lives.
    model.send(x.location)
    optimizer.zero_grad()
    # Negative log-likelihood of the model's predictions for this batch.
    batch_loss = F.nll_loss(model(x), y)
    batch_loss.backward()
    optimizer.step()
    return model

def train(count=0, batches_per_epoch=40):
    """Train every worker's model on a window of its batches, then aggregate.

    For each batch index in the window, each worker's model takes one
    optimisation step on that worker's batch (via ``update``) and is then
    fetched back. After the window, the ``fc2`` weight histograms of both
    local models are plotted and the models are combined with
    ``utils.federated_avg``.

    Args:
        count: index of the first batch of the window. Defaults to 0.
        batches_per_epoch: number of consecutive batches per worker to train
            on. Defaults to 40, the previously hard-coded window size.

    Returns:
        The model returned by ``utils.federated_avg`` for bob's and alice's
        local models.

    Raises:
        ValueError: if a worker has no batches to train on.
    """
    # Validate up front so no model is left on a worker when we bail out.
    for worker_index in range(len(nodes)):
        if len(remote_dataset[worker_index]) == 0:
            raise ValueError(f"worker {worker_index} holds no training batches")

    for data_index in range(count, count + batches_per_epoch):
        for worker_index in range(len(nodes)):
            worker_batches = remote_dataset[worker_index]
            # Wrap around: the caller advances `count` by a fixed stride every
            # epoch, which eventually runs past the number of batches a worker
            # holds. Reusing batches from the start avoids an IndexError.
            x, y = worker_batches[data_index % len(worker_batches)]
            models[worker_index] = update(x, y, models[worker_index], optimizers[worker_index])
        # Bring every model back from its worker before the next batch.
        for model in models:
            model.get()

    plot_weights(models[0])
    plot_weights(models[1])
    # NOTE(review): the aggregated model is only returned; it is never copied
    # back into `models`, so each local model keeps training from its own
    # weights. Confirm this is intended.
    return utils.federated_avg({
        "bob": models[0],
        "alice": models[1],
    })

In [None]:
#trained_weights_bob=models[0].fc2.weight.data  #last layer weights bob
#plt.plot(trained_weights_bob)
#plt.show()
# train() needs the index of the first batch to use; start from batch 0.
train(0)

In [None]:
def test(federated_model):
    """Evaluate a model on ``test_loader`` and print average loss and accuracy.

    Args:
        federated_model: model to evaluate; it is switched to eval mode.

    Prints the summed NLL loss divided by the number of test samples, and the
    number and percentage of correctly classified samples.
    """
    federated_model.eval()
    test_loss = 0
    correct = 0
    # Evaluation needs no gradients; skip building the autograd graph.
    with torch.no_grad():
        for x, y in test_loader:
            output = federated_model(x)
            # Sum (not mean) per batch so the total can be divided by the
            # dataset size afterwards.
            test_loss += F.nll_loss(output, y, reduction='sum').item()
            # Index of the max log-probability = predicted class.
            pred = output.argmax(1, keepdim=True)
            correct += pred.eq(y.view_as(pred)).sum().item()

    # len(test_loader.dataset) is the number of test images; len(test_loader)
    # would be the number of batches.
    n_samples = len(test_loader.dataset)
    test_loss /= n_samples
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format
          (test_loss, correct, n_samples, 100. * correct / n_samples))
        

In [None]:
# Training process: each epoch calls train() with a start index that advances
# by 40 batches, and the model it returns becomes the current global model.
i = 0
for epoch in range(args.epochs):
    print(f"Epoch Number {epoch + 1}")
    global_model = federated_model = train(i)
    i += 40


In [None]:
# Evaluate the current global model on the test loader and print loss/accuracy.
test(global_model)