# Federated diabetes predictor using PIMA Indian diabetes dataset 

#### In this code ,I have used PyTorch and PySyft to train a Deep Learning neural network using federated approach.

In [1]:
# import necessary libraries
import pickle
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import time
import copy
import numpy as np
import syft as sy
from syft.frameworks.torch.fl import utils
from syft.workers.websocket_client import WebsocketClientWorker
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [2]:
#Initialising the training parameters

class Parser:
    def __init__(self):
        self.epochs = 100
        self.lr = 0.001
        self.test_batch_size = 8
        self.batch_size = 8
        self.log_interval = 10
        self.seed = 1
    
args = Parser()
torch.manual_seed(args.seed)

<torch._C.Generator at 0x1bf9225ff70>

In [3]:
# Loading the dataset and getting it ready 

col_Names=["Pregnancies", "Glucose", "BloodPressure", "SkinThickness","Insulin","BMI","DiabetesPedigreeFunction","Age","Outcome"]
df= pd.read_csv("pimadiabetes.csv",names=col_Names)

In [4]:
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [5]:
# adding a label in the end for clear distinction
df['OutcomeCat'] = np.where(df.Outcome == 1,"Diabetic","Not Diabetic")

In [6]:
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome,OutcomeCat
0,6,148,72,35,0,33.6,0.627,50,1,Diabetic
1,1,85,66,29,0,26.6,0.351,31,0,Not Diabetic
2,8,183,64,0,0,23.3,0.672,32,1,Diabetic
3,1,89,66,23,94,28.1,0.167,21,0,Not Diabetic
4,0,137,40,35,168,43.1,2.288,33,1,Diabetic


In [7]:
#Splitting the data into test and train sets.

X_train, X_test, y_train, y_test = train_test_split(df.drop(['Outcome','OutcomeCat'],axis=1),df.Outcome,test_size  = 0.2, random_state=0)

In [8]:
x = torch.from_numpy(X_train.to_numpy()).float()
y = torch.from_numpy(y_train.to_numpy()).float()
x_test = torch.from_numpy(X_test.to_numpy()).float()
y_test = torch.from_numpy(y_test.to_numpy()).float()

In [9]:
train = TensorDataset(x, y)
test = TensorDataset(x_test, y_test)
train_loader = DataLoader(train, batch_size=args.batch_size, shuffle=True)
test_loader = DataLoader(test, batch_size=args.test_batch_size, shuffle=True)

In [19]:
# Neural network architecture
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(8, 32)
        self.fc2 = nn.Linear(32, 24)
        self.fc4 = nn.Linear(24, 16)
        self.fc3 = nn.Linear(16, 1)

    def forward(self, x):
        x = x.view(-1, 8)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc4(x))
        x = self.fc3(x)
        return x

In [20]:
# creating virtual workers

hook = sy.TorchHook(torch)
bob_worker = sy.VirtualWorker(hook, id="bob")
alice_worker = sy.VirtualWorker(hook, id="alice")

compute_nodes = [bob_worker, alice_worker]




In [21]:
# send data to virtual workers
remote_dataset = (list(), list())
train_distributed_dataset = []

for batch_idx, (data,target) in enumerate(train_loader):
    data = data.send(compute_nodes[batch_idx % len(compute_nodes)])
    target = target.send(compute_nodes[batch_idx % len(compute_nodes)])
    remote_dataset[batch_idx % len(compute_nodes)].append((data, target))

In [22]:

bobs_model = Net()
alices_model = Net()
bobs_optimizer = optim.SGD(bobs_model.parameters(), lr=args.lr)
alices_optimizer = optim.SGD(alices_model.parameters(), lr=args.lr)

In [23]:
models = [bobs_model, alices_model]
optimizers = [bobs_optimizer, alices_optimizer]

In [24]:
model = Net()
model

Net(
  (fc1): Linear(in_features=8, out_features=32, bias=True)
  (fc2): Linear(in_features=32, out_features=24, bias=True)
  (fc4): Linear(in_features=24, out_features=16, bias=True)
  (fc3): Linear(in_features=16, out_features=1, bias=True)
)

In [25]:
def update(data, target, model, optimizer):
    model.send(data.location)
    optimizer.zero_grad()
    prediction = model(data)
    loss = F.mse_loss(prediction.view(-1), target)
    loss.backward()
    optimizer.step()
    return model

def train():
    for data_index in range(len(remote_dataset[0])-1):
        for remote_index in range(len(compute_nodes)):
            data, target = remote_dataset[remote_index][data_index]
            models[remote_index] = update(data, target, models[remote_index], optimizers[remote_index])
        for model in models:
            model.get()
        return utils.federated_avg({
            "bob": models[0],
            "alice": models[1]
        })

In [26]:

def test(federated_model):
    federated_model.eval()
    test_loss = 0
    for data, target in test_loader:
        output = federated_model(data)
        test_loss += F.mse_loss(output.view(-1), target, reduction='sum').item()
        predection = output.data.max(1, keepdim=True)[1]
        
    test_loss /= len(test_loader.dataset)
    print('Test set: Average loss: {:.4f}'.format(test_loss))
    return test_loss

In [27]:
#save the model with least loss
least_loss = 100000000
for epoch in range(args.epochs):
    start_time = time.time()
    print(f"Epoch Number {epoch + 1}")
    federated_model = train()
    model = federated_model
    test_loss = test(federated_model)
    if(test_loss < least_loss):
        least_loss = test_loss
        torch.save(model.state_dict(), 'pima_model.pt')
    total_time = time.time() - start_time
    print('Communication time over the network', round(total_time, 2), 's\n')

Epoch Number 1
Test set: Average loss: 2.0174
Communication time over the network 0.21 s

Epoch Number 2
Test set: Average loss: 0.1738
Communication time over the network 0.2 s

Epoch Number 3
Test set: Average loss: 0.6883
Communication time over the network 0.17 s

Epoch Number 4
Test set: Average loss: 0.6709
Communication time over the network 0.18 s

Epoch Number 5
Test set: Average loss: 1.0260
Communication time over the network 0.19 s

Epoch Number 6
Test set: Average loss: 0.6573
Communication time over the network 0.21 s

Epoch Number 7
Test set: Average loss: 4.8092
Communication time over the network 0.21 s

Epoch Number 8
Test set: Average loss: 0.2657
Communication time over the network 0.19 s

Epoch Number 9
Test set: Average loss: 0.7715
Communication time over the network 0.19 s

Epoch Number 10
Test set: Average loss: 2.7647
Communication time over the network 0.22 s

Epoch Number 11
Test set: Average loss: 0.1890
Communication time over the network 0.22 s

Epoch Num

Test set: Average loss: 0.2351
Communication time over the network 0.25 s

Epoch Number 94
Test set: Average loss: 0.4696
Communication time over the network 0.21 s

Epoch Number 95
Test set: Average loss: 0.9102
Communication time over the network 0.19 s

Epoch Number 96
Test set: Average loss: 0.2295
Communication time over the network 0.21 s

Epoch Number 97
Test set: Average loss: 0.2228
Communication time over the network 0.18 s

Epoch Number 98
Test set: Average loss: 0.5786
Communication time over the network 0.19 s

Epoch Number 99
Test set: Average loss: 2.5061
Communication time over the network 0.2 s

Epoch Number 100
Test set: Average loss: 0.5874
Communication time over the network 0.24 s



In [28]:
final_model = Net()
final_model.load_state_dict(torch.load('pima_model.pt'))

<All keys matched successfully>

In [29]:
final_model.eval()

Net(
  (fc1): Linear(in_features=8, out_features=32, bias=True)
  (fc2): Linear(in_features=32, out_features=24, bias=True)
  (fc4): Linear(in_features=24, out_features=16, bias=True)
  (fc3): Linear(in_features=16, out_features=1, bias=True)
)