<a href="https://colab.research.google.com/github/harshitadd/CrossSiloFLDemo/blob/master/PreVID_PySyft_Federated_Averaging.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#!pip install syft --quiet 

In [1]:
##Importing Mini Batch Data 
import os 
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
#!pip install pydicom --quiet
import pydicom 
import cv2
import matplotlib.pyplot as plt 

In [3]:
dcm_path=os.listdir('/content/drive/My Drive/Fed_Covid/minibatch/')
dcm_data={}

alpha = 1.5 # Contrast control (1.0-3.0)
beta = 0 # Brightness control (0-100)

ctr=0
for file in dcm_path:
  name = '/content/drive/My Drive/Fed_Covid/minibatch/' + file
  temp = pydicom.dcmread(name)
  image = temp.pixel_array
  image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
  adjusted = cv2.resize(image,(64,64))
  dcm_data[file]=adjusted
  ctr+=1

In [4]:
import csv 
labels=[]
with open('/content/drive/My Drive/Fed_Covid/stage_2_train_labels.csv','r') as file:
  reader = csv.reader(file)
  for row in reader:
    labels.append(row)

In [5]:
from sklearn.preprocessing import MinMaxScaler 
import numpy as np
scaler = MinMaxScaler()
pid=[]
dicom=[]
label=[]
cid = 0
for PID in labels:
  for key in dcm_data:
    if(key[:-4]==PID[0]):
      l=[]
      for val in dcm_data[key]:
        l.append(scaler.fit_transform(val))
      l = np.reshape(l,(3,64,64))
      dicom.append(l)
      label.append(int(PID[5]))
      

In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import syft as sy
from torch.utils.data import TensorDataset, DataLoader
import time
import copy
import numpy as np
from torch.utils.data import Dataset
from syft.frameworks.torch.fl import utils
from syft.workers.websocket_client import WebsocketClientWorker

In [7]:
class Arguments():
    def __init__(self):
        self.batch_size = 4
        self.test_batch_size = 4
        self.epochs = 5
        self.lr = 0.01
        self.momentum = 0.5
        self.seed = 1
        self.log_interval = 10
        self.save_model = False

args = Arguments()

In [8]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(dicom,label,test_size=0.3)

In [9]:
import pandas as pd 
test_df = pd.DataFrame()
test_df['features']=x_test
test_df['labels']=y_test

In [10]:
x_train= np.array(x_train)
y_train= np.array(y_train)
x_test= np.array(x_test)
y_test= np.array(y_test)

In [11]:
class data_maker(Dataset):
  def __init__(self, images, labels):
        self.data = images 
        self.targets = labels 

        self.to_torchtensor()
        
  def __len__(self):
      return len(self.data)

  def to_torchtensor(self):      
      self.data=torch.from_numpy(self.data)
      self.labels=torch.from_numpy(self.targets)
  
  def __getitem__(self, idx):
      sample=self.data[idx]
      target=self.targets[idx]
      return sample,target

In [12]:
hook = sy.TorchHook(torch)
hospital = sy.VirtualWorker(hook, id="hospital")  
clinic = sy.VirtualWorker(hook, id="clinic")  

In [13]:
federated_train_data = data_maker(x_train,y_train).federate((hospital,clinic))

In [14]:
federated_train_loader = sy.FederatedDataLoader(federated_train_data,batch_size=args.batch_size)
test_data = data_maker(x_test,y_test)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=args.test_batch_size)

In [17]:
class Net(nn.Module):
    def __init__(self):
        super().__init__()

        # Input - (32,1024,1024)
  
        self.conv1 = nn.Conv2d(3,32, kernel_size=8, stride = 2)  #I/P - 1*32
        self.pool1 = nn.MaxPool2d(kernel_size=4,stride=2,padding=0)  
        self.conv2 = nn.Conv2d(32, 64, 8)
        self.pool2 = nn.MaxPool2d(4,2,padding=0)
        self.fc1 = nn.Linear(256, 64*64)
        self.fc2 = nn.Linear(64*64, 64)
        self.fc3 = nn.Linear(64, 2)

    def forward(self, x):
        
        x = F.relu(self.conv1(x))
        # Input of 1*1024*1024 going to 32*1024*1024  - 1*64*64 going to 32*64*64
        x = self.pool1(x)
        # Input of 32*1024*1024 going to 32*256*256 - 32*64*64 going to 32*16*16
        x = F.relu(self.conv2(x))
        # Input of 32*256*256 to 64*256*256 - 32*16*16 to 64*16*16
        x = self.pool2(x)
        # Input of 64*256*256 to 64*64*64 - 64*16*16 going to 64*4*4

        x = x.view(-1,256)
     
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)

        return F.log_softmax(x,dim=1)

In [32]:
hospital_model = Net().double()
clinic_model = Net().double()

hospital_optimizer = optim.SGD(hospital_model.parameters(), lr=0.00001)
clinic_optimizer = optim.SGD(clinic_model.parameters(), lr=0.00001)

models = [hospital_model, clinic_model]
optimizers = [hospital_optimizer, clinic_optimizer]

model = Net().double()
model

Net(
  (conv1): Conv2d(3, 32, kernel_size=(8, 8), stride=(2, 2))
  (pool1): MaxPool2d(kernel_size=4, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(32, 64, kernel_size=(8, 8), stride=(1, 1))
  (pool2): MaxPool2d(kernel_size=4, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=256, out_features=4096, bias=True)
  (fc2): Linear(in_features=4096, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=2, bias=True)
)

In [33]:
compute_nodes = [hospital, clinic]

In [34]:
def update(data, target, model, optimizer):
    loss = nn.CrossEntropyLoss()
    model.send(data.location) 
    optimizer.zero_grad()
    output = model(data)
    out = loss(output, target)
    out.backward()
    optimizer.step()
    return model 

In [35]:
def train():
  for batch_idx, (data, target) in enumerate(federated_train_loader):
    for index in range(len(compute_nodes)):
      models[index] = update(data,target,models[index],optimizers[index]) 
    for model in models:
      model.get()
    return utils.federated_avg({"hospital": models[0],"clinic":models[1]})


In [36]:
def test(federated_model):
  federated_model.eval()
  test_loss = 0
  correct = 0
  loss = nn.CrossEntropyLoss()
  for data, target in test_loader:
        output = model(data)
        out = loss(output,target)
        test_loss +=out
        pred = output.argmax(1, keepdim=True) # get the index of the max log-probability 
        correct += pred.eq(target.view_as(pred))

  test_loss /= len(test_loader.dataset)

  print('For Test Loss:' + str(test_loss))

In [38]:
for epoch in range(25):
  start_time = time.time()
  print(f"Epoch Number {epoch}")
  federated_model = train()
  test(federated_model)
  total_time = time.time() - start_time 
  print('Comm time', round(total_time,2),'s\n')

Epoch Number 0
For Test Loss:tensor(0.1783, dtype=torch.float64, grad_fn=<DivBackward0>)
Comm time 1.01 s

Epoch Number 1
For Test Loss:tensor(0.1783, dtype=torch.float64, grad_fn=<DivBackward0>)
Comm time 0.97 s

Epoch Number 2
For Test Loss:tensor(0.1783, dtype=torch.float64, grad_fn=<DivBackward0>)
Comm time 0.97 s

Epoch Number 3
For Test Loss:tensor(0.1783, dtype=torch.float64, grad_fn=<DivBackward0>)
Comm time 0.96 s

Epoch Number 4
For Test Loss:tensor(0.1783, dtype=torch.float64, grad_fn=<DivBackward0>)
Comm time 0.97 s

Epoch Number 5
For Test Loss:tensor(0.1783, dtype=torch.float64, grad_fn=<DivBackward0>)
Comm time 0.95 s

Epoch Number 6
For Test Loss:tensor(0.1783, dtype=torch.float64, grad_fn=<DivBackward0>)
Comm time 0.94 s

Epoch Number 7
For Test Loss:tensor(0.1783, dtype=torch.float64, grad_fn=<DivBackward0>)
Comm time 0.95 s

Epoch Number 8
For Test Loss:tensor(0.1783, dtype=torch.float64, grad_fn=<DivBackward0>)
Comm time 0.93 s

Epoch Number 9
For Test Loss:tensor(0