In [1]:
# https://github.com/AshwinRJ/Federated-Learning-PyTorch
import copy
import os
import random

import numpy as np
import pandas as pd
import torch
from torchsummary import summary

from models import ResNet50
# from utils import get_dataset, average_weights, exp_details
from utils_v2 import get_dataset, average_weights, exp_details
from update import LocalUpdate, test_inference

In [2]:
# ---- experiment configuration ----

# data partitioning
iid = 1         # whether the data is split i.i.d. across clients
unbalanced = 0  # non-i.i.d. setting only: whether clients receive unequal amounts of data

# federation
num_users = 100  # number of clients
frac = 0.1       # fraction of the clients used for each federated update
n_epochs = 100   # number of global rounds

# local (per-client) training
local_epochs = 1
local_batch_size = 10  # batch size of the local updates on each client
optimizer = "sgd"      # "sgd" or "adam"
lr = 0.001             # learning rate
loss_function = "CrossEntropyLoss"

# initial device flag; the model-setup cell reassigns it from CUDA availability
gpu = 0

# normalization: 0 groups selects BatchNorm, any other value selects GroupNorm
num_groups = 4
normalization_type = "BatchNorm" if num_groups == 0 else "GroupNorm"

In [3]:
# Print the experiment configuration so it is recorded next to the results.
exp_details(
    "ResNet50", optimizer, lr, normalization_type, n_epochs,
    iid, frac, local_batch_size, local_epochs, unbalanced, num_users,
)


Experimental details:
    Model     : ResNet50
    Optimizer : sgd
    Learning  : 0.001
    Normalization  : GroupNorm
    Global Rounds   : 100

    Federated parameters:
    IID
    NUmber of users  : 100
    Fraction of users  : 0.1
    Local Batch size   : 10
    Local Epochs       : 1



In [4]:
# Reproducibility: seed every random number generator this notebook imports
# (see https://pytorch.org/docs/stable/notes/randomness.html).
SEED = 0

# Dedicated torch generator with a fixed seed.
# NOTE(review): `g` is not referenced by any other visible cell - presumably meant
# to be handed to a DataLoader; confirm it is actually used.
g = torch.Generator()
g.manual_seed(SEED)

torch.manual_seed(SEED)  # PyTorch global RNG
random.seed(SEED)        # Python's built-in RNG (imported by this notebook but previously never seeded)
np.random.seed(SEED)     # NumPy global RNG, used by the training cell to sample clients each round

In [5]:
# Load the train/test datasets together with `user_groups`, which the training cell
# indexes by client id to obtain that client's sample indices.
train_dataset, test_dataset, user_groups = get_dataset(
    iid=iid,
    unbalanced=unbalanced,
    num_users=num_users,
)

Files already downloaded and verified
Files already downloaded and verified


In [6]:
# Build the global model with the configured normalization layer.
# (alternative architecture that was tried: model = CNNCifar())
model = ResNet50(n_type=normalization_type)

# Choose the compute device; `gpu` is reassigned here to reflect what is actually available.
train_on_gpu = torch.cuda.is_available()

if train_on_gpu:
    print('CUDA is available!  Training on GPU ...')
    device, gpu = torch.device("cuda"), 1
else:
    print('CUDA is not available.  Training on CPU ...')
    device, gpu = torch.device("cpu"), 0

model.to(device)

# Put the model in training mode; as the cell's last expression this also displays the module.
model.train()

CUDA is available!  Training on GPU ...


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): GroupNorm(4, 64, eps=1e-05, affine=True)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): GroupNorm(4, 64, eps=1e-05, affine=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): GroupNorm(4, 64, eps=1e-05, affine=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): GroupNorm(4, 256, eps=1e-05, affine=True)
      (shortcut): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): GroupNorm(4, 256, eps=1e-05, affine=True)
      )
    )
    (1): Bottleneck(
      (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): GroupNorm(4, 64, eps=1e-05, affine=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), paddi

In [7]:
# Layer-by-layer summary of the model for a single 3x32x32 input.
# NOTE(review): an earlier comment here recorded "total number of params 591,322";
# confirm that figure against the total printed by this summary for the current model.
summary(model, input_size=(3, 32, 32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 32, 32]           1,728
         GroupNorm-2           [-1, 64, 32, 32]             128
            Conv2d-3           [-1, 64, 32, 32]           4,096
         GroupNorm-4           [-1, 64, 32, 32]             128
            Conv2d-5           [-1, 64, 32, 32]          36,864
         GroupNorm-6           [-1, 64, 32, 32]             128
            Conv2d-7          [-1, 256, 32, 32]          16,384
         GroupNorm-8          [-1, 256, 32, 32]             512
            Conv2d-9          [-1, 256, 32, 32]          16,384
        GroupNorm-10          [-1, 256, 32, 32]             512
       Bottleneck-11          [-1, 256, 32, 32]               0
           Conv2d-12           [-1, 64, 32, 32]          16,384
        GroupNorm-13           [-1, 64, 32, 32]             128
           Conv2d-14           [-1, 64,

In [8]:
# Take the global model's current state dict as the starting global weights.
# NOTE(review): despite the original "copy weights" wording, no explicit copy is made
# here; PyTorch documents state_dict() as holding references to the module's tensors,
# so wrap it in copy.deepcopy(...) if an independent snapshot is needed - confirm.
# The training cell reassigns `global_weights` after each round's averaging.
global_weights = model.state_dict()

In [None]:
# training
# Federated averaging: every global round a random subset of clients trains a copy of
# the global model, the returned weights are averaged into the global model, and the
# updated global model is then evaluated through each client's LocalUpdate.inference.
train_loss, train_accuracy = [], []  # one entry appended per global round
val_acc_list, net_list = [], []      # not filled in by this cell
cv_loss, cv_acc = [], []             # not filled in by this cell

# number of clients used for federated updates in each round, at least 1
# (depends only on the configuration, so it is computed once outside the loop)
m = max(int(frac * num_users), 1)

for epoch in range(1, n_epochs+1):
    local_weights = []
    local_losses = []

    print(f'Epoch: {epoch} \n')


    ###################
    # train the model #
    ###################
    model.train()

    # different clients at each epoch: choose m distinct users at random
    idxs_users = np.random.choice(range(num_users), m, replace=False)

    for idx in idxs_users:  # for each selected user
        # build the local trainer for this client's share of the training data
        local_model = LocalUpdate(dataset=train_dataset, idxs=user_groups[idx],
                                  gpu=gpu, optimizer=optimizer,
                                  local_batch_size=local_batch_size, lr=lr,
                                  local_epochs=local_epochs, loss_function=loss_function)

        # train a deep copy of the global model so the global model itself is only
        # changed by the averaging step below
        w, loss = local_model.update_weights(model=copy.deepcopy(model),
                                             global_round=epoch)

        print('| Client : {} | Average Loss: {:.4f} '.format(
            idx, loss))

        local_weights.append(copy.deepcopy(w))
        local_losses.append(copy.deepcopy(loss))

    # compute global weights (average of local weights)
    global_weights = average_weights(local_weights)
    # update weights of the global model
    model.load_state_dict(global_weights)

    # average loss over the clients trained in this round
    loss_avg = sum(local_losses) / len(local_losses)
    train_loss.append(loss_avg)

    ######################
    # validate the model #
    ######################
    model.eval()
    # calculate avg training accuracy over all users at every epoch
    list_acc, list_loss = [], []
    for client in range(num_users):  # for each client, not only the ones trained this round
        local_model = LocalUpdate(dataset=train_dataset, idxs=user_groups[client],
                                  gpu=gpu, optimizer=optimizer,
                                  local_batch_size=local_batch_size, lr=lr,
                                  local_epochs=local_epochs, loss_function=loss_function)

        # accuracy and loss of the global model as reported by this client's LocalUpdate
        acc, loss = local_model.inference(model=model)
        list_acc.append(acc)
        list_loss.append(loss)

    # compute average accuracy
    train_accuracy.append(sum(list_acc)/len(list_acc))

    # print stats
    # the loss printed here is the mean of train_loss over all rounds so far,
    # not only the current round's value
    print(f'\nAverage training statistics (global epoch : {epoch})')
    print(f'|---- Training Loss : {np.mean(np.array(train_loss))}')
    print('|---- Training Accuracy: {:.2f}% \n'.format(100*train_accuracy[-1]))

Epoch: 1 



  return torch.tensor(image), torch.tensor(label)


| Global Round : 1 | Local Epoch : 1 | Train Loss: 3.4358 | Train Accuracy: 0.10
| Global Round : 1 | Average Train Loss: 3.4358 
| Client : 43 | Average Loss: 3.4358 
| Global Round : 1 | Local Epoch : 1 | Train Loss: 3.3412 | Train Accuracy: 0.08
| Global Round : 1 | Average Train Loss: 3.3412 
| Client : 36 | Average Loss: 3.3412 
| Global Round : 1 | Local Epoch : 1 | Train Loss: 3.2304 | Train Accuracy: 0.12
| Global Round : 1 | Average Train Loss: 3.2304 
| Client : 92 | Average Loss: 3.2304 
| Global Round : 1 | Local Epoch : 1 | Train Loss: 3.3639 | Train Accuracy: 0.10
| Global Round : 1 | Average Train Loss: 3.3639 
| Client : 80 | Average Loss: 3.3639 
| Global Round : 1 | Local Epoch : 1 | Train Loss: 3.5930 | Train Accuracy: 0.10
| Global Round : 1 | Average Train Loss: 3.5930 
| Client : 15 | Average Loss: 3.5930 
| Global Round : 1 | Local Epoch : 1 | Train Loss: 3.4897 | Train Accuracy: 0.13
| Global Round : 1 | Average Train Loss: 3.4897 
| Client : 26 | Average Loss: 

In [None]:
# save train loss and accuracy
# One CSV row per global round; the file name encodes the experiment configuration.
filename_csv = 'fedAVG_results/{}_{}_{}_lr_[{}]_C[{}]_iid[{}]_unbalanced[{}]_E[{}]_B[{}]_{}_numGroups[{}].csv'\
    .format("ResNet50", n_epochs, optimizer, lr, frac, iid, unbalanced,
           local_epochs, local_batch_size, normalization_type, num_groups)

# make sure the output folder exists before writing, so a fresh checkout does not
# lose the results of a long training run to a missing-directory error
os.makedirs(os.path.dirname(filename_csv), exist_ok=True)

data = list(zip(train_loss, train_accuracy))
pd.DataFrame(data, columns=['train_loss','train_accuracy']).to_csv(filename_csv)

In [None]:
# save the trained model
# Only the state dict is stored; the file name encodes the experiment configuration.
filename_pt = 'fedAVG_results/{}_{}_{}_lr_[{}]_C[{}]_iid[{}]_unbalanced[{}]_E[{}]_B[{}]_{}_numGroups[{}].pt'\
    .format("ResNet50", n_epochs, optimizer, lr, frac, iid, unbalanced,
            local_epochs, local_batch_size, normalization_type, num_groups)

# make sure the output folder exists before writing, so the trained weights are not
# lost to a missing-directory error
os.makedirs(os.path.dirname(filename_pt), exist_ok=True)

torch.save(model.state_dict(), filename_pt)

In [None]:
# test the trained model
# Evaluate the final global model on the held-out test dataset.
test_acc, test_loss = test_inference(model=model, test_dataset=test_dataset, gpu=gpu,
                                     loss_function=loss_function)

print(f'\nResults after {n_epochs} global rounds of training:')
# train_accuracy[-1] is the last round's accuracy averaged over all clients
print("|---- Average Train Accuracy: {:.2f}%".format(100*train_accuracy[-1]))
print("|---- Test Accuracy: {:.2f}%".format(100*test_acc))