# Federated Learning with PyTorch
This notebook runs the Federated Learning project in Colab using CPU.

In [1]:
# Install necessary libraries if needed
!pip install torch torchvision matplotlib tensorboard numpy tqdm



In [3]:
# Import required libraries and uploaded scripts
import os
import copy
import time
import pickle
import numpy as np
from tqdm import tqdm

import torch
from torch.utils.tensorboard import SummaryWriter

from options import args_parser
from update import LocalUpdate, test_inference
from models import MLP, CNNMnist, CNNFashion_Mnist, CNNCifar
from utils import get_dataset, average_weights, exp_details

In [4]:
# Define the main logic of federated_main.py

start_time = time.time()

# Define paths
path_project = os.path.abspath('..')
logger = SummaryWriter('../logs')

# Set the arguments for the experiment manually
class Args:
    def __init__(self):
        self.epochs = 10
        self.num_users = 100
        self.frac = 0.1
        self.local_ep = 10
        self.local_bs = 10
        self.lr = 0.01
        self.momentum = 0.5
        self.model = 'cnn'
        self.kernel_num = 9
        self.gpu_id = None
        self.kernel_sizes = '3,4,5'
        self.num_channels = 1
        self.norm = 'batch_norm'
        self.num_filters = 32
        self.max_pool = True
        self.dataset = 'cifar'
        self.num_classes = 10
        self.gpu = False  # Set to False to use CPU
        self.optimizer = 'sgd'
        self.iid = True
        self.unequal = False
        self.stopping_rounds = 10
        self.verbose = 1
        self.seed = 42

args = Args()
exp_details(args)

device = 'cpu'  # Since Colab is using CPU
train_dataset, test_dataset, user_groups = get_dataset(args)

# Build and train the model
if args.model == 'cnn':
    if args.dataset == 'mnist':
        global_model = CNNMnist(args=args)
    elif args.dataset == 'fmnist':
        global_model = CNNFashion_Mnist(args=args)
    elif args.dataset == 'cifar':
        global_model = CNNCifar(args=args)
elif args.model == 'mlp':
    img_size = train_dataset[0][0].shape
    len_in = 1
    for x in img_size:
        len_in *= x
    global_model = MLP(dim_in=len_in, dim_hidden=64, dim_out=args.num_classes)
else:
    exit('Error: unrecognized model')

# Set the model to train and send it to device.
global_model.to(device)
global_model.train()
print(global_model)

# Copy weights
global_weights = global_model.state_dict()

# Training
train_loss, train_accuracy = [], []
print_every = 2

for epoch in tqdm(range(args.epochs)):
    local_weights, local_losses = [], []
    print(f'\n | Global Training Round : {epoch+1} |\n')

    global_model.train()
    m = max(int(args.frac * args.num_users), 1)
    idxs_users = np.random.choice(range(args.num_users), m, replace=False)

    for idx in idxs_users:
        local_model = LocalUpdate(args=args, dataset=train_dataset,
                                  idxs=user_groups[idx], logger=logger)
        w, loss = local_model.update_weights(
            model=copy.deepcopy(global_model), global_round=epoch)
        local_weights.append(copy.deepcopy(w))
        local_losses.append(copy.deepcopy(loss))

    # Update global weights
    global_weights = average_weights(local_weights)
    global_model.load_state_dict(global_weights)

    loss_avg = sum(local_losses) / len(local_losses)
    train_loss.append(loss_avg)

    # Calculate avg training accuracy over all users at every epoch
    list_acc = []
    global_model.eval()
    for c in range(args.num_users):
        local_model = LocalUpdate(args=args, dataset=train_dataset,
                                  idxs=user_groups[idx], logger=logger)
        acc, _ = local_model.inference(model=global_model)
        list_acc.append(acc)
    train_accuracy.append(sum(list_acc)/len(list_acc))

    # Print global training loss after every 'print_every' rounds
    if (epoch+1) % print_every == 0:
        print(f' \nAvg Training Stats after {epoch+1} global rounds:')
        print(f'Training Loss : {np.mean(np.array(train_loss))}')
        print('Train Accuracy: {:.2f}% \n'.format(100*train_accuracy[-1]))

# Test inference after completion of training
test_acc, test_loss = test_inference(args, global_model, test_dataset)

print(f' \n Results after {args.epochs} global rounds of training:')
print("|---- Avg Train Accuracy: {:.2f}%".format(100*train_accuracy[-1]))
print("|---- Test Accuracy: {:.2f}%".format(100*test_acc))

print('\n Total Run Time: {0:0.4f}'.format(time.time()-start_time))



Experimental details:
    Model     : cnn
    Optimizer : sgd
    Learning  : 0.01
    Global Rounds   : 10

    Federated parameters:
    IID
    Fraction of users  : 0.1
    Local Batch size   : 10
    Local Epochs       : 10

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ../data/cifar/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:06<00:00, 28119577.96it/s]


Extracting ../data/cifar/cifar-10-python.tar.gz to ../data/cifar/
Files already downloaded and verified
CNNCifar(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


  0%|          | 0/10 [00:00<?, ?it/s]


 | Global Training Round : 1 |



  return torch.tensor(image), torch.tensor(label)




 10%|█         | 1/10 [00:47<07:10, 47.83s/it]


 | Global Training Round : 2 |



 20%|██        | 2/10 [01:34<06:15, 46.92s/it]

 
Avg Training Stats after 2 global rounds:
Training Loss : 2.1409125137180087
Train Accuracy: 32.00% 


 | Global Training Round : 3 |



 30%|███       | 3/10 [02:22<05:31, 47.39s/it]


 | Global Training Round : 4 |



 40%|████      | 4/10 [03:08<04:41, 46.97s/it]

 
Avg Training Stats after 4 global rounds:
Training Loss : 1.8892209587562827
Train Accuracy: 40.00% 


 | Global Training Round : 5 |



 50%|█████     | 5/10 [03:57<03:58, 47.78s/it]


 | Global Training Round : 6 |



 60%|██████    | 6/10 [04:43<03:09, 47.28s/it]

 
Avg Training Stats after 6 global rounds:
Training Loss : 1.623962566077709
Train Accuracy: 36.00% 


 | Global Training Round : 7 |



 70%|███████   | 7/10 [05:34<02:25, 48.36s/it]


 | Global Training Round : 8 |



 80%|████████  | 8/10 [06:22<01:36, 48.23s/it]

 
Avg Training Stats after 8 global rounds:
Training Loss : 1.3818679051766085
Train Accuracy: 36.00% 


 | Global Training Round : 9 |



 90%|█████████ | 9/10 [07:12<00:48, 48.65s/it]


 | Global Training Round : 10 |



100%|██████████| 10/10 [08:00<00:00, 48.02s/it]

 
Avg Training Stats after 10 global rounds:
Training Loss : 1.212232398556426
Train Accuracy: 46.00% 






 
 Results after 10 global rounds of training:
|---- Avg Train Accuracy: 46.00%
|---- Test Accuracy: 44.81%

 Total Run Time: 495.9221
