<a href="https://colab.research.google.com/github/denisangelo/Federated_Learning/blob/main/Federated_Learning%20/Notebooks/Aula_2_Treinamento_Federado.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [81]:
# Instalando a plataforma/bibliotecas e suas dependências
!pip install -q flwr[simulation] torch torchvision matplotlib

In [82]:
from collections import OrderedDict
from typing import List, Tuple
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split
from torchvision.datasets import CIFAR10

import flwr as fl
from flwr.common import Metrics
# Device that models and batches are moved to (see train/test and numpyclient_fn).
DEVICE = torch.device("cpu")  # Try "cuda" to train on GPU
# Report the selected device and the library versions used for this run.
print(
    f"Training on {DEVICE} using PyTorch {torch.__version__} and Flower {fl.__version__}"
)

Training on cpu using PyTorch 2.0.1+cu118 and Flower 1.4.0


In [83]:
# Number of simulated clients that take part in the federated training.
NUM_CLIENTS = 5

# Human-readable names of the ten CIFAR-10 classes
# (presumably ordered by label index -- verify before using for lookups).
CLASSES = (
    "plane",
    "car",
    "bird",
    "cat",
    "deer",
    "dog",
    "frog",
    "horse",
    "ship",
    "truck",
)

# Mini-batch size used by every DataLoader built in load_data().
BATCH_SIZE = 64

In [84]:
  # Carregando e Normalizando os Dados CIFAR10
  # Definindo as Transformações a serem aplicadas às imagens
def load_data():
    """Download CIFAR-10 and build per-client loaders plus one test loader.

    The training split is divided into ``NUM_CLIENTS`` disjoint partitions using
    a generator seeded with 42. Each partition is split again (same seed) into a
    training part and a validation part holding ``len(partition) // 10`` samples.

    Returns:
        tuple: ``(trainloaders, valloaders, testloader)`` where ``trainloaders``
        and ``valloaders`` are lists with one ``DataLoader`` per client and
        ``testloader`` is a single ``DataLoader`` over the CIFAR-10 test split.
    """
    # Convert images to tensors and normalize every channel with mean 0.5 / std 0.5.
    transform = transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
    )
    trainset = CIFAR10(root="./data", train=True, download=True, transform=transform)
    testset = CIFAR10(root="./data", train=False, download=True, transform=transform)

    # random_split requires the lengths to sum to len(trainset). Hand the remainder
    # of the integer division to the first partitions so that any NUM_CLIENTS works,
    # not only values that divide the dataset size evenly.
    partition_size, remainder = divmod(len(trainset), NUM_CLIENTS)
    lengths = [
        partition_size + 1 if index < remainder else partition_size
        for index in range(NUM_CLIENTS)
    ]
    datasets = random_split(trainset, lengths, torch.Generator().manual_seed(42))

    trainloaders = []
    valloaders = []
    for ds in datasets:
        len_val = len(ds) // 10  # hold out 10% of the partition for validation
        len_train = len(ds) - len_val
        ds_train, ds_val = random_split(
            ds, [len_train, len_val], torch.Generator().manual_seed(42)
        )
        trainloaders.append(
            DataLoader(ds_train, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
        )
        valloaders.append(
            DataLoader(ds_val, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)
        )

    # The test loader does not depend on the client, so it is built once, outside the loop.
    testloader = DataLoader(testset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)
    return trainloaders, valloaders, testloader
    
# Build the loaders once; trainloaders/valloaders are indexed by client id in numpyclient_fn.
trainloaders, valloaders, testloader = load_data()

Files already downloaded and verified
Files already downloaded and verified


In [85]:
# Definindo o Modelo
class Net(nn.Module):
    """Small CNN: two conv + max-pool stages followed by three linear layers.

    The hard-coded flatten size ``16 * 5 * 5`` corresponds to 3x32x32 inputs;
    the final layer emits 10 logits per sample.
    """

    def __init__(self) -> None:
        super().__init__()
        # Feature extractor. The creation order of the layers is kept as-is because
        # it fixes the order of the state_dict entries exchanged with the server.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        # Classifier head.
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the class logits for the batch ``x``."""
        features = self.pool(F.relu(self.conv1(x)))
        features = self.pool(F.relu(self.conv2(features)))
        # Flatten the feature maps into one row of 400 values per sample.
        flattened = features.view(-1, 16 * 5 * 5)
        hidden = F.relu(self.fc1(flattened))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

In [86]:
# Definindo as funções de treinamento
def train(net, trainloader, epochs: int, verbose=False):
    """Train the network on the training set.

    Args:
        net: Model to optimize in place; it is switched to training mode.
        trainloader: Iterable yielding ``(images, labels)`` batches.
        epochs: Number of passes over ``trainloader``.
        verbose: When true, print the mean per-sample loss and the accuracy
            after every epoch.

    Returns:
        None. The parameters of ``net`` are updated in place.
    """
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(net.parameters())
    net.train()
    for epoch in range(epochs):
        correct, total, epoch_loss = 0, 0, 0.0
        for images, labels in trainloader:
            images, labels = images.to(DEVICE), labels.to(DEVICE)
            optimizer.zero_grad()
            outputs = net(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # Metrics. `loss` is the mean over the batch, so weight it by the batch
            # size to get a per-sample average at the end of the epoch. `.item()`
            # yields a plain float, so no autograd graph is kept alive across batches.
            batch_size = labels.size(0)
            epoch_loss += loss.item() * batch_size
            total += batch_size
            correct += (outputs.argmax(dim=1) == labels).sum().item()
        # Guard the averages so that an empty loader does not divide by zero.
        seen = max(total, 1)
        epoch_loss /= seen
        epoch_acc = correct / seen
        if verbose:
            print(f"Epoch {epoch+1}: train loss {epoch_loss}, accuracy {epoch_acc}")


def test(net, testloader):
    """Evaluate the network on the entire test set."""
    criterion = torch.nn.CrossEntropyLoss()
    correct, total, loss = 0, 0, 0.0
    net.eval()
    with torch.no_grad():
        for images, labels in testloader:
            images, labels = images.to(DEVICE), labels.to(DEVICE)
            outputs = net(images)
            loss += criterion(outputs, labels).item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    loss /= len(testloader.dataset)
    accuracy = correct / total
    return loss, accuracy

In [87]:
# Funções de atualização de parâmetros do modelo
def get_parameters(net) -> List[np.ndarray]:
    """Return every ``state_dict`` entry of ``net`` as a NumPy array.

    The arrays follow the iteration order of ``net.state_dict()``.
    """
    arrays = []
    for tensor in net.state_dict().values():
        # Move to host memory first; .numpy() is only called on CPU tensors here.
        arrays.append(tensor.cpu().numpy())
    return arrays


def set_parameters(net, parameters: List[np.ndarray]):
    """Load ``parameters`` into ``net``.

    Args:
        net: Module whose ``state_dict`` is overwritten.
        parameters: One array per ``state_dict`` entry, in the iteration order of
            ``net.state_dict()`` (the order produced by ``get_parameters``).

    Raises:
        RuntimeError: Propagated from ``load_state_dict(strict=True)`` when the
            keys or shapes do not match the module.
    """
    keys = net.state_dict().keys()
    # torch.tensor keeps each array's dtype and shape, including 0-d integer
    # buffers such as BatchNorm's num_batches_tracked, which the legacy
    # torch.Tensor(...) constructor cannot represent faithfully.
    state_dict = OrderedDict(
        (key, torch.tensor(value)) for key, value in zip(keys, parameters)
    )
    net.load_state_dict(state_dict, strict=True)

In [88]:
# Definindo a classe de cliente Virtual
class FlowerNumPyClient(fl.client.NumPyClient):
    """Flower NumPy client that trains and evaluates one model on its own data.

    Args:
        cid: Client identifier, used only in the log messages of this class.
        net: Model owned by this client.
        trainloader: Loader passed to ``train`` in ``fit``.
        valloader: Loader passed to ``test`` in ``evaluate``.
    """

    def __init__(self, cid, net, trainloader, valloader):
        self.cid = cid
        self.net = net
        self.trainloader = trainloader
        self.valloader = valloader

    def get_parameters(self, config):
        """Return the current model parameters as a list of NumPy arrays."""
        print(f"[Client {self.cid}] get_parameters")
        return get_parameters(self.net)

    def fit(self, parameters, config):
        """Load ``parameters``, train for one epoch and return the updated model.

        Returns:
            tuple: ``(parameters, num_examples, metrics)`` with an empty metrics dict.
        """
        print(f"[Client {self.cid}] fit, config: {config}")
        set_parameters(self.net, parameters)
        train(self.net, self.trainloader, epochs=1)
        # Report the number of training samples. len(loader) would be the number
        # of batches, which is not the example count used to weight this update.
        return get_parameters(self.net), len(self.trainloader.dataset), {}

    def evaluate(self, parameters, config):
        """Load ``parameters`` and evaluate them on the validation loader.

        Returns:
            tuple: ``(loss, num_examples, metrics)`` where ``metrics`` holds the
            validation accuracy under the key ``"accuracy"``.
        """
        print(f"[Client {self.cid}] evaluate, config: {config}")
        set_parameters(self.net, parameters)
        loss, accuracy = test(self.net, self.valloader)
        # Same as in fit: count samples, not batches.
        return float(loss), len(self.valloader.dataset), {"accuracy": float(accuracy)}

def numpyclient_fn(cid) -> FlowerNumPyClient:
    """Create the client for ``cid`` with a fresh model and its own data loaders.

    ``cid`` is converted with ``int`` to index ``trainloaders`` and ``valloaders``.
    """
    model = Net().to(DEVICE)
    index = int(cid)
    return FlowerNumPyClient(cid, model, trainloaders[index], valloaders[index])

In [89]:
# Realizando a agregação
def weighted_average(metrics: List[Tuple[int, Metrics]]) -> Metrics:
    """Aggregate the clients' accuracies, weighting each by its example count.

    Args:
        metrics: ``(num_examples, metrics_dict)`` pairs; every ``metrics_dict``
            must contain the key ``"accuracy"``.

    Returns:
        A dict with the single key ``"accuracy"`` holding the weighted mean.
    """
    # Numerator: each accuracy scaled by the number of examples behind it.
    weighted_total = sum(count * client["accuracy"] for count, client in metrics)
    # Denominator: total number of examples across all clients.
    example_total = sum(count for count, _ in metrics)
    return {"accuracy": weighted_total / example_total}

In [90]:
# Start the federated training.
# Configure the FedAvg aggregation strategy.
strategy = fl.server.strategy.FedAvg(
    fraction_fit=1.0,  # fraction of the available clients sampled for training each round
    fraction_evaluate=0.5,  # fraction of the available clients sampled for evaluation each round
    min_fit_clients=NUM_CLIENTS,  # lower bound on the clients used for training
    min_evaluate_clients=2,  # lower bound on the clients used for evaluation
    min_available_clients=NUM_CLIENTS,  # clients that must be available before a round starts
    evaluate_metrics_aggregation_fn=weighted_average,  # combines the clients' "accuracy" metric
)

# Request one GPU per client only when running on CUDA; otherwise pass None.
client_resources = {"num_gpus": 1} if DEVICE.type == "cuda" else None

# Run the simulation. The call stays the last expression of the cell so that the
# returned history is displayed as the cell output.
fl.simulation.start_simulation(
    client_fn=numpyclient_fn,
    num_clients=NUM_CLIENTS,
    config=fl.server.ServerConfig(num_rounds=5),
    strategy=strategy,
    client_resources=client_resources,
)

INFO flwr 2023-06-02 15:00:10,119 | app.py:146 | Starting Flower simulation, config: ServerConfig(num_rounds=5, round_timeout=None)
INFO:flwr:Starting Flower simulation, config: ServerConfig(num_rounds=5, round_timeout=None)
2023-06-02 15:00:15,150	INFO worker.py:1625 -- Started a local Ray instance.
INFO flwr 2023-06-02 15:00:17,119 | app.py:180 | Flower VCE: Ray initialized with resources: {'CPU': 2.0, 'memory': 7774362011.0, 'object_store_memory': 3887181004.0, 'node:172.28.0.12': 1.0}
INFO:flwr:Flower VCE: Ray initialized with resources: {'CPU': 2.0, 'memory': 7774362011.0, 'object_store_memory': 3887181004.0, 'node:172.28.0.12': 1.0}
INFO flwr 2023-06-02 15:00:17,125 | server.py:86 | Initializing global parameters
INFO:flwr:Initializing global parameters
INFO flwr 2023-06-02 15:00:17,129 | server.py:273 | Requesting initial parameters from one random client
INFO:flwr:Requesting initial parameters from one random client
INFO flwr 2023-06-02 15:00:31,341 | server.py:277 | Received i

[2m[36m(launch_and_get_parameters pid=5525)[0m [Client 2] get_parameters
[2m[36m(launch_and_fit pid=5525)[0m [Client 2] fit, config: {}




[2m[36m(launch_and_fit pid=5525)[0m [Client 0] fit, config: {}
[2m[36m(launch_and_fit pid=5526)[0m [Client 3] fit, config: {}
[2m[36m(launch_and_fit pid=5526)[0m [Client 4] fit, config: {}[32m [repeated 2x across cluster] (Ray deduplicates logs by default. Set RAY_DEDUP_LOGS=0 to disable log deduplication, or see https://docs.ray.io/en/master/ray-observability/ray-logging.html#log-deduplication for more options.)[0m


DEBUG flwr 2023-06-02 15:01:13,077 | server.py:232 | fit_round 1 received 5 results and 0 failures
DEBUG:flwr:fit_round 1 received 5 results and 0 failures
DEBUG flwr 2023-06-02 15:01:13,108 | server.py:168 | evaluate_round 1: strategy sampled 2 clients (out of 5)
DEBUG:flwr:evaluate_round 1: strategy sampled 2 clients (out of 5)


[2m[36m(launch_and_evaluate pid=5525)[0m [Client 3] evaluate, config: {}


DEBUG flwr 2023-06-02 15:01:18,622 | server.py:182 | evaluate_round 1 received 2 results and 0 failures
DEBUG:flwr:evaluate_round 1 received 2 results and 0 failures
DEBUG flwr 2023-06-02 15:01:18,627 | server.py:218 | fit_round 2: strategy sampled 5 clients (out of 5)
DEBUG:flwr:fit_round 2: strategy sampled 5 clients (out of 5)


[2m[36m(launch_and_fit pid=5525)[0m [Client 1] fit, config: {}
[2m[36m(launch_and_evaluate pid=5526)[0m [Client 1] fit, config: {}
[2m[36m(launch_and_fit pid=5526)[0m [Client 3] fit, config: {}[32m [repeated 2x across cluster][0m
[2m[36m(launch_and_fit pid=5526)[0m [Client 4] fit, config: {}[32m [repeated 2x across cluster][0m


DEBUG flwr 2023-06-02 15:01:57,928 | server.py:232 | fit_round 2 received 5 results and 0 failures
DEBUG:flwr:fit_round 2 received 5 results and 0 failures
DEBUG flwr 2023-06-02 15:01:57,950 | server.py:168 | evaluate_round 2: strategy sampled 2 clients (out of 5)
DEBUG:flwr:evaluate_round 2: strategy sampled 2 clients (out of 5)


[2m[36m(launch_and_evaluate pid=5526)[0m [Client 3] evaluate, config: {}


DEBUG flwr 2023-06-02 15:02:02,055 | server.py:182 | evaluate_round 2 received 2 results and 0 failures
DEBUG:flwr:evaluate_round 2 received 2 results and 0 failures
DEBUG flwr 2023-06-02 15:02:02,060 | server.py:218 | fit_round 3: strategy sampled 5 clients (out of 5)
DEBUG:flwr:fit_round 3: strategy sampled 5 clients (out of 5)


[2m[36m(launch_and_fit pid=5525)[0m [Client 3] fit, config: {}
[2m[36m(launch_and_evaluate pid=5525)[0m [Client 3] fit, config: {}
[2m[36m(launch_and_fit pid=5525)[0m [Client 0] fit, config: {}[32m [repeated 2x across cluster][0m
[2m[36m(launch_and_fit pid=5525)[0m [Client 4] fit, config: {}[32m [repeated 2x across cluster][0m


DEBUG flwr 2023-06-02 15:02:39,394 | server.py:232 | fit_round 3 received 5 results and 0 failures
DEBUG:flwr:fit_round 3 received 5 results and 0 failures
DEBUG flwr 2023-06-02 15:02:39,414 | server.py:168 | evaluate_round 3: strategy sampled 2 clients (out of 5)
DEBUG:flwr:evaluate_round 3: strategy sampled 2 clients (out of 5)


[2m[36m(launch_and_evaluate pid=5525)[0m [Client 0] evaluate, config: {}


DEBUG flwr 2023-06-02 15:02:43,665 | server.py:182 | evaluate_round 3 received 2 results and 0 failures
DEBUG:flwr:evaluate_round 3 received 2 results and 0 failures
DEBUG flwr 2023-06-02 15:02:43,669 | server.py:218 | fit_round 4: strategy sampled 5 clients (out of 5)
DEBUG:flwr:fit_round 4: strategy sampled 5 clients (out of 5)


[2m[36m(launch_and_fit pid=5525)[0m [Client 2] fit, config: {}
[2m[36m(launch_and_evaluate pid=5526)[0m [Client 2] fit, config: {}
[2m[36m(launch_and_fit pid=5526)[0m [Client 4] fit, config: {}[32m [repeated 2x across cluster][0m
[2m[36m(launch_and_fit pid=5525)[0m [Client 3] fit, config: {}[32m [repeated 2x across cluster][0m


DEBUG flwr 2023-06-02 15:03:23,353 | server.py:232 | fit_round 4 received 5 results and 0 failures
DEBUG:flwr:fit_round 4 received 5 results and 0 failures
DEBUG flwr 2023-06-02 15:03:23,387 | server.py:168 | evaluate_round 4: strategy sampled 2 clients (out of 5)
DEBUG:flwr:evaluate_round 4: strategy sampled 2 clients (out of 5)


[2m[36m(launch_and_evaluate pid=5525)[0m [Client 3] evaluate, config: {}


DEBUG flwr 2023-06-02 15:03:28,348 | server.py:182 | evaluate_round 4 received 2 results and 0 failures
DEBUG:flwr:evaluate_round 4 received 2 results and 0 failures
DEBUG flwr 2023-06-02 15:03:28,354 | server.py:218 | fit_round 5: strategy sampled 5 clients (out of 5)
DEBUG:flwr:fit_round 5: strategy sampled 5 clients (out of 5)


[2m[36m(launch_and_fit pid=5526)[0m [Client 2] fit, config: {}
[2m[36m(launch_and_evaluate pid=5526)[0m [Client 2] fit, config: {}
[2m[36m(launch_and_fit pid=5526)[0m [Client 0] fit, config: {}[32m [repeated 2x across cluster][0m
[2m[36m(launch_and_fit pid=5526)[0m [Client 1] fit, config: {}[32m [repeated 2x across cluster][0m


DEBUG flwr 2023-06-02 15:04:06,288 | server.py:232 | fit_round 5 received 5 results and 0 failures
DEBUG:flwr:fit_round 5 received 5 results and 0 failures
DEBUG flwr 2023-06-02 15:04:06,314 | server.py:168 | evaluate_round 5: strategy sampled 2 clients (out of 5)
DEBUG:flwr:evaluate_round 5: strategy sampled 2 clients (out of 5)


[2m[36m(launch_and_evaluate pid=5525)[0m [Client 1] evaluate, config: {}


DEBUG flwr 2023-06-02 15:04:10,712 | server.py:182 | evaluate_round 5 received 2 results and 0 failures
DEBUG:flwr:evaluate_round 5 received 2 results and 0 failures
INFO flwr 2023-06-02 15:04:10,718 | server.py:147 | FL finished in 219.3627942469999
INFO:flwr:FL finished in 219.3627942469999
INFO flwr 2023-06-02 15:04:10,724 | app.py:218 | app_fit: losses_distributed [(1, 0.03073211431503296), (2, 0.026730689287185666), (3, 0.024968858659267425), (4, 0.024011496186256406), (5, 0.023463268995285034)]
INFO:flwr:app_fit: losses_distributed [(1, 0.03073211431503296), (2, 0.026730689287185666), (3, 0.024968858659267425), (4, 0.024011496186256406), (5, 0.023463268995285034)]
INFO flwr 2023-06-02 15:04:10,727 | app.py:219 | app_fit: metrics_distributed_fit {}
INFO:flwr:app_fit: metrics_distributed_fit {}
INFO flwr 2023-06-02 15:04:10,732 | app.py:220 | app_fit: metrics_distributed {'accuracy': [(1, 0.3035), (2, 0.39249999999999996), (3, 0.429), (4, 0.4585), (5, 0.479)]}
INFO:flwr:app_fit: me

History (loss, distributed):
	round 1: 0.03073211431503296
	round 2: 0.026730689287185666
	round 3: 0.024968858659267425
	round 4: 0.024011496186256406
	round 5: 0.023463268995285034
History (metrics, distributed, evaluate):
{'accuracy': [(1, 0.3035), (2, 0.39249999999999996), (3, 0.429), (4, 0.4585), (5, 0.479)]}