# **USE CASE 1.** Image classification in Flower

## Required libraries and configuration

In [None]:
pip install flwr

Collecting flwr
  Downloading flwr-1.18.0-py3-none-any.whl.metadata (15 kB)
Collecting cryptography<45.0.0,>=44.0.1 (from flwr)
  Downloading cryptography-44.0.3-cp39-abi3-manylinux_2_34_x86_64.whl.metadata (5.7 kB)
Collecting iterators<0.0.3,>=0.0.2 (from flwr)
  Downloading iterators-0.0.2-py3-none-any.whl.metadata (2.5 kB)
Collecting pathspec<0.13.0,>=0.12.1 (from flwr)
  Downloading pathspec-0.12.1-py3-none-any.whl.metadata (21 kB)
Collecting protobuf<5.0.0,>=4.21.6 (from flwr)
  Downloading protobuf-4.25.7-cp37-abi3-manylinux2014_x86_64.whl.metadata (541 bytes)
Collecting pycryptodome<4.0.0,>=3.18.0 (from flwr)
  Downloading pycryptodome-3.23.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.4 kB)
Collecting tomli<3.0.0,>=2.0.1 (from flwr)
  Downloading tomli-2.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Collecting tomli-w<2.0.0,>=1.0.0 (from flwr)
  Downloading tomli_w-1.2.0-py3-none-any.whl.metadata (5.7 kB)
Collecting typ

Import required libraries

In [None]:
import random

from collections import OrderedDict
from typing import List, Tuple

import flwr as fl
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torchvision
import torch.nn.functional as F
import torchvision.transforms as transforms
from flwr.common import Metrics
from torch.utils.data import DataLoader, random_split, Subset
from torchvision.datasets import MNIST

DEVICE = torch.device("cpu")  # Try "cuda" to train on GPU

Define some parameters for the simulation: the number of clients in the federated scenario, the number of federated rounds, the number of local epochs each client runs before communicating with the server, and the batch size used during training.

In [None]:
# --- Federated simulation settings ---
NUM_CLIENTS = 10  # simulated clients taking part in the federation
NUM_ROUNDS = 10   # federated learning rounds
NUM_EPOCHS = 5    # local passes over each client's data per round
BATCH_SIZE = 20   # mini-batch size

# --- Reproducibility ---
# A single seed shared by every random number generator used in the notebook
seed = 10
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

## Loading and preparing the input data

Load the MNIST dataset from torchvision. Then split the available training and testing data evenly and randomly among the clients.

In [None]:
# Download MNIST (train and test) and convert every image to a tensor
mnist_train = MNIST("./dataset", train=True, download=True, transform=transforms.ToTensor())
mnist_test = MNIST("./dataset", train=False, download=True, transform=transforms.ToTensor())

# For simulation purposes, we select a subset (10%) of the original data
# mnist_train = Subset(mnist_train, list(range(len(mnist_train)//10)))
# mnist_test = Subset(mnist_test, list(range(len(mnist_test)//10)))


def _partition_lengths(num_samples: int, num_parts: int) -> list[int]:
    """Split ``num_samples`` into ``num_parts`` sizes that differ by at most one.

    The sizes always sum to ``num_samples``, which ``random_split`` requires.
    When ``num_samples`` is divisible by ``num_parts`` every size is simply
    ``num_samples // num_parts``.
    """
    base, remainder = divmod(num_samples, num_parts)
    # The first `remainder` partitions receive one extra sample each
    return [base + 1 if part < remainder else base for part in range(num_parts)]


# Split training and testing sets into NUM_CLIENTS partitions to simulate the individual datasets
train_lengths = _partition_lengths(len(mnist_train), NUM_CLIENTS)
test_lengths = _partition_lengths(len(mnist_test), NUM_CLIENTS)
train_splits = random_split(mnist_train, train_lengths, torch.Generator().manual_seed(seed))
test_splits = random_split(mnist_test, test_lengths, torch.Generator().manual_seed(seed))

# Create one DataLoader per client; only the training loaders shuffle their data
train_data = [DataLoader(split, batch_size=BATCH_SIZE, shuffle=True) for split in train_splits]
test_data = [DataLoader(split, batch_size=BATCH_SIZE) for split in test_splits]

100%|██████████| 9.91M/9.91M [00:00<00:00, 18.1MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 490kB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 3.86MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 8.52MB/s]


## Create a Deep Learning model

For a fair comparison with the other frameworks, we propose two different network architectures here: one with a convolutional (CNN) layer, a kind of layer widely used for image classification, and another one with only dense layers.

Although these architectures are used here, note that any other network architecture supported by PyTorch can be used.

In [None]:
# Define network with a CNN
class CNN_Net(nn.Module):
    """Small convolutional classifier: conv -> ReLU -> max-pool -> linear.

    The linear layer expects 32 * 14 * 14 input features, which is what the
    convolution (size-preserving, 32 channels) followed by 2x2 max-pooling
    produces for a 1x28x28 input. The network outputs 10 raw scores per sample.
    """

    def __init__(self) -> None:
        super().__init__()
        # 5x5 convolution with padding 2 and stride 1 keeps the spatial size
        self.cnn1 = nn.Conv2d(1, 32, kernel_size=5, stride=1, padding=2)
        self.relu1 = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d(kernel_size=2)
        self.fc1 = nn.Linear(32 * 14 * 14, 10)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the 10 class scores for each sample in the batch ``x``."""
        features = self.maxpool1(self.relu1(self.cnn1(x)))
        # Collapse everything except the batch dimension before the classifier
        flattened = features.view(features.size(0), -1)
        return self.fc1(flattened)

# Define network with only dense/linear layers
class Dense_Net(nn.Module):
    """Fully connected classifier: 784 -> 32 -> 10.

    The forward pass flattens each sample, applies a ReLU hidden layer and
    returns log-probabilities (``log_softmax`` over the class dimension).
    """

    def __init__(self) -> None:
        super().__init__()
        self.fc1 = nn.Linear(784, 32)
        self.fc2 = nn.Linear(32, 10)

    def forward(self, x):
        """Return per-class log-probabilities for each sample in ``x``."""
        # Flatten every sample to a vector, keeping the batch dimension
        flat = x.view(x.shape[0], -1)

        # ReLU and log-softmax are used functionally, so no modules are needed in __init__
        hidden = F.relu(self.fc1(flat))
        scores = self.fc2(hidden)
        return F.log_softmax(scores, dim=1)

Define the methods for training and evaluating the model in each local client. These methods receive the network to use.

In [None]:
def train(net, trainloader, epochs: int, verbose=True):
    """Train ``net`` in place on ``trainloader`` for ``epochs`` epochs.

    Uses cross-entropy loss and an Adam optimizer with default settings.
    Batches are moved to ``DEVICE`` before the forward pass.

    Args:
        net: Model to train; it is switched to training mode.
        trainloader: Iterable of ``(images, labels)`` batches that also exposes
            a ``dataset`` attribute (its length is used to normalise the loss).
        epochs: Number of passes over ``trainloader``.
        verbose: If true, print the loss and accuracy of the last epoch.

    Returns:
        None. The model parameters are updated in place.
    """
    # Indicate the loss and optimizer to use
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(net.parameters())
    net.train()

    # Stay None when epochs == 0 so the final report can be skipped safely
    epoch_loss, epoch_acc = None, None

    # Train each epoch with local data
    for _ in range(epochs):
        correct, total, epoch_loss = 0, 0, 0.0
        for images, labels in trainloader:
            images, labels = images.to(DEVICE), labels.to(DEVICE)
            optimizer.zero_grad()
            # Single forward pass, reused for both the loss and the accuracy
            outputs = net(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Metrics: accumulate plain Python numbers so no autograd graph is kept alive
            epoch_loss += loss.item()
            total += labels.size(0)
            correct += (outputs.detach().argmax(dim=1) == labels).sum().item()

        # Sum of the per-batch losses divided by the number of samples in the dataset
        epoch_loss /= len(trainloader.dataset)
        epoch_acc = correct / total

    # Report the metrics of the last epoch only (nothing to report if no epoch ran)
    if verbose and epoch_loss is not None:
        print(f"Train loss {epoch_loss}, accuracy {epoch_acc}")

def test(net, testloader):
    """Evaluate ``net`` on ``testloader`` without computing gradients.

    Args:
        net: Model to evaluate; it is switched to evaluation mode.
        testloader: Iterable of ``(images, labels)`` batches that also exposes
            a ``dataset`` attribute.

    Returns:
        A ``(loss, accuracy)`` tuple. ``loss`` is the sum of the per-batch
        cross-entropy losses divided by ``len(testloader.dataset)``;
        ``accuracy`` is the fraction of correctly classified samples.
    """
    loss_fn = torch.nn.CrossEntropyLoss()
    running_loss = 0.0
    num_correct = 0
    num_seen = 0
    net.eval()

    with torch.no_grad():
        for batch_images, batch_labels in testloader:
            batch_images = batch_images.to(DEVICE)
            batch_labels = batch_labels.to(DEVICE)
            scores = net(batch_images)
            running_loss += loss_fn(scores, batch_labels).item()
            # The predicted class is the index of the highest score
            predicted = torch.max(scores.data, 1)[1]
            num_seen += batch_labels.size(0)
            num_correct += (predicted == batch_labels).sum().item()

    return running_loss / len(testloader.dataset), num_correct / num_seen

## Training in the federated scenario

First, we create a FlowerClient class, that includes the information of each simulated client. The class has three methods:
 * `get_parameters`: Get the parameters of the model to send them to the server
 * `fit`: Receives the model parameters from the server, trains the model with local data, and returns the updated model parameters to the server
 * `evaluate`: Receives the model from the server and evaluates it with local data

In [None]:
def get_parameters(net) -> List[np.ndarray]:
    """Return every entry of ``net.state_dict()`` as a NumPy array, in state-dict order."""
    return [tensor.cpu().numpy() for tensor in net.state_dict().values()]

def set_parameters(net, parameters: List[np.ndarray]):
    """Load ``parameters`` into ``net``, pairing them with its state-dict keys in order.

    Args:
        net: Model whose state dict is overwritten.
        parameters: Arrays in the same order as ``net.state_dict()`` (the order
            produced by ``get_parameters``).

    Raises:
        RuntimeError: Raised by ``load_state_dict(strict=True)`` when keys are
            missing or shapes do not match.
    """
    params_dict = zip(net.state_dict().keys(), parameters)
    # torch.tensor keeps each array's dtype and shape. The legacy torch.Tensor(...)
    # constructor always produces float32 and may treat a 0-dimensional array
    # (e.g. BatchNorm's num_batches_tracked) as a size instead of a value.
    state_dict = OrderedDict((key, torch.tensor(value)) for key, value in params_dict)
    net.load_state_dict(state_dict, strict=True)

In [None]:
class FlowerClient(fl.client.NumPyClient):
    """Flower client that trains and evaluates a model on its own local data.

    Args:
        net: Model held by this client.
        trainloader: Loader with the client's training data.
        testloader: Loader with the client's test data. When omitted, the
            training loader is used for evaluation as well.
    """

    def __init__(self, net, trainloader, testloader=None):
        self.net = net
        self.trainloader = trainloader
        if testloader is None:
            print('Train data will be used as test data too.')
            self.testloader = trainloader
        else:
            self.testloader = testloader

    def get_parameters(self, config):
        """Return the current model parameters as a list of NumPy arrays."""
        return get_parameters(self.net)

    def fit(self, parameters, config):
        """Load the received parameters, train locally, and return the update.

        Returns:
            ``(parameters, num_examples, metrics)`` with an empty metrics dict.
        """
        set_parameters(self.net, parameters)
        train(self.net, self.trainloader, epochs=NUM_EPOCHS)
        # Report the number of training samples, not the number of batches:
        # len(loader) would count batches, while the server weights by examples.
        return get_parameters(self.net), len(self.trainloader.dataset), {}

    def evaluate(self, parameters, config):
        """Load the received parameters and evaluate them on the local test data.

        Returns:
            ``(loss, num_examples, metrics)`` where metrics holds ``"accuracy"``.
        """
        set_parameters(self.net, parameters)
        loss, accuracy = test(self.net, self.testloader)
        # Same as in fit: the count is the number of evaluated samples
        return float(loss), len(self.testloader.dataset), {"accuracy": float(accuracy)}

To simulate the federated scenario on a single machine, the `client_fn` function creates FlowerClients on demand, given the client id.

Note that each client is passed both training and testing local data, so the evaluation over test data is done during the simulation itself.

In [None]:
def client_fn(cid: str) -> FlowerClient:
    """Build the Flower client for the partition identified by ``cid``.

    ``cid`` is converted to an integer and used as the index into
    ``train_data`` and ``test_data``, so every client gets its own loaders.
    """
    # Fresh CNN model placed on the configured device
    model = CNN_Net().to(DEVICE)

    # Pick this client's own train/test loaders
    partition = int(cid)
    local_train = train_data[partition]
    local_test = test_data[partition]

    # One Flower client stands for one organization
    return FlowerClient(model, local_train, local_test)

To report averaged evaluation metrics beyond the loss, we need to define a function that aggregates them; in this case, the accuracy is averaged across clients, weighted by the number of examples.

In [None]:
def weighted_average(metrics: List[Tuple[int, Metrics]]) -> Metrics:
    """Average the clients' ``"accuracy"`` values, weighted by their example counts.

    Args:
        metrics: One ``(num_examples, client_metrics)`` pair per client, where
            ``client_metrics`` contains an ``"accuracy"`` entry.

    Returns:
        A dict with the single key ``"accuracy"``.
    """
    weighted_sum = 0
    total_examples = 0
    for num_examples, client_metrics in metrics:
        # Each accuracy counts in proportion to the examples behind it
        weighted_sum += num_examples * client_metrics["accuracy"]
        total_examples += num_examples

    return {"accuracy": weighted_sum / total_examples}

Train with weighted FedAvg algorithm.

Then, start the simulation indicating the method to create clients, the number of clients in the simulation, the number of rounds, and the strategy (i.e., the FedAvg strategy to combine local updates). The simulation covers both the federated model training as well as evaluating the model with each local test data.

In [None]:
!pip install -U "flwr[simulation]"

Collecting ray==2.31.0 (from flwr[simulation])
  Downloading ray-2.31.0-cp311-cp311-manylinux2014_x86_64.whl.metadata (13 kB)
Downloading ray-2.31.0-cp311-cp311-manylinux2014_x86_64.whl (66.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.7/66.7 MB[0m [31m11.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: ray
Successfully installed ray-2.31.0


In [None]:
!pip install ray



In [None]:
!pip show flwr ray

Name: flwr
Version: 1.18.0
Summary: Flower: A Friendly Federated AI Framework
Home-page: https://flower.ai
Author: The Flower Authors
Author-email: hello@flower.ai
License: Apache-2.0
Location: /usr/local/lib/python3.11/dist-packages
Requires: cryptography, grpcio, iterators, numpy, pathspec, protobuf, pycryptodome, pyyaml, requests, rich, tomli, tomli-w, typer
Required-by: 
---
Name: ray
Version: 2.31.0
Summary: Ray provides a simple, universal API for building distributed applications.
Home-page: https://github.com/ray-project/ray
Author: Ray Team
Author-email: ray-dev@googlegroups.com
License: Apache 2.0
Location: /usr/local/lib/python3.11/dist-packages
Requires: aiosignal, click, filelock, frozenlist, jsonschema, msgpack, packaging, protobuf, pyyaml, requests
Required-by: 


In [None]:
import flwr as fl
from flwr.server.strategy import FedAvg
from flwr.common import Metrics

In [None]:
import flwr as fl
from flwr.server.strategy import FedAvg
from flwr.common import Metrics

# Smoke test: run a tiny simulation with dummy clients to check that the
# flwr + ray installation works before launching the real experiment.
# Every name defined here is specific to the smoke test, so the experiment's
# NUM_CLIENTS, NUM_ROUNDS, client_fn and weighted_average (defined in earlier
# cells) are NOT overwritten and the real simulation below still uses them.
SMOKE_NUM_CLIENTS = 2
SMOKE_NUM_ROUNDS = 3

# 1. Define configuration functions
def fit_config(server_round: int):
    """Return training configuration dict for each round."""
    return {
        "server_round": server_round,
        "local_epochs": 1,  # Number of local epochs
    }

def evaluate_config(server_round: int):
    """Return evaluation configuration dict for each round."""
    return {
        "server_round": server_round,
    }

# 2. Metric aggregation: reuse weighted_average from the earlier cell
#    (re-defining it here would only shadow an identical function)

# 3. Define the dummy client function
def smoke_client_fn(cid: str):
    """Create and return a dummy Flower client that performs no real training."""

    class SimpleClient(fl.client.NumPyClient):
        def get_parameters(self, config):
            return [np.zeros(1)]  # Dummy parameters

        def fit(self, parameters, config):
            print(f"Client {cid} training for round {config['server_round']}")
            return [np.ones(1)], 1, {"accuracy": 0.5}  # Dummy results

        def evaluate(self, parameters, config):
            return 0.5, 1, {"accuracy": 0.5}  # Dummy evaluation

    return SimpleClient()

# 4. Define and run the smoke-test simulation
smoke_strategy = FedAvg(
    min_fit_clients=SMOKE_NUM_CLIENTS,
    min_evaluate_clients=SMOKE_NUM_CLIENTS,
    min_available_clients=SMOKE_NUM_CLIENTS,
    on_fit_config_fn=fit_config,
    on_evaluate_config_fn=evaluate_config,
    evaluate_metrics_aggregation_fn=weighted_average,
)

fl.simulation.start_simulation(
    client_fn=smoke_client_fn,
    num_clients=SMOKE_NUM_CLIENTS,
    config=fl.server.ServerConfig(num_rounds=SMOKE_NUM_ROUNDS),
    strategy=smoke_strategy,
)

	Instead, use the `flwr run` CLI command to start a local simulation in your Flower app, as shown for example below:

		$ flwr new  # Create a new Flower app from a template

		$ flwr run  # Run the Flower app in Simulation Mode

	Using `start_simulation()` is deprecated.

            This is a deprecated feature. It will be removed
            entirely in future versions of Flower.
        
	Instead, use the `flwr run` CLI command to start a local simulation in your Flower app, as shown for example below:

		$ flwr new  # Create a new Flower app from a template

		$ flwr run  # Run the Flower app in Simulation Mode

	Using `start_simulation()` is deprecated.

            This is a deprecated feature. It will be removed
            entirely in future versions of Flower.
        
[92mINFO [0m:      Starting Flower simulation, config: num_rounds=3, no round_timeout
2025-05-26 06:51:38,315	INFO worker.py:1771 -- Started a local Ray instance.
[92mINFO [0m:      Flower VCE: Ray initiali

[36m(ClientAppActor pid=4790)[0m Client 0 training for round 1


History (loss, distributed):
	round 1: 0.5
	round 2: 0.5
	round 3: 0.5
History (metrics, distributed, evaluate):
{'accuracy': [(1, 0.5), (2, 0.5), (3, 0.5)]}

In [None]:
# FedAvg strategy; per-client evaluation metrics are combined by weighted_average
strategy = fl.server.strategy.FedAvg(evaluate_metrics_aggregation_fn=weighted_average)

# Launch the simulation and keep its result in fl_sim for the evaluation cells
server_config = fl.server.ServerConfig(num_rounds=NUM_ROUNDS)
fl_sim = fl.simulation.start_simulation(
    client_fn=client_fn,
    num_clients=NUM_CLIENTS,
    config=server_config,
    strategy=strategy,
)

	Instead, use the `flwr run` CLI command to start a local simulation in your Flower app, as shown for example below:

		$ flwr new  # Create a new Flower app from a template

		$ flwr run  # Run the Flower app in Simulation Mode

	Using `start_simulation()` is deprecated.

            This is a deprecated feature. It will be removed
            entirely in future versions of Flower.
        
[92mINFO [0m:      Starting Flower simulation, config: num_rounds=3, no round_timeout
2025-05-26 06:51:56,696	INFO worker.py:1771 -- Started a local Ray instance.
[92mINFO [0m:      Flower VCE: Ray initialized with resources: {'node:__internal_head__': 1.0, 'CPU': 2.0, 'memory': 7957964391.0, 'node:172.28.0.12': 1.0, 'object_store_memory': 3978982195.0, 'accelerator_type:T4': 1.0, 'GPU': 1.0}
[92mINFO [0m:      Optimize your simulation with Flower VCE: https://flower.ai/docs/framework/how-to-run-simulations.html
[92mINFO [0m:      No `client_resources` specified. Using minimal resources for

## Evaluation with test data

The evaluation has been done during the simulation. Below, we show the averaged results over test data.
The result of the simulation includes the results on all rounds, so we retrieve those of the last round.

In [None]:
# Each history entry is indexed as entry[1] for its value; take the last round's entry
final_loss = fl_sim.losses_distributed[-1][1]
final_accuracy = fl_sim.metrics_distributed['accuracy'][-1][1]
print('Test data, \t Loss={:.4f}, \t Accuracy={:.4f}'.format(final_loss, final_accuracy))

Test data, 	 Loss=0.5000, 	 Accuracy=0.5000


In [None]:
fl_sim

History (loss, distributed):
	round 1: 0.5
	round 2: 0.5
	round 3: 0.5
History (metrics, distributed, evaluate):
{'accuracy': [(1, 0.5), (2, 0.5), (3, 0.5)]}