In [1]:
from sklearn.datasets import load_breast_cancer
import numpy as np
from typing import Tuple, Union, List

random_state = 88844
rng = np.random.default_rng(seed=random_state)

data = load_breast_cancer()
X, y = data["data"], data["target"]
feature_names = data["feature_names"]
target_names = data["target_names"]

print(X.shape, y.shape)
print("Target classes:", target_names)


(569, 30) (569,)
Target classes: ['malignant' 'benign']


# Baseline model

In [2]:
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.metrics import classification_report

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=random_state
)
print(f"X_train size: {X_train.shape}")
print(f"X_test size: {X_test.shape}")

pipe = make_pipeline(StandardScaler(), LogisticRegression())

pipe.fit(X_train, y_train)

y_test_preds = pipe.predict(X_test)
print(classification_report(y_test, y_test_preds))


X_train size: (455, 30)
X_test size: (114, 30)
              precision    recall  f1-score   support

           0       0.98      0.96      0.97        45
           1       0.97      0.99      0.98        69

    accuracy                           0.97       114
   macro avg       0.97      0.97      0.97       114
weighted avg       0.97      0.97      0.97       114



# Implementing the same model using `flower`

## Ingredients:
1. Split the data into $n$ datasets, each representing a different location where the data lives
2. Define functions for:
    - Getting model parameters
    - Setting model parameters
3. Define `FlowerClient` 
    - Should contain the following:
        - `get_parameters`
        - `fit`
        - `evaluate`
4. Set Federated learning strategy
6. Simulate!

## 1. Data setup
Since we have ~450 training instances, let's say we have $n=5$ different hospitals where the breast cancer data lives

In [3]:
def split_dataset(
    X: np.array, y: np.array, n_clients: int = 5
) -> List[Tuple[np.array, np.array]]:
    n_clients = 5
    partition_sizes = len(X) // n_clients

    partition_indices = rng.choice(
        len(X), size=(n_clients, partition_sizes), replace=False
    )
    sets = [(X[pidx], y[pidx]) for pidx in partition_indices]
    return sets


train_sets = split_dataset(X_train, y_train)
train_sets[0][0].shape, train_sets[0][1].shape  # Number of samples for our first client


((91, 30), (91,))

We'll have a single test set shared across all clients

In [4]:
test_set = (X_test, y_test)

## 2. Fetching/reloading model parameters
Taken from the `flower` [sklearn example](https://github.com/adap/flower/blob/main/examples/sklearn-logreg-mnist/utils.py)

In [5]:
XY = Tuple[np.ndarray, np.ndarray]
LogRegParams = Union[XY, Tuple[np.ndarray]]


def get_model_parameters(model: LogisticRegression) -> LogRegParams:
    """Returns the paramters of a sklearn LogisticRegression model."""
    if model.fit_intercept:
        params = [
            model.coef_,
            model.intercept_,
        ]
    else:
        params = [
            model.coef_,
        ]
    return params


def set_model_params(
    model: LogisticRegression, params: LogRegParams
) -> LogisticRegression:
    """Sets the parameters of a sklean LogisticRegression model."""
    model.coef_ = params[0]
    if model.fit_intercept:
        model.intercept_ = params[1]
    return model


## 3. Define FlowerClient

In [6]:
import warnings
import flwr as fl
from sklearn.metrics import log_loss

class BreastCancerClient(fl.client.NumPyClient):
    def __init__(self, model, train_set, test_set):
        self.pipe = pipe
        self.clf = self.pipe.steps[-1][0]
        self.X_train, self.y_train = train_set
        self.X_test, self.y_test = test_set

    def get_parameters(self, config):
        return get_model_parameters(self.pipe)

    def fit(self, parameters, config):
        set_model_params(self.pipe, parameters)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            self.pip.fit(self.X_train, self.y_train)

        print(f"Training finished for round {config['server_round']}")
        return get_model_parameters(self.pipe), len(self.X_train), {}

    def evaluate(self, parameters, config):  # type: ignore
        set_model_params(self.pipe, parameters)
        loss = log_loss(self.y_test, self.clf.predict_proba(self.X_test))
        accuracy = self.clf.score(self.X_test, self.y_test)
        return loss, len(self.X_test), {"accuracy": accuracy}

def launch_client(client_id, train_sets=train_sets, test_set=test_set):
    model = make_pipeline(StandardScaler(), LogisticRegression(max_iter=1, warm_start=True))
    train_set = train_sets[client_id]
    return BreastCancerClient(model, train_set, test_set)

## 5. Set Federated learning strategy


In [7]:
help(fl.server.strategy.FedAvg)

Help on class FedAvg in module flwr.server.strategy.fedavg:

class FedAvg(flwr.server.strategy.strategy.Strategy)
 |  FedAvg(*, fraction_fit: float = 1.0, fraction_evaluate: float = 1.0, min_fit_clients: int = 2, min_evaluate_clients: int = 2, min_available_clients: int = 2, evaluate_fn: Optional[Callable[[int, List[numpy.ndarray[Any, numpy.dtype[Any]]], Dict[str, Union[bool, bytes, float, int, str]]], Optional[Tuple[float, Dict[str, Union[bool, bytes, float, int, str]]]]]] = None, on_fit_config_fn: Optional[Callable[[int], Dict[str, Union[bool, bytes, float, int, str]]]] = None, on_evaluate_config_fn: Optional[Callable[[int], Dict[str, Union[bool, bytes, float, int, str]]]] = None, accept_failures: bool = True, initial_parameters: Optional[flwr.common.Parameters] = None, fit_metrics_aggregation_fn: Optional[Callable[[List[Tuple[int, Dict[str, Union[bool, bytes, float, int, str]]]]], Dict[str, Union[bool, bytes, float, int, str]]]] = None, evaluate_metrics_aggregation_fn: Optional[Call

In [8]:
fl.__version__

'1.4.0'

In [9]:
strategy = fl.server.strategy.FedAvg(
    fraction_fit=1.0,  # Sample 100% of available clients for training
    fraction_evaluate=0.5,  # Sample 50% of available clients for evaluation
    min_fit_clients=5,
    min_evaluate_clients=1,  # Never sample less than 5 clients for evaluation
    min_available_clients=5,  # Wait until all 10 clients are available
)

## 6. Simulate!

In [10]:

fl.simulation.start_simulation(
    client_fn=launch_client,
    num_clients=5,
    config=fl.server.ServerConfig(num_rounds=5),
    strategy=strategy,
    client_resources=None,
)

INFO flwr 2023-04-26 22:44:12,633 | app.py:146 | Starting Flower simulation, config: ServerConfig(num_rounds=5, round_timeout=None)
2023-04-26 22:44:15,517	INFO worker.py:1625 -- Started a local Ray instance.
INFO flwr 2023-04-26 22:44:18,946 | app.py:180 | Flower VCE: Ray initialized with resources: {'GPU': 1.0, 'memory': 31434751182.0, 'CPU': 24.0, 'node:127.0.0.1': 1.0, 'object_store_memory': 15717375590.0}
INFO flwr 2023-04-26 22:44:18,947 | server.py:86 | Initializing global parameters
INFO flwr 2023-04-26 22:44:18,948 | server.py:273 | Requesting initial parameters from one random client
ERROR flwr 2023-04-26 22:44:19,881 | ray_client_proxy.py:72 | [36mray::launch_and_get_parameters()[39m (pid=23912, ip=127.0.0.1)
  File "python\ray\_raylet.pyx", line 877, in ray._raylet.execute_task
  File "python\ray\_raylet.pyx", line 881, in ray._raylet.execute_task
  File "c:\Users\gamin\anaconda3\envs\fed_learn\lib\site-packages\flwr\simulation\ray_transport\ray_client_proxy.py", line 135

RayTaskError(TypeError): [36mray::launch_and_get_parameters()[39m (pid=23912, ip=127.0.0.1)
  File "python\ray\_raylet.pyx", line 877, in ray._raylet.execute_task
  File "python\ray\_raylet.pyx", line 881, in ray._raylet.execute_task
  File "c:\Users\gamin\anaconda3\envs\fed_learn\lib\site-packages\flwr\simulation\ray_transport\ray_client_proxy.py", line 135, in launch_and_get_parameters
    client: Client = _create_client(client_fn, cid)
  File "c:\Users\gamin\anaconda3\envs\fed_learn\lib\site-packages\flwr\simulation\ray_transport\ray_client_proxy.py", line 168, in _create_client
    client_like: ClientLike = client_fn(cid)
  File "C:\Users\gamin\AppData\Local\Temp\ipykernel_18672\1446647322.py", line 32, in launch_client
TypeError: list indices must be integers or slices, not str