In [1]:
# https://www.kaggle.com/code/kaanboke/beginner-friendly-end-to-end-ml-project-enjoy

import warnings

import flwr as fl

import logging

# Handle to the logger named 'flwr'; it is not referenced again in the cells shown here.
logger = logging.getLogger('flwr')

# Index of this client's data shard; load_data() interpolates it into the CSV file name.
CLIENT_INDEX = 0

  from .autonotebook import tqdm as notebook_tqdm
2024-08-08 07:28:03,137	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


In [13]:
from datasets import load_dataset
from sklearn.compose import ColumnTransformer
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, PowerTransformer
from sklearn.model_selection import train_test_split

def load_data():
    """Load this client's stroke-data shard and return preprocessed train/test splits.

    Reads the CSV selected by the module-level ``CLIENT_INDEX``, separates the
    ``stroke`` target, prints the class counts of the full shard, and makes a
    stratified 90/10 split (``random_state=42``). All preprocessing is fitted
    on the training split only and then applied to the test split:

    * the numerical columns are median-imputed and the categorical columns
      are one-hot encoded (columns in neither list are not passed through);
    * a standardising Yeo-Johnson ``PowerTransformer`` is applied to the
      numerical columns, which occupy the leading columns of the output.

    Returns:
        ``(X_train, y_train, X_test, y_test)``. The ``X`` values are dense
        NumPy arrays; the ``y`` values are the corresponding slices of the
        ``stroke`` column.
    """
    data_files = [f"./stroke-prediction-dataset/healthcare-dataset-stroke-data-{CLIENT_INDEX}.csv"]
    dataset = load_dataset("csv", data_files=data_files)
    df = dataset['train'].to_pandas()

    y = df['stroke']
    X = df.drop('stroke', axis=1)

    categorical = ['hypertension', 'heart_disease', 'ever_married', 'work_type', 'Residence_type', 'smoking_status']
    numerical = ['avg_glucose_level', 'bmi', 'age']

    # Report the label balance of the whole shard before splitting.
    unique, counts = np.unique(y, return_counts=True)
    class_distribution = dict(zip(unique, counts))
    print("Class distribution in original data:", class_distribution)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.1, random_state=42, stratify=y
    )

    # NOTE(review): the encoder learns its categories from this shard's
    # training split, so the number of output columns depends on the data;
    # confirm it matches the feature count the shared model is initialised with.
    transformer = ColumnTransformer(
        transformers=[
            ('imp', SimpleImputer(strategy='median'), numerical),
            # A category that only shows up in the test split must not abort
            # transform(); it is encoded as an all-zero group instead.
            ('ohe', OneHotEncoder(handle_unknown='ignore'), categorical),
        ],
        # Always return a dense array: the slice assignments below rely on it.
        sparse_threshold=0,
    )

    # Fit on the training split only, then reuse the fitted transformer.
    X_train_transformed = transformer.fit_transform(X_train)
    X_test_transformed = transformer.transform(X_test)

    # The imputed numerical features are the first len(numerical) columns
    # because 'imp' is listed first in the ColumnTransformer.
    num_cols = slice(0, len(numerical))
    power_transformer = PowerTransformer(method='yeo-johnson', standardize=True)
    X_train_transformed[:, num_cols] = power_transformer.fit_transform(X_train_transformed[:, num_cols])
    X_test_transformed[:, num_cols] = power_transformer.transform(X_test_transformed[:, num_cols])

    return X_train_transformed, y_train, X_test_transformed, y_test

In [14]:
import numpy as np
from sklearn.linear_model import LogisticRegression

from flwr.common import NDArrays


def get_model_parameters(model: LogisticRegression) -> NDArrays:
    """Collect the learned arrays of a LogisticRegression model into a list.

    The list always starts with ``model.coef_``; ``model.intercept_`` follows
    only when the model was configured with ``fit_intercept``.
    """
    weights = [model.coef_]
    if model.fit_intercept:
        weights.append(model.intercept_)
    return weights


def set_model_params(model: LogisticRegression, params: NDArrays) -> LogisticRegression:
    """Write ``params`` into a sklearn LogisticRegression model and return it.

    ``params[0]`` becomes ``coef_``. ``params[1]`` becomes ``intercept_``,
    but only when the model uses an intercept.
    """
    model.coef_ = params[0]
    if not model.fit_intercept:
        return model
    model.intercept_ = params[1]
    return model


def set_initial_params(model: LogisticRegression, n_classes: int = 2, n_features: int = 20):
    """Give an unfitted model zero-valued parameters.

    The model's parameters are uninitialized until ``model.fit`` is called,
    but the server asks clients for initial parameters at launch. Refer to
    the sklearn.linear_model.LogisticRegression documentation for the
    attribute shapes.

    Args:
        model: The estimator to initialise in place.
        n_classes: Number of classes in the dataset (default 2).
        n_features: Number of input features after preprocessing (default 20).

    For a binary problem scikit-learn keeps a single coefficient row and a
    single intercept, so ``coef_`` is ``(1, n_features)`` and ``intercept_``
    is ``(1,)``. Otherwise there is one row / intercept per class.
    """
    model.classes_ = np.arange(n_classes)

    # Match the shapes a fitted estimator exposes so that arrays exchanged
    # before and after the first fit have the same layout.
    n_rows = 1 if n_classes == 2 else n_classes

    model.coef_ = np.zeros((n_rows, n_features))
    if model.fit_intercept:
        model.intercept_ = np.zeros((n_rows,))

In [15]:
# Shared estimator used by the client callbacks.
# scikit-learn documents warm_start as useless for the liblinear solver, so
# with liblinear every fit() would discard the weights received from the
# server. lbfgs supports the L2 penalty and honours warm_start.
model = LogisticRegression(
    penalty="l2",
    solver="lbfgs",
    max_iter=1,  # local epoch
    warm_start=True,  # prevent refreshing weights when fitting
)

# Parameters must exist before the first fit because the server may request them at launch.
set_initial_params(model)
X_train, y_train, X_test, y_test = load_data()

Class distribution in original data: {0: 1175, 1: 52}


In [16]:
from sklearn.metrics import log_loss

class SklearnClient(fl.client.NumPyClient):
    """Flower NumPy client wrapping the module-level ``model`` and data splits.

    All three callbacks read and mutate the notebook globals ``model``,
    ``X_train``, ``y_train``, ``X_test`` and ``y_test``.
    """

    def get_parameters(self, config):  # type: ignore
        """Return the current parameters of the shared model."""
        return get_model_parameters(model)

    def fit(self, parameters, config):  # type: ignore
        """Load the received parameters, fit on the local training split, and return the result.

        Returns the updated parameters, the number of training samples and
        an empty metrics dict.
        """
        set_model_params(model, parameters)
        with warnings.catch_warnings():
            # Silence warnings raised while fitting for the duration of this call.
            warnings.simplefilter("ignore")
            model.fit(X_train, y_train)
        return get_model_parameters(model), X_train.shape[0], {}

    def evaluate(self, parameters, config):  # type: ignore
        """Load the received parameters and score them on the local test split.

        Returns the log loss, the number of test samples and a metrics dict
        holding the accuracy.
        """
        set_model_params(model, parameters)
        # Pass the label set explicitly: without it log_loss infers the labels
        # from y_test and raises if the local test split holds only one class.
        loss = log_loss(y_test, model.predict_proba(X_test), labels=model.classes_)
        accuracy = model.score(X_test, y_test)
        return loss, X_test.shape[0], {"accuracy": accuracy}

In [18]:
# Connect a fresh SklearnClient to the Flower server at the hard-coded address;
# the captured log below shows it answering get_parameters, train and evaluate messages.
# NOTE(review): presumably this call blocks until the server ends the run - confirm.
fl.client.start_client(server_address="20.198.223.216:8000", client=SklearnClient().to_client())

[92mINFO [0m:      
[92mINFO [0m:      Received: get_parameters message 295d7c88-29ec-4003-9b9c-4eb700c99166
[92mINFO [0m:      Sent reply
[92mINFO [0m:      
[92mINFO [0m:      Received: train message 10116a22-4efb-45dd-8d0d-ed409ec205e2
[92mINFO [0m:      Sent reply
[92mINFO [0m:      
[92mINFO [0m:      Received: evaluate message 447f04b1-65a0-431e-af9d-225bde2d7376
[92mINFO [0m:      Sent reply
[92mINFO [0m:      
[92mINFO [0m:      Received: train message 4804aa61-758a-4f02-a314-7c5a0e96b33d
[92mINFO [0m:      Sent reply
[92mINFO [0m:      
[92mINFO [0m:      Received: evaluate message 52acb29c-e1e1-44fb-b3fd-5b650fe9cf6e
[92mINFO [0m:      Sent reply
[92mINFO [0m:      
[92mINFO [0m:      Received: train message 101be2cd-cd17-42ec-ac0e-9a99e9c82b09
[92mINFO [0m:      Sent reply
[92mINFO [0m:      
[92mINFO [0m:      Received: evaluate message fa392130-2a1a-405e-b3f3-5c1d8579af92
[92mINFO [0m:      Sent reply
[92mINFO [0m:      
[92mINF