In [1]:
from typing import Dict, List, Tuple

import tensorflow as tf

import flwr as fl
from flwr.common import Metrics
from flwr.simulation.ray_transport.utils import enable_tf_gpu_growth

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [2]:
VERBOSE = 0
NUM_CLIENTS = 30

# folder dataset
PATH_TRAIN_X = "../UCI_HAR_Dataset/train/X_train.txt"
PATH_TRAIN_Y = "../UCI_HAR_Dataset/train/y_train.txt"

PATH_TEST_X = "../UCI_HAR_Dataset/test/X_test.txt"
PATH_TEST_Y = "../UCI_HAR_Dataset/test/y_test.txt"

PATH_TRAIN_SBJ = "../UCI_HAR_Dataset/train/subject_train.txt"
PATH_TEST_SBJ = "../UCI_HAR_Dataset/test/subject_test.txt"
PATH_FT = "../UCI_HAR_Dataset/features.txt"

features = pd.read_csv(PATH_FT, sep=" ", header=None, index_col=0).reset_index()

# training X
df_x_train = pd.read_fwf(PATH_TRAIN_X, header=None)
df_x_train.rename(columns=features[1], inplace=True)

# activity subject (train)
df_sbj_train = pd.read_csv(PATH_TRAIN_SBJ, sep=" ", header=None)
df_x_train['user'] = df_sbj_train.values
arr_sbj_train = (df_sbj_train.iloc[:,0]).to_list()
# training Y
y_train_col = pd.read_fwf(PATH_TRAIN_Y, header=None)

# testing X
df_x_test = pd.read_fwf(PATH_TEST_X, header=None)
df_x_test.rename(columns=features[1], inplace=True)
# activity subject (test)
df_sbj_test = pd.read_csv(PATH_TEST_SBJ, sep=" ", header=None)
df_x_test['user'] = df_sbj_test.values
arr_sbj_test = (df_sbj_test.iloc[:,0]).to_list()
# testing Y
y_test_col = pd.read_fwf(PATH_TEST_Y, header=None)

In [3]:
def get_model():
    # model definition
    model = tf.keras.models.Sequential([
        tf.keras.layers.Dense(512, activation='relu', input_shape=(features.shape[0],)),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(256, activation='relu'),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(7, activation='softmax')
    ])
    model.compile("adam", "sparse_categorical_crossentropy", metrics=["accuracy"])
    return model

In [4]:
class FlowerClient(fl.client.NumPyClient):
    def __init__(self, X_train, X_test, y_train, y_test) -> None:
        super().__init__()
        # Create model
        self.model = get_model()
        self.X_train = X_train
        self.y_train = y_train
        self.X_test = X_test
        self.y_test = y_test

    def get_parameters(self, config):
        return self.model.get_weights()

    def fit(self, parameters, config):
        self.model.set_weights(parameters)
        self.model.fit(self.X_train, self.y_train, epochs=3, batch_size=32, verbose=VERBOSE)
        return self.model.get_weights(), len(self.X_train), {}

    def evaluate(self, parameters, config):
        self.model.set_weights(parameters)
        loss, accuracy = self.model.evaluate(self.X_test, self.y_test)
        return loss, len(self.X_test), {"accuracy": float(accuracy)}

In [5]:
def get_client_fn():

    def client_fn(cid) -> fl.client.Client:
        cid = int(cid) + 1

        usr_act_train = []                              # activity made by user (train)
        usr_act_test = []                               # activity made by user (test)

        for i in range(len(arr_sbj_train)):
            if(arr_sbj_train[i] == cid):
                usr_act_train.append(i)
        
        for i in range(len(arr_sbj_test)):
            if(arr_sbj_test[i] == cid):
                usr_act_test.append(i)
        
        df_ext_train = pd.DataFrame(dtype=float)
        df_ext_test = pd.DataFrame(dtype=float)
        y_ext_train = pd.DataFrame(dtype=float)
        y_ext_test = pd.DataFrame(dtype=float)

        for i in range(len(usr_act_train)):
            index = usr_act_train[i]
            x_row = df_x_train.iloc[index,:561]
            df_ext_train = pd.concat([df_ext_train, x_row], ignore_index=True, axis=1)
            y_row = y_train_col.iloc[index]
            y_ext_train = pd.concat([y_ext_train, y_row], ignore_index=True, axis=1)

        for i in range(len(usr_act_test)):
            index = usr_act_test[i]
            x_row = df_x_test.iloc[index,:561]
            df_ext_test = pd.concat([df_ext_test, x_row], ignore_index=True, axis=1)
            y_row = y_test_col.iloc[index]
            y_ext_test = pd.concat([y_ext_test, y_row], ignore_index=True, axis=1)

        df_ext_train = df_ext_train.T
        y_ext_train = y_ext_train.T
        df_ext_test = df_ext_test.T
        y_ext_test = y_ext_test.T

        X_train = np.array(df_ext_train)
        y_train = np.array(y_ext_train)
        X_test = np.array(df_ext_test)
        y_test = np.array(y_ext_test)

        if (len(y_train) == 0) | (len(y_test) == 0):
            if (len(y_train) == 0) & (len(y_test) == 0):
                X_train, X_test, y_train, y_test = np.array()
            else:
                if (len(y_train) == 0):
                    X_train, X_test, y_train, y_test = train_test_split(X_test, y_test, random_state=42, test_size=0.3)
                else:
                    X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, random_state=42, test_size=0.3)
        

        # Create and return client
        return FlowerClient(X_train, X_test, y_train, y_test).to_client()

    return client_fn


def weighted_average(metrics: List[Tuple[int, Metrics]]) -> Metrics:
    # Multiply accuracy of each client by number of examples used
    accuracies = [num_examples * m["accuracy"] for num_examples, m in metrics]
    examples = [num_examples for num_examples, _ in metrics]

    # Aggregate and return custom metric (weighted average)
    return {"accuracy": sum(accuracies) / sum(examples)}


def get_evaluate_fn(X_test, y_test):
    # The `evaluate` function will be called after every round by the strategy
    def evaluate(
        server_round: int,
        parameters: fl.common.NDArrays,
        config: Dict[str, fl.common.Scalar],
    ):
        model = get_model()  # Construct the model
        model.set_weights(parameters)  # Update model with the latest parameters
        loss, accuracy = model.evaluate(X_test, y_test, verbose=VERBOSE)
        return loss, {"accuracy": accuracy}
    # def evaluate(server_round: int, parameters: fl.common.NDArrays, config: Dict[str, fl.common.Scalar]):
    #     model = get_model()
    #     model.set_weights(parameters)
    #     loss, accuracy = model.evaluate(X_test, y_test, verbose=VERBOSE)
    #     return loss, {"accuracy": accuracy}

    return evaluate

In [10]:
# Enable GPU growth in your main process
enable_tf_gpu_growth()

# Create FedAvg strategy
myStrategy = fl.server.strategy.FedAvg(
    fraction_fit=0.1,                   # Sample 10% of available clients for training
    fraction_evaluate=0.05,             # Sample 5% of available clients for evaluation
    min_fit_clients=10,                  # Never sample less than 3 clients for training
    min_evaluate_clients=10,             # Never sample less than 3 clients for evaluation
    min_available_clients=int(
        NUM_CLIENTS*0.75
    ),                                  # Wait until at least 30 clients are available
    evaluate_metrics_aggregation_fn=weighted_average,  # aggregates federated metrics
    # evaluate_fn=get_evaluate_fn(centralized_testset),  # global evaluation function
)

# With a dictionary, you tell Flower's VirtualClientEngine that each
# client needs exclusive access to these many resources in order to run
client_resources = {"num_cpus": 0.5, "num_gpus": 0}

# Start simulation
history = fl.simulation.start_simulation(
    client_fn=get_client_fn(),
    num_clients=NUM_CLIENTS,
    config=fl.server.ServerConfig(num_rounds=10),
    strategy=myStrategy,
    client_resources=client_resources,
    actor_kwargs={
        "on_actor_init_fn": enable_tf_gpu_growth  # Enable GPU growth upon actor init.
    },
)

[92mINFO [0m:      Starting Flower simulation, config: num_rounds=10, no round_timeout
2024-07-08 17:05:02,266	INFO worker.py:1771 -- Started a local Ray instance.
[92mINFO [0m:      Flower VCE: Ray initialized with resources: {'CPU': 8.0, 'node:__internal_head__': 1.0, 'node:127.0.0.1': 1.0, 'memory': 3169841972.0, 'object_store_memory': 1584920985.0}
[92mINFO [0m:      Optimize your simulation with Flower VCE: https://flower.ai/docs/framework/how-to-run-simulations.html
[92mINFO [0m:      Flower VCE: Resources for each Virtual Client: {'num_cpus': 0.5, 'num_gpus': 0}
[92mINFO [0m:      Flower VCE: Creating VirtualClientEngineActorPool with 16 actors
[92mINFO [0m:      [INIT]
[92mINFO [0m:      Requesting initial parameters from one random client
[92mINFO [0m:      Received initial parameters from one random client
[92mINFO [0m:      Evaluating initial global parameters
[92mINFO [0m:      
[92mINFO [0m:      [ROUND 1]
[92mINFO [0m:      configure_fit: strategy 

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.4851 - loss: 1.2060  


[92mINFO [0m:      aggregate_evaluate: received 10 results and 0 failures
[92mINFO [0m:      
[92mINFO [0m:      [ROUND 2]
[92mINFO [0m:      configure_fit: strategy sampled 10 clients (out of 30)
[36m(ClientAppActor pid=15108)[0m   super().__init__(activity_regularizer=activity_regularizer, **kwargs)[32m [repeated 24x across cluster][0m
[92mINFO [0m:      aggregate_fit: received 10 results and 0 failures
[92mINFO [0m:      configure_evaluate: strategy sampled 10 clients (out of 30)


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7969 - loss: 0.8215 [32m [repeated 10x across cluster][0m
[1m1/3[0m [32m━━━━━━[0m[37m━━━━━━━━━━━━━━[0m [1m0s[0m 232ms/step - accuracy: 0.7500 - loss: 0.8164
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.7747 - loss: 0.7988 


[36m(ClientAppActor pid=15115)[0m   super().__init__(activity_regularizer=activity_regularizer, **kwargs)[32m [repeated 13x across cluster][0m
[92mINFO [0m:      aggregate_evaluate: received 10 results and 0 failures
[92mINFO [0m:      
[92mINFO [0m:      [ROUND 3]
[92mINFO [0m:      configure_fit: strategy sampled 10 clients (out of 30)
[92mINFO [0m:      aggregate_fit: received 10 results and 0 failures
[92mINFO [0m:      configure_evaluate: strategy sampled 10 clients (out of 30)
[92mINFO [0m:      aggregate_evaluate: received 10 results and 0 failures
[92mINFO [0m:      
[92mINFO [0m:      [ROUND 4]
[92mINFO [0m:      configure_fit: strategy sampled 10 clients (out of 30)


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7578 - loss: 0.5948  [32m [repeated 13x across cluster][0m


[36m(ClientAppActor pid=15113)[0m   super().__init__(activity_regularizer=activity_regularizer, **kwargs)[32m [repeated 27x across cluster][0m
[92mINFO [0m:      aggregate_fit: received 10 results and 0 failures
[92mINFO [0m:      configure_evaluate: strategy sampled 10 clients (out of 30)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9059 - loss: 0.2868  [32m [repeated 6x across cluster][0m


[92mINFO [0m:      aggregate_evaluate: received 10 results and 0 failures
[92mINFO [0m:      
[92mINFO [0m:      [ROUND 5]
[92mINFO [0m:      configure_fit: strategy sampled 10 clients (out of 30)
[92mINFO [0m:      aggregate_fit: received 10 results and 0 failures
[92mINFO [0m:      configure_evaluate: strategy sampled 10 clients (out of 30)
[36m(ClientAppActor pid=15112)[0m   super().__init__(activity_regularizer=activity_regularizer, **kwargs)[32m [repeated 27x across cluster][0m


[1m1/4[0m [32m━━━━━[0m[37m━━━━━━━━━━━━━━━[0m [1m0s[0m 127ms/step - accuracy: 1.0000 - loss: 0.1319
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.9820 - loss: 0.1698 


[92mINFO [0m:      aggregate_evaluate: received 10 results and 0 failures
[92mINFO [0m:      
[92mINFO [0m:      [ROUND 6]
[92mINFO [0m:      configure_fit: strategy sampled 10 clients (out of 30)


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 996us/step - accuracy: 0.8806 - loss: 0.3436[32m [repeated 17x across cluster][0m


[36m(ClientAppActor pid=15112)[0m   super().__init__(activity_regularizer=activity_regularizer, **kwargs)[32m [repeated 13x across cluster][0m
[92mINFO [0m:      aggregate_fit: received 10 results and 0 failures
[92mINFO [0m:      configure_evaluate: strategy sampled 10 clients (out of 30)
[92mINFO [0m:      aggregate_evaluate: received 10 results and 0 failures
[92mINFO [0m:      
[92mINFO [0m:      [ROUND 7]
[92mINFO [0m:      configure_fit: strategy sampled 10 clients (out of 30)
[36m(ClientAppActor pid=15111)[0m   super().__init__(activity_regularizer=activity_regularizer, **kwargs)[32m [repeated 23x across cluster][0m
[92mINFO [0m:      aggregate_fit: received 10 results and 0 failures
[92mINFO [0m:      configure_evaluate: strategy sampled 10 clients (out of 30)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 741us/step - accuracy: 0.8856 - loss: 0.2152[32m [repeated 12x across cluster][0m


[36m(ClientAppActor pid=15111)[0m   super().__init__(activity_regularizer=activity_regularizer, **kwargs)[32m [repeated 5x across cluster][0m


[1m1/4[0m [32m━━━━━[0m[37m━━━━━━━━━━━━━━━[0m [1m0s[0m 105ms/step - accuracy: 0.8438 - loss: 0.3038
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7853 - loss: 0.3935  
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7470 - loss: 0.4659  [32m [repeated 4x across cluster][0m


[92mINFO [0m:      aggregate_evaluate: received 10 results and 0 failures
[92mINFO [0m:      
[92mINFO [0m:      [ROUND 8]
[92mINFO [0m:      configure_fit: strategy sampled 10 clients (out of 30)
[92mINFO [0m:      aggregate_fit: received 10 results and 0 failures
[92mINFO [0m:      configure_evaluate: strategy sampled 10 clients (out of 30)
[36m(ClientAppActor pid=15116)[0m   super().__init__(activity_regularizer=activity_regularizer, **kwargs)[32m [repeated 18x across cluster][0m


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.7544 - loss: 0.5423  [32m [repeated 5x across cluster][0m
[1m1/4[0m [32m━━━━━[0m[37m━━━━━━━━━━━━━━━[0m [1m0s[0m 327ms/step - accuracy: 0.8438 - loss: 0.3630
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.8631 - loss: 0.3530 


[92mINFO [0m:      aggregate_evaluate: received 10 results and 0 failures
[92mINFO [0m:      
[92mINFO [0m:      [ROUND 9]
[92mINFO [0m:      configure_fit: strategy sampled 10 clients (out of 30)
[36m(ClientAppActor pid=15111)[0m   super().__init__(activity_regularizer=activity_regularizer, **kwargs)[32m [repeated 16x across cluster][0m
[92mINFO [0m:      aggregate_fit: received 10 results and 0 failures
[92mINFO [0m:      configure_evaluate: strategy sampled 10 clients (out of 30)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7842 - loss: 0.3913  [32m [repeated 6x across cluster][0m
[1m1/4[0m [32m━━━━━[0m[37m━━━━━━━━━━━━━━━[0m [1m0s[0m 104ms/step - accuracy: 0.7188 - loss: 0.4110[32m [repeated 3x across cluster][0m
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.8388 - loss: 0.3682 [32m [repeated 2x across cluster][0m


[92mINFO [0m:      aggregate_evaluate: received 10 results and 0 failures
[92mINFO [0m:      
[92mINFO [0m:      [ROUND 10]
[92mINFO [0m:      configure_fit: strategy sampled 10 clients (out of 30)
[36m(ClientAppActor pid=15111)[0m   super().__init__(activity_regularizer=activity_regularizer, **kwargs)[32m [repeated 14x across cluster][0m
[36m(ClientAppActor pid=15109)[0m   super().__init__(activity_regularizer=activity_regularizer, **kwargs)
[36m(ClientAppActor pid=15114)[0m   super().__init__(activity_regularizer=activity_regularizer, **kwargs)
[92mINFO [0m:      aggregate_fit: received 10 results and 0 failures
[92mINFO [0m:      configure_evaluate: strategy sampled 10 clients (out of 30)


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.9023 - loss: 0.2774  [32m [repeated 8x across cluster][0m
[1m1/4[0m [32m━━━━━[0m[37m━━━━━━━━━━━━━━━[0m [1m0s[0m 319ms/step - accuracy: 0.9688 - loss: 0.1024[32m [repeated 2x across cluster][0m
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.9757 - loss: 0.1183  [32m [repeated 4x across cluster][0m


[36m(ClientAppActor pid=15108)[0m   super().__init__(activity_regularizer=activity_regularizer, **kwargs)[32m [repeated 11x across cluster][0m


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8374 - loss: 0.2307  [32m [repeated 3x across cluster][0m
[1m1/4[0m [32m━━━━━[0m[37m━━━━━━━━━━━━━━━[0m [1m1s[0m 346ms/step - accuracy: 0.8750 - loss: 0.1785
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.9011 - loss: 0.1658  


[36m(ClientAppActor pid=15110)[0m   super().__init__(activity_regularizer=activity_regularizer, **kwargs)[32m [repeated 4x across cluster][0m
[92mINFO [0m:      aggregate_evaluate: received 10 results and 0 failures
[92mINFO [0m:      
[92mINFO [0m:      [SUMMARY]


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - accuracy: 0.7190 - loss: 1.2545


[92mINFO [0m:      Run finished 10 round(s) in 80.00s
[92mINFO [0m:      	History (loss, distributed):
[92mINFO [0m:      		round 1: 1.237503048306044
[92mINFO [0m:      		round 2: 0.823358740785522
[92mINFO [0m:      		round 3: 0.6033574819117505
[92mINFO [0m:      		round 4: 0.4422619287636091
[92mINFO [0m:      		round 5: 0.34092859475233184
[92mINFO [0m:      		round 6: 0.44954666699706813
[92mINFO [0m:      		round 7: 0.5096752211148015
[92mINFO [0m:      		round 8: 0.34704422289936004
[92mINFO [0m:      		round 9: 0.35906792503533086
[92mINFO [0m:      		round 10: 0.4836552601275485
[92mINFO [0m:      	History (metrics, distributed, evaluate):
[92mINFO [0m:      	{'accuracy': [(1, 0.39813857469341746),
[92mINFO [0m:      	              (2, 0.7742885240628745),
[92mINFO [0m:      	              (3, 0.7213883682330599),
[92mINFO [0m:      	              (4, 0.8217433894766416),
[92mINFO [0m:      	              (5, 0.8542665289438125),
[92mINFO 

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7447 - loss: 1.2192  
