In [10]:
import collections

import numpy as np
import os
import tensorflow as tf
from tensorflow import keras
import tensorflow_federated as tff
import pandas as pd

from sklearn.model_selection import train_test_split


from keras.models import Sequential 
from keras.layers import Dense
from keras.initializers import GlorotUniform
from keras.initializers import HeUniform
from keras.layers import Dense, Dropout


TEST_SIZE = 0.2
NUM_CLIENTS = 100
ACTIVE_CLIENTS = 100
BATCH_SIZE = 512
DROPOUT = 0.1
EPOCHS = 10
PREFETCH_BUFFER = 10
NUM_ROUNDS = 10
path = os.path.dirname(tff.__file__)
print(path)

np.random.seed(42)

/home/ella/Documents/FL/venv-federated/lib/python3.9/site-packages/tensorflow_federated


In [11]:
# Import del dataset e divisione in train e test
df = pd.read_csv('datasets/data.csv')
# riempire i nan con 0
df.fillna(0, inplace=True)

train_df, test_df = train_test_split(df, test_size = TEST_SIZE, random_state = 42)

# Funzione per il preprocessing dei dati del singolo client che divide il dataset in batch
def preprocess(dataset):
  return dataset.repeat(EPOCHS).batch(BATCH_SIZE).prefetch(PREFETCH_BUFFER)

# Funzione per la creazione di un dataset ClientData a partire dal dataset di training a cui viene
# aggiunta una colonna client_nums che assegna ad ogni riga un client randomico
def create_clients(dataset):
    # Viene creata una lista randomica di client
    client_nums = list(range(NUM_CLIENTS))
    generator = np.random.default_rng(42)
    clients = generator.choice(client_nums, len(dataset))
    dataset['client_nums'] = clients

    # Viene convertito il dataset in dizionari, uno per ogni client, con label e pixel associati
    client_train_dataset = collections.OrderedDict()
    grouped_dataset = dataset.groupby('client_nums')
    for key, item in grouped_dataset:
        current_client = grouped_dataset.get_group(key)
        data = collections.OrderedDict((('y',current_client.iloc[:,-2]), ('x', current_client.iloc[:,:-2])))
        client_train_dataset[key] = data

    # I dizionari vengono convertiti in ClientDataset
    def serializable_dataset_fn(client_id):
        client_data = client_train_dataset[client_id]
        return tf.data.Dataset.from_tensor_slices(client_data)

    tff_train_data = tff.simulation.datasets.ClientData.from_clients_and_tf_fn(
        client_ids=list(client_train_dataset.keys()),
        serializable_dataset_fn=serializable_dataset_fn
    )

    return tff_train_data

# Creazione della lista contenente i client con i relativi dataset
client_data_df = create_clients(train_df)
client_ids = sorted(client_data_df.client_ids)[:ACTIVE_CLIENTS]
federated_train_data = [preprocess(client_data_df.create_tf_dataset_for_client(x)) for x in client_ids]

In [12]:
sample_batch = tf.nest.map_structure(lambda x: x.numpy(),
                                     next(iter(federated_train_data[0])))
sample_batch

OrderedDict([('y',
              array([1., 1., 0., 0., 0., 1., 1., 1., 0., 0., 1., 1., 1., 1., 1., 1., 1.,
                     0., 1., 1., 1., 1., 1., 0., 0., 0., 0., 1., 1., 1., 1., 1., 0., 0.,
                     0., 0., 1., 0., 0., 0., 0., 0., 1., 1., 1., 0., 0., 1., 1., 1., 1.,
                     1., 0., 0., 1., 0., 0., 0., 0., 0., 0., 1., 0., 1., 1., 0., 0., 0.,
                     0., 1., 1., 1., 1., 0., 0., 0., 1., 1., 1., 0., 0., 1., 1., 1., 1.,
                     1., 1., 1., 0., 1., 1., 1., 1., 1., 0., 0., 0., 0., 1., 1., 1., 1.,
                     1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 1., 1., 0., 0., 1.,
                     1., 1., 1., 1., 0., 0., 1., 0., 0., 0., 0., 0., 0., 1., 0., 1., 1.,
                     0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 1., 1., 1., 0., 0., 1.,
                     1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 1., 1., 0., 0., 0., 0., 1.,
                     1., 1., 1., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 1., 1.,
  

In [13]:
def create_keras_model():
  model = Sequential()
  # layers
  model.add(Dense(64, kernel_initializer = HeUniform(), activation = 'relu', input_dim = 42))
  model.add(Dropout(DROPOUT))
  model.add(Dense(10, kernel_initializer = HeUniform(), activation = 'relu'))
  model.add(Dense(1, kernel_initializer = GlorotUniform(), activation = 'sigmoid'))
  return model

In [14]:
def model_fn():
  # We _must_ create a new model here, and _not_ capture it from an external
  # scope. TFF will call this within different graph contexts.
  keras_model = create_keras_model()
  return tff.learning.models.from_keras_model(
      keras_model,
      input_spec=federated_train_data[0].element_spec,
      loss=tf.keras.losses.BinaryFocalCrossentropy(),
      metrics=[tf.keras.metrics.Accuracy()])

In [15]:
training_process = tff.learning.algorithms.build_weighted_fed_avg(
    model_fn,
    client_optimizer_fn=tff.learning.optimizers.build_adam(learning_rate=0.0001),
    server_optimizer_fn=tff.learning.optimizers.build_adam(learning_rate=0.0001))
train_state = training_process.initialize()

2024-09-20 15:43:57.371086: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2024-09-20 15:43:57.371196: I tensorflow/core/grappler/clusters/single_machine.cc:361] Starting new session
2024-09-20 15:43:57.400536: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2024-09-20 15:43:57.400645: I tensorflow/core/grappler/clusters/single_machine.cc:361] Starting new session


In [16]:
for round_num in range(NUM_ROUNDS):
  result = training_process.next(train_state, federated_train_data)
  train_state = result.state
  train_metrics = result.metrics
  print('round {:2d}, metrics={}'.format(round_num, train_metrics))

2024-09-20 15:43:57.886981: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2024-09-20 15:43:57.887076: I tensorflow/core/grappler/clusters/single_machine.cc:361] Starting new session
2024-09-20 15:43:58.040092: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2024-09-20 15:43:58.040188: I tensorflow/core/grappler/clusters/single_machine.cc:361] Starting new session
2024-09-20 15:43:58.047485: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2024-09-20 15:43:58.047589: I tensorflow/core/grappler/clusters/single_machine.cc:361] Starting new session
2024-09-20 15:43:58.055665: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2024-09-20 15:43:58.055752: I tensorflow/core/grappler/clusters/single_machine.cc:361] Starting new session


round  0, metrics=OrderedDict([('distributor', ()), ('client_work', OrderedDict([('train', OrderedDict([('accuracy', 0.47953695), ('loss', 7.4270635), ('num_examples', 69540), ('num_batches', 198)]))])), ('aggregator', OrderedDict([('mean_value', ()), ('mean_weight', ())])), ('finalizer', OrderedDict([('update_non_finite', 0)]))])
round  1, metrics=OrderedDict([('distributor', ()), ('client_work', OrderedDict([('train', OrderedDict([('accuracy', 0.47808456), ('loss', 7.408291), ('num_examples', 69540), ('num_batches', 198)]))])), ('aggregator', OrderedDict([('mean_value', ()), ('mean_weight', ())])), ('finalizer', OrderedDict([('update_non_finite', 0)]))])
round  2, metrics=OrderedDict([('distributor', ()), ('client_work', OrderedDict([('train', OrderedDict([('accuracy', 0.47745183), ('loss', 7.4082904), ('num_examples', 69540), ('num_batches', 198)]))])), ('aggregator', OrderedDict([('mean_value', ()), ('mean_weight', ())])), ('finalizer', OrderedDict([('update_non_finite', 0)]))])
ro

In [17]:
def keras_evaluate(state):
  # Take our global model weights and push them back into a Keras model to
  # use its standard `.evaluate()` method.
  keras_model = create_keras_model()
  keras_model.compile(
      loss=tf.keras.losses.BinaryFocalCrossentropy(),
      metrics=[tf.keras.metrics.Accuracy()])
  model_weights = training_process.get_model_weights(state)
  model_weights.assign_weights_to(keras_model)
  loss, accuracy = keras_model.evaluate(x=test_df.iloc[:,:-1], y=test_df.iloc[:,-1], steps=2, verbose=0)
  print('\tEval: loss={l:.3f}, accuracy={a:.3f}'.format(l=loss, a=accuracy))
print(type(train_state))

keras_evaluate(train_state)


<class 'tensorflow_federated.python.learning.templates.composers.LearningAlgorithmState'>


2024-09-20 15:44:16.942725: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2024-09-20 15:44:16.942845: I tensorflow/core/grappler/clusters/single_machine.cc:361] Starting new session


	Eval: loss=135.884, accuracy=0.485
