In [19]:
import collections

import numpy as np
import os
import tensorflow as tf
from tensorflow import keras
import tensorflow_federated as tff
import pandas as pd

from sklearn.model_selection import train_test_split


from keras.models import Sequential 
from keras.layers import Dense
from keras.initializers import GlorotUniform
from keras.initializers import HeUniform
from keras.layers import Dense, Dropout


# Fraction intended for a test split (not referenced by the code visible in this notebook)
TEST_SIZE = 0.2
# Number of simulated clients that training rows are randomly assigned to
NUM_CLIENTS = 100
# How many of the (sorted) client ids actually take part in training
ACTIVE_CLIENTS = 100
# Batch size applied to every client dataset in preprocess()
BATCH_SIZE = 512
# Dropout rate used after each hidden layer of the Keras model
DROPOUT = 0.1
# Number of times each client's data is repeated in preprocess()
EPOCHS = 10
# Buffer size passed to tf.data prefetch in preprocess()
PREFETCH_BUFFER = 10
# Number of federated training rounds
NUM_ROUNDS = 20
# Show where the tensorflow_federated package is installed
path = os.path.dirname(tff.__file__)
print(path)

# Seed NumPy's legacy global RNG (create_clients uses its own seeded Generator)
np.random.seed(42)

/home/ella/Documents/FL/venv-federated/lib/python3.9/site-packages/tensorflow_federated


In [20]:
# Load the preprocessed train/test CSVs and separate the features from the
# 'Transported' label column
train_df = pd.read_csv('datasets/train_preprocessed.csv')
test_df = pd.read_csv('datasets/test_preprocessed.csv')

train_x, train_y = train_df.drop(columns=['Transported']), train_df['Transported']

test_x, test_y = test_df.drop(columns=['Transported']), test_df['Transported']

# Per-client preprocessing: repeat the client's examples, then split them into batches
def preprocess(dataset):
  """Repeat `dataset` EPOCHS times, batch it and attach a prefetch buffer.

  Args:
    dataset: an object exposing the tf.data-style `repeat`/`batch`/`prefetch` chain.

  Returns:
    The repeated, batched and prefetched dataset.
  """
  repeated = dataset.repeat(EPOCHS)
  batched = repeated.batch(BATCH_SIZE)
  return batched.prefetch(PREFETCH_BUFFER)

# Build a TFF ClientData from the training DataFrame by tagging every row with a
# randomly drawn client id (column 'client_nums') and grouping rows by that id
def create_clients(dataset):
    """Partition `dataset` across NUM_CLIENTS simulated clients.

    Every row is assigned a client id drawn uniformly (with replacement) from
    range(NUM_CLIENTS) using a generator seeded with 42, so the partition is
    the same on every call. The caller's DataFrame is left untouched: the
    'client_nums' column is added to a copy.

    Args:
        dataset: pandas DataFrame with the feature columns followed by the
            label column. The split is positional, so the label is taken as
            the last column of the input — assumed, confirm against the CSV.

    Returns:
        The ClientData produced by
        tff.simulation.datasets.ClientData.from_clients_and_tf_fn. Each
        client's dataset is built from an OrderedDict with keys 'y' (label)
        and 'x' (features).
    """
    # Draw one client id per row
    client_nums = list(range(NUM_CLIENTS))
    generator = np.random.default_rng(42)
    clients = generator.choice(client_nums, len(dataset))

    # `assign` returns a new frame, so the input DataFrame is not mutated
    assigned = dataset.assign(client_nums=clients)

    # One dict per client: after the assignment the last column is the client
    # id, the second to last is the label and everything before is a feature
    client_train_dataset = collections.OrderedDict()
    for client_key, client_rows in assigned.groupby('client_nums'):
        client_train_dataset[client_key] = collections.OrderedDict((
            ('y', client_rows.iloc[:, -2]),
            ('x', client_rows.iloc[:, :-2]),
        ))

    # Expose the per-client dicts through the ClientData interface
    def serializable_dataset_fn(client_id):
        client_data = client_train_dataset[client_id]
        return tf.data.Dataset.from_tensor_slices(client_data)

    tff_train_data = tff.simulation.datasets.ClientData.from_clients_and_tf_fn(
        client_ids=list(client_train_dataset.keys()),
        serializable_dataset_fn=serializable_dataset_fn
    )

    return tff_train_data

# Build the ClientData, keep the first ACTIVE_CLIENTS ids (in sorted order) and
# create one preprocessed dataset per selected client
client_data_df = create_clients(train_df)
client_ids = sorted(client_data_df.client_ids)[:ACTIVE_CLIENTS]
federated_train_data = [
    preprocess(client_data_df.create_tf_dataset_for_client(client_id))
    for client_id in client_ids
]

In [21]:
# Take the first batch of the client dataset at index 5 and convert its tensors to NumPy
sample_batch = tf.nest.map_structure(lambda x: x.numpy(),
                                     next(iter(federated_train_data[5])))
# Print the element structure of the first client's batched dataset
print(federated_train_data[0].element_spec)
# Last expression of the cell: displays the sampled batch
sample_batch


OrderedDict([('y', TensorSpec(shape=(None,), dtype=tf.float64, name=None)), ('x', TensorSpec(shape=(None, 16), dtype=tf.float64, name=None))])


OrderedDict([('y',
              array([0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 1., 0., 0., 0., 0., 0., 0.,
                     0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0., 1., 1., 0., 1., 0., 0.,
                     1., 0., 1., 1., 1., 1., 1., 1., 0., 1., 0., 1., 1., 1., 1., 0., 1.,
                     0., 0., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 1., 1., 1., 1.,
                     0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0.,
                     0., 0., 0., 1., 1., 0., 1., 0., 0., 1., 0., 1., 1., 1., 1., 1., 1.,
                     0., 1., 0., 1., 1., 1., 1., 0., 1., 0., 0., 0., 0., 0., 1., 0., 1.,
                     0., 0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 1., 0., 0., 0., 0., 0.,
                     0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0., 1., 1., 0., 1., 0.,
                     0., 1., 0., 1., 1., 1., 1., 1., 1., 0., 1., 0., 1., 1., 1., 1., 0.,
                     1., 0., 0., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 1., 1., 1.,
  

In [22]:
def create_keras_model():
  """Build the uncompiled Keras binary classifier.

  Architecture: an input-width Dense layer, then hidden Dense layers of 1024,
  256 and 128 units (ReLU, He-uniform init, L2 regularisation), each Dense
  followed by Dropout(DROPOUT), and a single sigmoid output unit. The input
  width is read from the module-level `train_x`.

  Returns:
    A new `Sequential` model instance.
  """
  n_features = train_x.shape[-1]
  model = Sequential()

  # Input block: as many units as there are input features, no regulariser
  model.add(Dense(n_features, kernel_initializer=HeUniform(), activation='relu', input_dim=n_features))
  model.add(Dropout(DROPOUT))

  # Hidden blocks share everything except their width
  for width in (1024, 256, 128):
    model.add(Dense(width,
                    kernel_initializer=HeUniform(),
                    activation='relu',
                    kernel_regularizer=tf.keras.regularizers.l2(30e-6)))
    model.add(Dropout(DROPOUT))

  # Single sigmoid unit for the binary target
  model.add(Dense(1, kernel_initializer=GlorotUniform(), activation='sigmoid'))
  return model

In [23]:
def model_fn():
  """Wrap a freshly built Keras model for TFF.

  A new Keras model is constructed on every call rather than captured from an
  enclosing scope, because TFF invokes this function in different graph
  contexts. The input spec is taken from the first client's dataset.

  Returns:
    The result of `tff.learning.models.from_keras_model`, configured with
    binary focal cross-entropy loss and a binary accuracy metric.
  """
  return tff.learning.models.from_keras_model(
      create_keras_model(),
      input_spec=federated_train_data[0].element_spec,
      loss=tf.keras.losses.BinaryFocalCrossentropy(),
      metrics=[tf.keras.metrics.BinaryAccuracy()],
  )

In [24]:
# Build the weighted FedAvg process; client and server both use Adam with the same learning rate
training_process = tff.learning.algorithms.build_weighted_fed_avg(
    model_fn=model_fn,
    client_optimizer_fn=tff.learning.optimizers.build_adam(learning_rate=1e-4),
    server_optimizer_fn=tff.learning.optimizers.build_adam(learning_rate=1e-4),
)
# Initial state that the training loop updates round after round
train_state = training_process.initialize()

2024-09-21 12:56:58.466821: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-09-21 12:56:58.466974: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2024-09-21 12:56:58.467056: I tensorflow/core/grappler/clusters/single_machine.cc:361] Starting new session
2024-09-21 12:56:58.467412: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-09-21 12:56:58.467567: I tensorflow/compiler/xla/stream_executor/

In [25]:
# Run NUM_ROUNDS federated rounds, feeding each round's resulting state into the next
for round_num in range(NUM_ROUNDS):
  result = training_process.next(train_state, federated_train_data)
  train_state, train_metrics = result.state, result.metrics
  print('round {:2d}, metrics={}'.format(round_num, train_metrics))

2024-09-21 12:56:59.292335: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-09-21 12:56:59.292509: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2024-09-21 12:56:59.292591: I tensorflow/core/grappler/clusters/single_machine.cc:361] Starting new session
2024-09-21 12:56:59.293006: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-09-21 12:56:59.293166: I tensorflow/compiler/xla/stream_executor/

round  0, metrics=OrderedDict([('distributor', ()), ('client_work', OrderedDict([('train', OrderedDict([('binary_accuracy', 0.5315657), ('loss', 0.25491577), ('num_examples', 55630), ('num_batches', 170)]))])), ('aggregator', OrderedDict([('mean_value', ()), ('mean_weight', ())])), ('finalizer', OrderedDict([('update_non_finite', 0)]))])
round  1, metrics=OrderedDict([('distributor', ()), ('client_work', OrderedDict([('train', OrderedDict([('binary_accuracy', 0.5763078), ('loss', 0.22499974), ('num_examples', 55630), ('num_batches', 170)]))])), ('aggregator', OrderedDict([('mean_value', ()), ('mean_weight', ())])), ('finalizer', OrderedDict([('update_non_finite', 0)]))])
round  2, metrics=OrderedDict([('distributor', ()), ('client_work', OrderedDict([('train', OrderedDict([('binary_accuracy', 0.6232788), ('loss', 0.205432), ('num_examples', 55630), ('num_batches', 170)]))])), ('aggregator', OrderedDict([('mean_value', ()), ('mean_weight', ())])), ('finalizer', OrderedDict([('update_non

In [27]:
def keras_evaluate(state):
  """Evaluate the global model held in `state` on the test split.

  The weights are extracted from the training-process state and copied into a
  freshly built Keras model, so the standard `.evaluate()` method can be used
  on the module-level `test_x` / `test_y`. The loss and accuracy are printed.

  Args:
    state: a state object accepted by `training_process.get_model_weights`.
  """
  eval_model = create_keras_model()
  eval_model.compile(
      loss=tf.keras.losses.BinaryFocalCrossentropy(),
      metrics=[tf.keras.metrics.BinaryAccuracy()],
  )
  # Copy the federated weights into the local Keras model
  training_process.get_model_weights(state).assign_weights_to(eval_model)
  eval_loss, eval_accuracy = eval_model.evaluate(x=test_x, y=test_y)
  print('\tEval: loss={l:.3f}, accuracy={a:.3f}'.format(l=eval_loss, a=eval_accuracy))
# Debug aid: print the concrete type of the training state
print(type(train_state))

# Evaluate the trained global model on the test split
keras_evaluate(train_state)


<class 'tensorflow_federated.python.learning.templates.composers.LearningAlgorithmState'>
	Eval: loss=0.210, accuracy=0.751
