In [104]:
import collections

import numpy as np
import os
import tensorflow as tf
from tensorflow import keras
import tensorflow_federated as tff
import pandas as pd

from sklearn.model_selection import train_test_split


from keras.models import Sequential 
from keras.layers import Dense
from keras.initializers import GlorotUniform
from keras.initializers import HeUniform
from keras.layers import Dense, Dropout


TEST_SIZE = 0.2
NUM_CLIENTS = 100
ACTIVE_CLIENTS = 100
BATCH_SIZE = 512
DROPOUT = 0.1
EPOCHS = 10
PREFETCH_BUFFER = 10
NUM_ROUNDS = 20
path = os.path.dirname(tff.__file__)
print(path)

np.random.seed(42)

/home/ella/Documents/FL/venv-federated/lib/python3.10/site-packages/tensorflow_federated


In [105]:
# Import del dataset e divisione in train e test
train_df = pd.read_csv('datasets/train_obesity.csv')
test_df = pd.read_csv('datasets/test_obesity.csv')

train_x = train_df.drop(columns=['label'])
train_y = train_df['label']

test_x = test_df.drop(columns=['label'])
test_y = test_df['label']

# Funzione per il preprocessing dei dati del singolo client che divide il dataset in batch
def preprocess(dataset):
  return dataset.repeat(EPOCHS).batch(BATCH_SIZE).prefetch(PREFETCH_BUFFER)

# Funzione per la creazione di un dataset ClientData a partire dal dataset di training a cui viene
# aggiunta una colonna client_nums che assegna ad ogni riga un client randomico
def create_clients(dataset):
    # Viene creata una lista randomica di client
    client_nums = list(range(NUM_CLIENTS))
    generator = np.random.default_rng(42)
    clients = generator.choice(client_nums, len(dataset))
    dataset['client_nums'] = clients

    # Viene convertito il dataset in dizionari, uno per ogni client, con label e pixel associati
    client_train_dataset = collections.OrderedDict()
    grouped_dataset = dataset.groupby('client_nums')
    for key, item in grouped_dataset:
        current_client = grouped_dataset.get_group(key)
        data = collections.OrderedDict((('y', train_y), ('x', train_x)))
        client_train_dataset[key] = data

    # I dizionari vengono convertiti in ClientDataset
    def serializable_dataset_fn(client_id):
        client_data = client_train_dataset[client_id]
        return tf.data.Dataset.from_tensor_slices(client_data)

    tff_train_data = tff.simulation.datasets.ClientData.from_clients_and_tf_fn(
        client_ids=list(client_train_dataset.keys()),
        serializable_dataset_fn=serializable_dataset_fn
    )

    return tff_train_data

# Creazione della lista contenente i client con i relativi dataset
client_data_df = create_clients(train_df)
client_ids = sorted(client_data_df.client_ids)[:ACTIVE_CLIENTS]
federated_train_data = [preprocess(client_data_df.create_tf_dataset_for_client(x)) for x in client_ids]

In [106]:
sample_batch = tf.nest.map_structure(lambda x: x.numpy(),
                                     next(iter(federated_train_data[5])))
print(federated_train_data[0].element_spec)
sample_batch


OrderedDict([('y', TensorSpec(shape=(None,), dtype=tf.float64, name=None)), ('x', TensorSpec(shape=(None, 14), dtype=tf.float64, name=None))])


OrderedDict([('y',
              array([19.77769867, 47.71870465, 28.14555002, 28.71908906, 36.33752577,
                     28.53526812, 24.02380867, 25.52411445, 17.92131315, 17.58149245,
                     18.59103675, 36.31495668, 39.40560448, 25.93423807, 26.57312925,
                     27.67393798, 33.63973154, 39.18367347, 27.96185725, 30.93043808,
                     25.91807609, 37.31802573, 45.2101355 , 38.5375391 , 32.64812118,
                     42.28172429, 32.02001105, 48.5720615 , 31.06675908, 27.53768584,
                     37.49786273, 28.44462027, 43.86400172, 28.07609454, 36.57723076,
                     17.66906966, 27.60694862, 18.22302352, 37.03035533, 24.95073175,
                     20.19946976, 20.9566075 , 32.49673425, 28.2789534 , 18.00924749,
                     42.33215552, 27.78428516, 32.04699922, 22.22222222, 26.38798144,
                     27.94120875, 24.1516725 , 16.44459437, 34.06757742, 37.19305389,
                     17.47425268, 2

In [107]:
def create_keras_model():
  model = Sequential()

  model.add(Dense(128, kernel_initializer='normal',input_dim = train_x.shape[1], activation='relu'))
  model.add(Dropout(DROPOUT))
  model.add(Dense(256, kernel_initializer='normal',activation='relu'))
  model.add(Dropout(DROPOUT))
  model.add(Dense(128, kernel_initializer='normal',activation='relu'))
  model.add(Dense(64, kernel_initializer='normal',activation='relu'))
  model.add(Dense(1, kernel_initializer='normal',activation='linear'))

  return model

In [108]:
def model_fn():
  # We _must_ create a new model here, and _not_ capture it from an external
  # scope. TFF will call this within different graph contexts.
  keras_model = create_keras_model()
  return tff.learning.models.from_keras_model(
      keras_model,
      input_spec=federated_train_data[0].element_spec,
      loss=tf.keras.losses.MeanSquaredError(),
      metrics=[tf.keras.metrics.MeanSquaredError()])

In [109]:
initial_learning_rate = 0.01
final_learning_rate = 0.001
learning_rate_decay_factor = (final_learning_rate / initial_learning_rate)**(1/EPOCHS)
steps_per_epoch = int(train_x.shape[0]/BATCH_SIZE)

lr_schedule = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate,
    decay_steps=steps_per_epoch,
    decay_rate=learning_rate_decay_factor,
    staircase=True)

def client_optimizer_fn(lr):
    return tf.keras.optimizers.Adam(learning_rate=lr)

def server_optimizer_fn(lr):
    return tf.keras.optimizers.Adam(learning_rate=lr)

training_process = tff.learning.algorithms.build_weighted_fed_avg_with_optimizer_schedule(
    model_fn,
    client_learning_rate_fn=lr_schedule,   
    client_optimizer_fn=client_optimizer_fn)
train_state = training_process.initialize()

2024-09-22 17:49:00.623916: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-09-22 17:49:00.624086: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2024-09-22 17:49:00.624191: I tensorflow/core/grappler/clusters/single_machine.cc:361] Starting new session
2024-09-22 17:49:00.624560: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-09-22 17:49:00.624676: W tensorflow/core/common_runtime/gpu/gpu_d

In [110]:
for round_num in range(NUM_ROUNDS):
  result = training_process.next(train_state, federated_train_data)
  train_state = result.state
  train_metrics = result.metrics
  print('round {:2d}, metrics={}'.format(round_num, train_metrics))

2024-09-22 17:49:01.032466: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-09-22 17:49:01.032630: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2024-09-22 17:49:01.032746: I tensorflow/core/grappler/clusters/single_machine.cc:361] Starting new session
2024-09-22 17:49:01.033174: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-09-22 17:49:01.033289: W tensorflow/core/common_runtime/gpu/gpu_d

round  0, metrics=OrderedDict([('distributor', ()), ('client_work', OrderedDict([('train', OrderedDict([('mean_squared_error', 110224.09), ('loss', 112864.195), ('num_examples', 1350000), ('num_batches', 2700)])), ('client_learning_rate', 0.1)])), ('aggregator', OrderedDict([('mean_value', ()), ('mean_weight', ())])), ('finalizer', OrderedDict([('update_non_finite', 0)]))])
round  1, metrics=OrderedDict([('distributor', ()), ('client_work', OrderedDict([('train', OrderedDict([('mean_squared_error', 249.35728), ('loss', 254.62296), ('num_examples', 1350000), ('num_batches', 2700)])), ('client_learning_rate', 0.1)])), ('aggregator', OrderedDict([('mean_value', ()), ('mean_weight', ())])), ('finalizer', OrderedDict([('update_non_finite', 0)]))])
round  2, metrics=OrderedDict([('distributor', ()), ('client_work', OrderedDict([('train', OrderedDict([('mean_squared_error', 41.432796), ('loss', 41.97072), ('num_examples', 1350000), ('num_batches', 2700)])), ('client_learning_rate', 0.06309574

In [111]:
def keras_evaluate(state):
  # Take our global model weights and push them back into a Keras model to
  # use its standard `.evaluate()` method.
  keras_model = create_keras_model()
  keras_model.compile(
      loss=tf.keras.losses.MeanSquaredError(),
      metrics=[tf.keras.metrics.MeanSquaredError()])
  model_weights = training_process.get_model_weights(state)
  model_weights.assign_weights_to(keras_model)
  loss, accuracy = keras_model.evaluate(x=test_x, y=test_y)
  print('\tEval: loss={l:.3f}, accuracy={a:.3f}'.format(l=loss, a=accuracy))
print(type(train_state))

keras_evaluate(train_state)


<class 'tensorflow_federated.python.learning.templates.composers.LearningAlgorithmState'>
	Eval: loss=0.737, accuracy=0.737
