In [33]:
import collections
import math

import numpy as np
import os
import tensorflow as tf
from tensorflow import keras
import tensorflow_federated as tff
import pandas as pd

from sklearn.model_selection import train_test_split


from keras.models import Sequential 
from keras.layers import Dense
from keras.initializers import GlorotUniform
from keras.initializers import HeUniform
from keras.layers import Dense, Dropout


TEST_SIZE = 0.2
NUM_CLIENTS = 500
ACTIVE_CLIENTS = 500
BATCH_SIZE = 512
DROPOUT = 0.1
EPOCHS = 10
PREFETCH_BUFFER = 10
NUM_ROUNDS = 20
path = os.path.dirname(tff.__file__)
print(path)

np.random.seed(42)

/home/ella/Documents/FL/venv-federated/lib/python3.10/site-packages/tensorflow_federated


In [36]:
# Import del dataset e divisione in train e test
train_df = pd.read_csv('datasets/train_titanic.csv')
test_df = pd.read_csv('datasets/test_titanic.csv')

test_x = test_df.drop(columns=['Transported'])
test_y = test_df['Transported']

# Funzione per il preprocessing dei dati del singolo client che divide il dataset in batch
def preprocess(dataset):
  return dataset.repeat(EPOCHS).batch(BATCH_SIZE).prefetch(PREFETCH_BUFFER)

def client_distribution(dataset, perc):
    positive = []
    negative = []

    dataset_0 = dataset[dataset['Transported'] == 0]
    dataset_1 = dataset[dataset['Transported'] == 1]
    parts = (len(dataset) / NUM_CLIENTS)/4
    print(parts*4*NUM_CLIENTS)
    for c in range(round(NUM_CLIENTS/2)):
        positive += [c] * math.floor(parts*perc)
        negative += [c] * math.ceil(parts*(4-perc))
    for c in range(round(NUM_CLIENTS/2), NUM_CLIENTS):
        positive += [c] * math.floor(parts*(4-perc))
        negative += [c] * math.ceil(parts*perc)
    # 

    dataset_0['client_nums'] = positive
    dataset_1['client_nums'] = negative
    return pd.concat([dataset_0, dataset_1])

# Funzione per la creazione di un dataset ClientData a partire dal dataset di training a cui viene
# aggiunta una colonna client_nums che assegna ad ogni riga un client randomico
def create_clients(dataset, perc):
    dataset = client_distribution(dataset, perc)

    # Viene convertito il dataset in dizionari, uno per ogni client, con label e pixel associati
    client_train_dataset = collections.OrderedDict()
    grouped_dataset = dataset.groupby('client_nums')
    for key, item in grouped_dataset:
        current_client = grouped_dataset.get_group(key)
        data = collections.OrderedDict((('y',current_client.iloc[:,-2]), ('x', current_client.iloc[:,:-2])))
        client_train_dataset[key] = data

    # I dizionari vengono convertiti in ClientDataset
    def serializable_dataset_fn(client_id):
        client_data = client_train_dataset[client_id]
        return tf.data.Dataset.from_tensor_slices(client_data)

    tff_train_data = tff.simulation.datasets.ClientData.from_clients_and_tf_fn(
        client_ids=list(client_train_dataset.keys()),
        serializable_dataset_fn=serializable_dataset_fn
    )

    return tff_train_data

# Creazione della lista contenente i client con i relativi dataset
#client_data_df = create_clients(train_df, 2)
#client_ids = sorted(client_data_df.client_ids)[:ACTIVE_CLIENTS]
#federated_train_data = [preprocess(client_data_df.create_tf_dataset_for_client(x)) for x in client_ids]

In [37]:

ds = client_distribution(train_df, 2)
#stampare per ogni client il numero di positivi e negativi
print(ds.groupby('client_nums').sum())



5563.0


ValueError: Length of values (2500) does not match length of index (2765)

In [13]:
sample_batch = tf.nest.map_structure(lambda x: x.numpy(),
                                     next(iter(federated_train_data[5])))
print(federated_train_data[0].element_spec)
sample_batch


OrderedDict([('y', TensorSpec(shape=(None,), dtype=tf.float64, name=None)), ('x', TensorSpec(shape=(None, 16), dtype=tf.float64, name=None))])


OrderedDict([('y',
              array([1., 1., 0., 0., 0., 1., 1., 0., 0., 0., 1., 1., 0., 0., 0., 1., 1.,
                     0., 0., 0., 1., 1., 0., 0., 0., 1., 1., 0., 0., 0., 1., 1., 0., 0.,
                     0., 1., 1., 0., 0., 0., 1., 1., 0., 0., 0., 1., 1., 0., 0., 0.])),
             ('x',
              array([[-0.33741795, -0.28399291, -0.25827893, -0.26223396, -0.8501908 ,
                       0.        ,  0.        ,  0.        ,  0.        ,  1.        ,
                       0.        ,  1.        ,  0.        ,  0.        ,  1.        ,
                       0.        ],
                     [-0.33741795, -0.28399291, -0.2591386 , -0.51611434,  1.17620656,
                       1.        ,  0.        ,  0.        ,  0.        ,  0.        ,
                       1.        ,  0.        ,  1.        ,  1.        ,  1.        ,
                       0.        ],
                     [-0.33590224, -0.28399291,  0.43117165, -0.23379093, -0.8501908 ,
               

In [14]:
def create_keras_model():
  model = Sequential()

  model.add(Dense(train_x.shape[-1], kernel_initializer = HeUniform(), activation = 'relu', input_dim = train_x.shape[-1]))
  model.add(Dropout(DROPOUT))
  model.add(Dense(1024, kernel_initializer = HeUniform(), activation = 'relu', kernel_regularizer = tf.keras.regularizers.l2(30e-6)))
  model.add(Dropout(DROPOUT))
  model.add(Dense(256, kernel_initializer = HeUniform(), activation = 'relu', kernel_regularizer = tf.keras.regularizers.l2(30e-6)))
  model.add(Dropout(DROPOUT))
  model.add(Dense(128, kernel_initializer = HeUniform(), activation = 'relu', kernel_regularizer = tf.keras.regularizers.l2(30e-6)))
  model.add(Dropout(DROPOUT))
  model.add(Dense(1, kernel_initializer = GlorotUniform(), activation = 'sigmoid'))
  return model

In [15]:
def model_fn():
  # We _must_ create a new model here, and _not_ capture it from an external
  # scope. TFF will call this within different graph contexts.
  keras_model = create_keras_model()
  return tff.learning.models.from_keras_model(
      keras_model,
      input_spec=federated_train_data[0].element_spec,
      loss=tf.keras.losses.BinaryFocalCrossentropy(),
      metrics=[tf.keras.metrics.BinaryAccuracy()])

In [16]:
training_process = tff.learning.algorithms.build_weighted_fed_avg(
    model_fn,
    client_optimizer_fn=tff.learning.optimizers.build_adam(learning_rate=0.0001),
    server_optimizer_fn=tff.learning.optimizers.build_adam(learning_rate=0.0001))
train_state = training_process.initialize()

2024-09-22 18:55:39.560919: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-09-22 18:55:39.561094: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2024-09-22 18:55:39.561225: I tensorflow/core/grappler/clusters/single_machine.cc:361] Starting new session
2024-09-22 18:55:39.561647: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-09-22 18:55:39.561803: W tensorflow/core/common_runtime/gpu/gpu_d

In [17]:
for round_num in range(NUM_ROUNDS):
  result = training_process.next(train_state, federated_train_data)
  train_state = result.state
  train_metrics = result.metrics
  print('round {:2d}, metrics={}'.format(round_num, train_metrics))

2024-09-22 18:55:40.104680: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-09-22 18:55:40.104858: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2024-09-22 18:55:40.104951: I tensorflow/core/grappler/clusters/single_machine.cc:361] Starting new session
2024-09-22 18:55:40.105302: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-09-22 18:55:40.105406: W tensorflow/core/common_runtime/gpu/gpu_d

round  0, metrics=OrderedDict([('distributor', ()), ('client_work', OrderedDict([('train', OrderedDict([('binary_accuracy', 0.44680926), ('loss', 0.32459384), ('num_examples', 55630), ('num_batches', 500)]))])), ('aggregator', OrderedDict([('mean_value', ()), ('mean_weight', ())])), ('finalizer', OrderedDict([('update_non_finite', 0)]))])
round  1, metrics=OrderedDict([('distributor', ()), ('client_work', OrderedDict([('train', OrderedDict([('binary_accuracy', 0.4832105), ('loss', 0.28760886), ('num_examples', 55630), ('num_batches', 500)]))])), ('aggregator', OrderedDict([('mean_value', ()), ('mean_weight', ())])), ('finalizer', OrderedDict([('update_non_finite', 0)]))])


KeyboardInterrupt: 

In [None]:
def keras_evaluate(state):
  # Take our global model weights and push them back into a Keras model to
  # use its standard `.evaluate()` method.
  keras_model = create_keras_model()
  keras_model.compile(
      loss=tf.keras.losses.BinaryFocalCrossentropy(),
      metrics=[tf.keras.metrics.BinaryAccuracy()])
  model_weights = training_process.get_model_weights(state)
  model_weights.assign_weights_to(keras_model)
  loss, accuracy = keras_model.evaluate(x=test_x, y=test_y)
  print('\tEval: loss={l:.3f}, accuracy={a:.3f}'.format(l=loss, a=accuracy))
print(type(train_state))

keras_evaluate(train_state)
