# Notebook for federated training on the diabetes dataset

## Installation of required packages

In [None]:
!pip install --quiet --upgrade dp-accounting==0.4.3
!pip install --quiet --upgrade tensorflow-federated==0.84.0

## Imports

In [None]:
import collections
import csv
import os

import dp_accounting
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_federated as tff
from sklearn.model_selection import train_test_split

## Define global variables

In [None]:
# Number of simulated clients the training data is partitioned across.
TOTAL_CLIENTS = 8
# Random seed used during data preprocessing (e.g. the train/test split).
SEED = 42
# Delta at which the privacy accountant's epsilon is reported.
TARGET_DELTA = 1e-4
# Number of times each training configuration is repeated.
EXP_NO = 10

# Base output location from which the results directories are derived.
path = '/path/to/output'

## Data preprocessing

In [None]:
def load_diabetes_dataset():
  """Load, clean, normalise and partition the diabetes dataset.

  The CSV is shuffled, zero entries in the clinical measurement columns are
  replaced by the column mean, all feature columns are min-max scaled to
  [0, 1], and the rows are split 80/20 into train and test data. The training
  rows are divided into ``TOTAL_CLIENTS`` contiguous chunks, one per client;
  each client's dataset is a single batch holding all of its rows.

  Returns:
    A tuple ``(federated_data, test_data, min_dict, max_dict, train_samples)``:
    the TFF ``ClientData`` for training, the unbatched test
    ``tf.data.Dataset``, the pre-normalisation minima and maxima of the
    'Pregnancies', 'BMI' and 'Age' columns, and the number of training rows.
  """
  # load data
  data_dir = '/path/to/diabetes.csv'
  data = pd.read_csv(data_dir)
  # Seed the shuffle: otherwise the seeded train/test split below still sees a
  # different row order on every run and the experiment is not reproducible.
  data = data.sample(frac=1, random_state=SEED).reset_index(drop=True)

  # handle null values: a zero in these columns stands for a missing
  # measurement and is replaced by that column's own mean.
  # NOTE(review): the mean is taken over all rows, zeros included
  # (skipna only skips NaN) -- confirm whether zeros should be excluded.
  for col in ['BMI', 'BloodPressure', 'Glucose', 'SkinThickness', 'Insulin']:
    data[col] = data[col].mask(data[col] == 0, data[col].mean(skipna=True))

  cols_to_norm = ['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness',
                  'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age']
  data_normalized = data[cols_to_norm].apply(
      lambda x: (x - x.min()) / (x.max() - x.min()))

  # Remember the original value ranges of the columns used for the fairness
  # groupings, so raw thresholds can later be mapped into normalised space.
  group_cols = ['Pregnancies', 'BMI', 'Age']
  min_dict = {col: data[col].min() for col in group_cols}
  max_dict = {col: data[col].max() for col in group_cols}

  data_normalized['Outcome'] = data['Outcome']
  data = data_normalized

  # train test split
  train_data, test_data = train_test_split(data, test_size=0.2, random_state=SEED)
  train_samples = len(train_data)
  num_clients = TOTAL_CLIENTS

  # create tf datasets: split by position into near-equal contiguous chunks
  # (same chunk sizes as np.array_split applied to the frame itself).
  index_chunks = np.array_split(np.arange(train_samples), num_clients)
  client_train_dataframes = [train_data.iloc[chunk] for chunk in index_chunks]

  def dataframe_to_tf_dataset(df):
    features = df.drop(['Outcome'], axis=1).values
    labels = df['Outcome'].values
    return tf.data.Dataset.from_tensor_slices((features, labels))

  client_train_datasets = [
      dataframe_to_tf_dataset(client_df).batch(len(client_df))
      for client_df in client_train_dataframes
  ]
  test_data = dataframe_to_tf_dataset(test_data)

  def create_tf_dataset_for_client_fn(client_id):
    return client_train_datasets[client_id]

  federated_data = tff.simulation.datasets.ClientData.from_clients_and_tf_fn(
      client_ids=range(num_clients),
      serializable_dataset_fn=create_tf_dataset_for_client_fn
  )

  return federated_data, test_data, min_dict, max_dict, train_samples

train_data, test_data, min_dict, max_dict, train_samples = load_diabetes_dataset()

### Test data splitting for fairness evaluation

In [None]:
def split_test_data():
  """Partition the test set into sub-groups for the fairness evaluation.

  Raw thresholds for pregnancies, BMI and age are mapped into normalised
  feature space using ``min_dict`` / ``max_dict``; the (unbatched) module-level
  ``test_data`` is then filtered per group on feature index 0, 5 or 7
  respectively. Every group is batched with the full test-set length.

  Returns:
    Three dicts (pregnancies, BMI, age) mapping group name -> dataset.
  """
  full_size = len(test_data)

  def _scaled(column, cut_points):
    # Apply the same min-max formula that was used on the feature column.
    lo, hi = min_dict[column], max_dict[column]
    return [(value - lo) / (hi - lo) for value in cut_points]

  def _select(predicate):
    return test_data.filter(predicate).batch(full_size)

  def _window(feature, lower_cmp, lower, upper_cmp, upper):
    # Rows whose feature satisfies both the lower and the upper comparison.
    return _select(lambda x, y: tf.logical_and(
        lower_cmp(x[feature], lower), upper_cmp(x[feature], upper)))

  # pregnancies (feature 0): lower bounds inclusive, upper bounds exclusive
  p0, p3, p6, p9 = _scaled('Pregnancies', [0, 3, 6, 9])
  test_data_preg = {
      'preg_=0': _select(lambda x, y: tf.equal(x[0], p0)),
      'preg_1-2': _window(0, tf.greater, p0, tf.less, p3),
      'preg_3-5': _window(0, tf.greater_equal, p3, tf.less, p6),
      'preg_6-8': _window(0, tf.greater_equal, p6, tf.less, p9),
      'preg_>=9': _select(lambda x, y: tf.greater_equal(x[0], p9)),
  }

  # bmi (feature 5): lower bounds exclusive, upper bounds inclusive
  b18, b25, b30, b40 = _scaled('BMI', [18.5, 25, 30, 40])
  test_data_bmi = {
      'bmi_0-18.5': _select(lambda x, y: tf.less_equal(x[5], b18)),
      'bmi_18.5-25': _window(5, tf.greater, b18, tf.less_equal, b25),
      'bmi_25-30': _window(5, tf.greater, b25, tf.less_equal, b30),
      'bmi_30-40': _window(5, tf.greater, b30, tf.less_equal, b40),
      'bmi_>40': _select(lambda x, y: tf.greater(x[5], b40)),
  }

  # age (feature 7): lower bounds exclusive, upper bounds inclusive
  a30, a40, a50 = _scaled('Age', [30, 40, 50])
  test_data_age = {
      'age_<30': _select(lambda x, y: tf.less_equal(x[7], a30)),
      'age_30-40': _window(7, tf.greater, a30, tf.less_equal, a40),
      'age_40-50': _window(7, tf.greater, a40, tf.less_equal, a50),
      'age_>50': _select(lambda x, y: tf.greater(x[7], a50)),
  }

  return test_data_preg, test_data_bmi, test_data_age


test_data_preg, test_data_bmi, test_data_age = split_test_data()

# From here on the complete test set is served as one single batch.
test_data = test_data.batch(len(test_data))

## Model definition

In [None]:
def my_model_fn():
  """Build a fresh TFF model wrapping the Keras binary classifier.

  The network takes 8 input features, has two hidden Dense layers of 200
  units each followed by ReLU, and a single sigmoid output unit. The input
  spec is taken from the module-level ``test_data`` dataset.
  """
  layers = [
      tf.keras.layers.Dense(units=200, input_shape=(8,)),
      tf.keras.layers.ReLU(),
      tf.keras.layers.Dense(200),
      tf.keras.layers.ReLU(),
      tf.keras.layers.Dense(1, activation=tf.nn.sigmoid),
  ]
  keras_model = tf.keras.models.Sequential(layers)

  loss = tf.keras.losses.BinaryCrossentropy()

  # Accuracy/precision/recall plus the four confusion-matrix counts.
  metrics = [
      tf.keras.metrics.BinaryAccuracy(),
      tf.keras.metrics.Precision(),
      tf.keras.metrics.Recall(),
      tf.keras.metrics.TruePositives(),
      tf.keras.metrics.TrueNegatives(),
      tf.keras.metrics.FalsePositives(),
      tf.keras.metrics.FalseNegatives(),
  ]

  return tff.learning.models.from_keras_model(
      keras_model=keras_model,
      loss=loss,
      input_spec=test_data.element_spec,
      metrics=metrics)

## Fairness measures

In [None]:
def measureFairness(mconf_p, mconf_u):
  """Compute group-fairness measures from two confusion matrices.

  Args:
    mconf_p: ``(TN, FP, FN, TP)`` counts of the first ("p") group.
    mconf_u: ``(TN, FP, FN, TP)`` counts of the second ("u") group.

  Returns:
    A dict with the keys 'DI_degree', 'EOP_difference', 'EODD_difference'
    and 'SP_difference', each rounded to 4 decimals. A measure whose
    underlying rate is undefined (zero denominator, e.g. an empty group or a
    group without positive labels) is reported as NaN instead of raising.
    The four values are also printed.
  """
  (TN_p, FP_p, FN_p, TP_p) = mconf_p
  (TN_u, FP_u, FN_u, TP_u) = mconf_u

  def _rate(numerator, denominator):
    # An undefined rate becomes NaN so it propagates into the dependent
    # measures rather than aborting the whole evaluation.
    if denominator == 0:
      return float('nan')
    return numerator / denominator

  N_p = TN_p + FP_p + FN_p + TP_p
  N_u = TN_u + FP_u + FN_u + TP_u

  # selection rate SR = (TP + FP) / N
  SR_p = _rate(FP_p + TP_p, N_p)
  SR_u = _rate(FP_u + TP_u, N_u)

  # true positive / true negative rates
  TPR_p = _rate(TP_p, TP_p + FN_p)
  TPR_u = _rate(TP_u, TP_u + FN_u)
  TNR_p = _rate(TN_p, TN_p + FP_p)
  TNR_u = _rate(TN_u, TN_u + FP_u)

  def _DI_degree():
    # |1-(SR_u/SR_p)|; a zero selection rate on either side counts as
    # maximal disparity.
    if SR_p == 0 or SR_u == 0:
      return 1
    # expecting SR_p being higher...
    return abs(1 - SR_u / SR_p)

  def _EOP_difference():
    # |TPR_p - TPR_u|
    return abs(TPR_p - TPR_u)

  def _EODD_difference():
    # 0.5 * (|TPR_p - TPR_u| + |TNR_p - TNR_u|)
    return 0.5 * (abs(TPR_p - TPR_u) + abs(TNR_p - TNR_u))

  def _SP_difference():
    # |((TP_p + FP_p)/N_p) - ((TP_u + FP_u)/N_u)|
    return abs(SR_p - SR_u)

  DI_degree = round(_DI_degree(), 4)
  EOP_difference = round(_EOP_difference(), 4)
  EODD_difference = round(_EODD_difference(), 4)
  SP_difference = round(_SP_difference(), 4)

  print("Disparate Impact: ", DI_degree)
  print("EOP difference: ", EOP_difference)
  print("EODD difference: ", EODD_difference)
  print("SP difference: ", SP_difference)

  return {"DI_degree": DI_degree, "EOP_difference": EOP_difference, "EODD_difference": EODD_difference, "SP_difference": SP_difference}

In [None]:
def evaluate_fairness(eval_process, eval_state, model_weights, noise_multiplier=0.0):
  """Evaluate the model per sub-group and log one-vs-rest fairness measures.

  For each of the module-level group dictionaries (``test_data_preg``,
  ``test_data_bmi``, ``test_data_age``) the model is evaluated on every
  sub-group dataset to obtain a confusion matrix. Each sub-group is then
  compared against the summed confusion matrix of all other sub-groups of the
  same dictionary via ``measureFairness``; the result is appended as one row
  to ``results_path + <group name> + '.csv'``.

  Args:
    eval_process: federated evaluation process (must offer
      ``set_model_weights`` and ``next``).
    eval_state: state of ``eval_process``.
    model_weights: weights of the model to evaluate.
    noise_multiplier: value recorded in the 'noise_multiplier' CSV column.
  """
  # The weights are identical for every sub-group, so install them once.
  weighted_eval_state = eval_process.set_model_weights(eval_state, model_weights)

  def _eval_to_mconf(ds):
    eval_output = eval_process.next(weighted_eval_state, [ds])
    metrics = eval_output.metrics['client_work']['eval']['current_round_metrics']
    # Keep the counts as NumPy scalars so that a zero denominator in the
    # fairness ratios yields NaN rather than raising ZeroDivisionError.
    return tuple(
        np.float64(metrics[name])
        for name in ('true_negatives', 'false_positives',
                     'false_negatives', 'true_positives'))

  def _get_all_mconf(input_dict):
    return {group: _eval_to_mconf(ds) for group, ds in input_dict.items()}

  def append_dict_to_csv(input_dict, file_path):
    with open(file_path, mode='a', newline='') as file:
      writer = csv.writer(file)

      # Only a newly created (empty) file gets the header row.
      if file.tell() == 0:
        writer.writerow(input_dict.keys())

      writer.writerow(input_dict.values())

  def _measure_fairness_for_all_constellations(mconf_dict):
    for key, mconf_u in mconf_dict.items():
      # The reference group is the element-wise sum of all other sub-groups.
      other_tuples = [v for k, v in mconf_dict.items() if k != key]
      mconf_p = tuple(sum(x) for x in zip(*other_tuples))

      print('group: ', key)
      results = measureFairness(mconf_p, mconf_u)
      results['noise_multiplier'] = noise_multiplier

      append_dict_to_csv(results, results_path + key + '.csv')

  for group_datasets in [test_data_preg, test_data_bmi, test_data_age]:
    mconf_dict = _get_all_mconf(group_datasets)
    print('measuring fairness on ' + str(group_datasets))
    _measure_fairness_for_all_constellations(mconf_dict)


## Baseline training

In [None]:
def train_baseline(rounds, data_frame):
  """Run non-private federated averaging and record test metrics.

  All clients take part in every round. The model is evaluated on the
  module-level ``test_data`` before every 5th round and once more after the
  last round; afterwards the fairness evaluation is run on the final weights.

  Args:
    rounds: number of federated training rounds.
    data_frame: DataFrame holding previously collected results.

  Returns:
    A new DataFrame: ``data_frame`` with one appended row per evaluation
    (column 'round' plus the evaluation metrics). The final evaluation is
    recorded with ``round == rounds``.
  """
  learning_process = tff.learning.algorithms.build_unweighted_fed_avg(
        my_model_fn,
        client_optimizer_fn=lambda: tf.keras.optimizers.SGD(0.1),
        server_optimizer_fn=lambda: tf.keras.optimizers.SGD(1.0, momentum=0.9))

  eval_process = tff.learning.algorithms.build_fed_eval(my_model_fn)
  state = learning_process.initialize()
  eval_state = eval_process.initialize()

  all_train_data = [
        train_data.create_tf_dataset_for_client(client)
        for client in train_data.client_ids
    ]

  # Collect the per-evaluation rows and concatenate once at the end instead
  # of copying the growing frame on every evaluation.
  frames = [data_frame]

  for round_num in range(rounds):
    if round_num % 5 == 0:
      model_weights = learning_process.get_model_weights(state)
      eval_state = eval_process.set_model_weights(eval_state, model_weights)
      eval_output = eval_process.next(eval_state, [test_data])
      metrics = eval_output.metrics['client_work']['eval']['current_round_metrics']
      if round_num < 25 or round_num % 25 == 0:
        print(f'Round {round_num:3d}: {metrics}')
      frames.append(pd.DataFrame({'round': round_num, **metrics}, index=[0]))

    # model update
    state = learning_process.next(state, all_train_data).state

  # Final evaluation after the last update. It is stored under `rounds`
  # (matching the printed label) so it cannot be confused with the
  # pre-update evaluation of round `rounds - 1`.
  model_weights = learning_process.get_model_weights(state)
  eval_state = eval_process.set_model_weights(eval_state, model_weights)
  eval_output = eval_process.next(eval_state, [test_data])
  metrics = eval_output.metrics['client_work']['eval']['current_round_metrics']
  print(f'Round {rounds:3d}: {metrics}')
  frames.append(pd.DataFrame({'round': rounds, **metrics}, index=[0]))

  data_frame = pd.concat(frames, ignore_index=True)

  evaluate_fairness(eval_process, eval_state, model_weights)

  return data_frame

In [None]:
df = pd.DataFrame()
rounds = 200
# Build the directory with os.path.join so that a `path` without a trailing
# separator is handled; the trailing '' keeps the final separator, which the
# code that concatenates file names onto `results_path` relies on.
results_path = os.path.join(path, 'baseline', '')
# Create the output directory up front so that the CSV writes do not fail
# after a long training run.
os.makedirs(results_path, exist_ok=True)

for exp in range(EXP_NO):
  print(f'Starting training with experiment: {exp}')
  df = train_baseline(rounds, df)

# add f1 score
df['f1_score'] = 2 * (df['precision'] * df['recall']) / (df['precision'] + df['recall'])

# save results
df.to_csv(results_path + 'performance_complete.csv', index=False)

## DP Training

In [None]:
def train(rounds, noise_multiplier, data_frame, samples_per_round=train_samples):
  """Run federated averaging with a DP aggregator and record test metrics.

  All clients take part in every round. The model is evaluated on the
  module-level ``test_data`` before every 5th round and once more after the
  last round; afterwards the fairness evaluation is run on the final weights.
  One Gaussian DP event per round is composed in an RDP accountant and the
  resulting epsilon at ``TARGET_DELTA`` is reported.

  Args:
    rounds: number of federated training rounds.
    noise_multiplier: noise multiplier of the DP aggregator; also used for
      the privacy accounting.
    data_frame: DataFrame holding previously collected results.
    samples_per_round: forwarded as second argument to ``dp_aggregator``.

  Returns:
    A new DataFrame: ``data_frame`` with one appended row per evaluation
    (columns 'round', 'noise_multiplier' and the evaluation metrics). The
    final evaluation is recorded with ``round == rounds`` and additionally
    carries 'epsilon'.
  """
  accountant = dp_accounting.rdp.RdpAccountant()

  # NOTE(review): the second argument of dp_aggregator is documented as
  # `clients_per_round`, while the default passed here is the number of
  # training samples -- confirm this is intended, given that TOTAL_CLIENTS
  # clients participate in each round.
  aggregation_factory = tff.learning.model_update_aggregator.dp_aggregator(
      noise_multiplier, samples_per_round)

  learning_process = tff.learning.algorithms.build_unweighted_fed_avg(
        my_model_fn,
        client_optimizer_fn=lambda: tf.keras.optimizers.SGD(0.1),
        server_optimizer_fn=lambda: tf.keras.optimizers.SGD(1.0, momentum=0.9),
        model_aggregator=aggregation_factory)

  eval_process = tff.learning.algorithms.build_fed_eval(my_model_fn)

  state = learning_process.initialize()
  eval_state = eval_process.initialize()

  all_train_data = [
        train_data.create_tf_dataset_for_client(client)
        for client in train_data.client_ids
    ]

  # Collect the per-evaluation rows and concatenate once at the end instead
  # of copying the growing frame on every evaluation.
  frames = [data_frame]

  for round_num in range(rounds):
    if round_num % 5 == 0:
      model_weights = learning_process.get_model_weights(state)
      eval_state = eval_process.set_model_weights(eval_state, model_weights)
      eval_output = eval_process.next(eval_state, [test_data])
      metrics = eval_output.metrics['client_work']['eval']['current_round_metrics']
      if round_num < 25 or round_num % 25 == 0:
        print(f'Round {round_num:3d}: {metrics}')
      frames.append(pd.DataFrame({'round': round_num,
                                  'noise_multiplier': noise_multiplier,
                                  **metrics}, index=[0]))

    # model update
    state = learning_process.next(state, all_train_data).state

    # account for the noise added in this round
    accountant.compose(dp_accounting.GaussianDpEvent(noise_multiplier))

  epsilon = accountant.get_epsilon(TARGET_DELTA)
  print(f"Total privacy budget (epsilon): {epsilon}")

  # Final evaluation after the last update. It is stored under `rounds`
  # (matching the printed label) so it cannot be confused with the
  # pre-update evaluation of round `rounds - 1`.
  model_weights = learning_process.get_model_weights(state)
  eval_state = eval_process.set_model_weights(eval_state, model_weights)
  eval_output = eval_process.next(eval_state, [test_data])
  metrics = eval_output.metrics['client_work']['eval']['current_round_metrics']
  print(f'Round {rounds:3d}: {metrics}')
  print(f'Privacy budget (epsilon) spent: {epsilon:.4f}')

  frames.append(pd.DataFrame({'round': rounds,
                              'noise_multiplier': noise_multiplier, 'epsilon': epsilon,
                              **metrics}, index=[0]))

  data_frame = pd.concat(frames, ignore_index=True)

  evaluate_fairness(eval_process, eval_state, model_weights, noise_multiplier)

  return data_frame

In [None]:
df = pd.DataFrame()
rounds = 200
noise_multipliers = [1.0, 5.0, 10.0, 20.0, 30.0, 40.0, 50.0, 60.0]
# Build the directory with os.path.join so that a `path` without a trailing
# separator is handled; the trailing '' keeps the final separator, which the
# code that concatenates file names onto `results_path` relies on.
results_path = os.path.join(path, 'dp', '')
# Create the output directory up front so that the CSV writes do not fail
# after a long training run.
os.makedirs(results_path, exist_ok=True)

for exp in range(EXP_NO):
  print(f'Starting training with experiment: {exp}')
  for noise_multiplier in noise_multipliers:
    print(f'Starting training with noise multiplier: {noise_multiplier}')
    df = train(rounds, noise_multiplier, df)
    print()

# add f1 score
df['f1_score'] = 2 * (df['precision'] * df['recall']) / (df['precision'] + df['recall'])

# save results
df.to_csv(results_path + 'performance_complete.csv', index=False)
# save just last round's results
filtered_df = df[df['round'] == df['round'].max()]
filtered_df.to_csv(results_path + 'performance.csv', index=False)