# federated-ml-health

Copyright 2020 Google LLC

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at https://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

# Overview

This colab implements a side-by-side comparison of models (and their interpretations) trained in the centralized (a.k.a. classical) machine learning way vs. the federated (distributed) way. Specifically, we concentrate on binary inference problems and evaluate several approaches:

1.   regression models trained in a traditional way using various optimizers on centralized datasets
2.   equivalent models expressed in tensorflow and trained using available optimizers therein
3.   equivalent models trained in tensorflow_federated where each device keeps its data local and private
4.   all of the above with differential privacy added

In [None]:
# To install TFF and dependencies
!pip uninstall --quiet --yes tensorflow-datasets tensorflow-metadata
!pip install --quiet --upgrade nest_asyncio
!pip install --quiet --upgrade tensorflow_federated_nightly
!pip install --quiet --upgrade tensorflow_privacy

# Note: It is safe to ignore the `ERROR: datascience 0.10.6 has requirement folium==0.2.1, but you'll have folium 0.8.3 which is incompatible.` as that is an artifact of preinstalled packages in the default colab runtime.





# Data initialization

We use a public, multi-factorial diabetes dataset for a quick evaluation on a specific, well-studied problem.

In [None]:
import collections
import matplotlib.pyplot as plt
import nest_asyncio
import numpy as np
import pandas as pd
import sklearn
import tempfile
import tensorflow as tf
import tensorflow_federated as tff
from collections import defaultdict 
from io import StringIO
from keras.utils import to_categorical
from matplotlib.pyplot import figure
from numpy import loadtxt
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import StandardScaler

# Notebook-wide runtime setup: event-loop patching, TFF execution context,
# reproducible seeds, and a scratch file used to stage the pasted dataset.
nest_asyncio.apply()

_debug_context = tff.backends.native.create_thread_debugging_execution_context(
    clients_per_thread=50)
tff.framework.set_default_context(_debug_context)

# Seed NumPy and TensorFlow with the same value.
for _seed_fn in (np.random.seed, tf.random.set_seed):
  _seed_fn(10)

# Text-mode temporary file; its path is later handed to `loadtxt`.
f = tempfile.NamedTemporaryFile(mode="w")

def get_diabetes_dataset():
  """Returns the raw diabetes dataset as one comma-separated text blob.

  The body is a placeholder: the data has to be pasted between the triple
  quotes by hand. The text is later written to a file and parsed with
  `loadtxt(..., delimiter=",", skiprows=1)`, so it must be comma-separated
  numeric rows; the first line (empty, because the literal starts with a
  newline right after the opening quotes) is skipped.

  Returns:
    The pasted dataset text as a `str`.
  """
  # Download data from https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.names and paste to return statement here.
  # NOTE(review): a `.names` file is presumably the dataset description; the
  # numeric rows are presumably in the companion CSV file -- confirm the URL.
  return """
  """


def get_diabetes_labels():
  """Returns the display names for the model terms and the outcome column.

  Returns:
    A list of ten strings: "intercept" first, then the eight feature names,
    then the outcome name "Diabetic?".
  """
  feature_names = [
      "Number of times pregnant",
      "Plasma glucose concentration",
      "Diastolic blood pressure",
      "Triceps skin fold thickness",
      "2-Hour serum insulin (mu U/ml)",
      "BMI",
      "Diabetes pedigree function",
      "Age (years)",
  ]
  return ["intercept", *feature_names, "Diabetic?"]

# Load the pasted dataset, split it into features/outcome, standardize the
# features, and build a DataFrame for display.
dataset = get_diabetes_dataset()
labels = get_diabetes_labels()

# Stage the text in the temporary file so `loadtxt` can read it by path.
f.write(dataset)
# `loadtxt` reopens the file by name, so Python's write buffer must be flushed
# first; otherwise the reader can see an empty or truncated file.
f.flush()
# skiprows=1 drops the first line of the staged text.
dataset = loadtxt(f.name, delimiter=",", skiprows=1)
num_col = np.size(dataset, 1)
# Every column except the last is a feature; the last column is the outcome.
X = dataset[:, 0:num_col-1]
y = dataset[:, num_col-1]

# Standardize the features; the outcome column is left untouched.
scaler = StandardScaler()
X = scaler.fit_transform(X)

D = np.column_stack((X, y))
# labels[0] is "intercept", which has no data column, hence labels[1:].
ds = pd.DataFrame(
    data=D,
    columns=labels[1:])

ds

# Test centralized predictive power of all models and training regimes

In [None]:
# Experiment-wide constants and the train/test split (first rows train,
# remaining rows test; no shuffling).
TRAIN_PROPORTION = 0.8
NUM_ROUNDS = 12

_num_examples, NUM_FEATURES = X.shape
n_train = round(TRAIN_PROPORTION * _num_examples)

# The number of simulated clients equals the number of training rows.
NUM_CLIENTS = n_train
NUM_PARTICIPATING_PER_ROUND = round(NUM_CLIENTS/3)

data_train, data_test = X[:n_train], X[n_train:]
labels_train, labels_test = y[:n_train], y[n_train:]

# sklearn regression


In [None]:
def _fit_and_score_sklearn(solver):
  """Fits sklearn logistic regression with `solver` and scores the test split.

  Args:
    solver: Name of the `LogisticRegression` solver to use.

  Returns:
    A tuple `(model, proba, fpr, tpr, thresholds, auc)` where `proba` is the
    output of `predict_proba` on the test data and the ROC quantities are
    computed from its second column.
  """
  model = LogisticRegression(random_state=0, solver=solver)
  model.fit(data_train, labels_train)
  class_proba = model.predict_proba(data_test)
  fpr, tpr, thresholds = sklearn.metrics.roc_curve(labels_test, class_proba[:, 1])
  return model, class_proba, fpr, tpr, thresholds, sklearn.metrics.auc(fpr, tpr)


# liblinear solver.
(sk_model, proba, fpr_skl_liblinear, tpr_skl_liblinear,
 threshold_skl_liblinear, roc_auc_skl_liblinear) = _fit_and_score_sklearn('liblinear')
labels_proba = proba[:,1]
print(roc_auc_skl_liblinear)

# SAG solver.
(sk_model, proba, fpr_skl_sag, tpr_skl_sag,
 threshold_skl_sag, roc_auc_skl_sag) = _fit_and_score_sklearn('sag')
labels_proba = proba[:,1]
print(roc_auc_skl_sag)

# L-BFGS solver; `sk_model` is left pointing at this last fit.
(sk_model, proba, fpr_skl, tpr_skl,
 threshold_skl, roc_auc_skl) = _fit_and_score_sklearn('lbfgs')
labels_proba = proba[:,1]
print(roc_auc_skl)

# TF regression

We compare the weights obtained with the regression solvers commonly used in the literature to those obtained with general neural-network optimization (expressing regression as a special case of a neural architecture with one layer).

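The Nadam optimizer (Adam with Nesterov momentum) is used to mimic the sklearn solvers as closely as possible; it adapts its step size using running estimates of the first and second moments of the gradient (not second derivatives).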

In [None]:
# Wrap both splits as tf.data pipelines, batched with a batch size of n_train
# (the training split has exactly n_train rows, so it forms a single batch).
_train_pairs = tf.data.Dataset.from_tensor_slices((data_train, labels_train))
dataset_train = _train_pairs.batch(n_train)

_test_pairs = tf.data.Dataset.from_tensor_slices((data_test, labels_test))
dataset_test = _test_pairs.batch(n_train)

def create_keras_model():
  """Builds logistic regression as a one-layer Keras network.

  Returns:
    An uncompiled `tf.keras.models.Sequential` holding a single sigmoid
    `Dense` unit over `NUM_FEATURES` inputs, with an L2 kernel penalty of 0.01.
  """
  logistic_unit = tf.keras.layers.Dense(
      units=1,
      activation='sigmoid',
      input_shape=(NUM_FEATURES,),
      kernel_regularizer=tf.keras.regularizers.l2(0.01),
  )
  return tf.keras.models.Sequential([logistic_unit])

def create_keras_model_deeper():
  """Builds a three-layer sigmoid network over `NUM_FEATURES` inputs.

  Layer widths are 6, 3 and 1. All layers share one seeded Glorot-normal
  initializer; only the output layer carries an L1/L2 kernel penalty.

  Returns:
    An uncompiled `tf.keras.models.Sequential`.
  """
  initializer = tf.keras.initializers.GlorotNormal(seed=10)

  m = tf.keras.models.Sequential()
  m.add(tf.keras.Input(shape=(NUM_FEATURES,)))
  # Hidden layers.
  for hidden_width in (6, 3):
    m.add(tf.keras.layers.Dense(
        hidden_width, activation='sigmoid', kernel_initializer=initializer))
  # Output layer.
  m.add(tf.keras.layers.Dense(
      1,
      activation='sigmoid',
      kernel_initializer=initializer,
      kernel_regularizer=tf.keras.regularizers.L1L2(l1=0.0001, l2=0.01)))
  return m
  

# Centralized TF baseline: compile, train and score the one-layer model.
tf_model = create_keras_model()

_tf_metrics = [
    tf.keras.metrics.BinaryAccuracy(name='accuracy'),
    tf.keras.metrics.AUC(name='auc'),
]
tf_model.compile(
    optimizer=tf.keras.optimizers.Nadam(learning_rate=0.5),
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=_tf_metrics,
)

# NOTE(review): `dataset_train` is already batched, so `batch_size` presumably
# has no effect here -- confirm against the Keras `Model.fit` documentation.
_fit_options = dict(
    validation_data=dataset_test,
    epochs=NUM_ROUNDS,
    batch_size=NUM_PARTICIPATING_PER_ROUND,
    verbose=1,
    use_multiprocessing=True,
)
tf_model.fit(dataset_train, **_fit_options)

# ROC curve and AUC on the held-out split.
labels_proba = tf_model.predict(dataset_test)
_tf_roc = sklearn.metrics.roc_curve(labels_test, labels_proba)
fpr_tf, tpr_tf, threshold = _tf_roc
roc_auc_tf = sklearn.metrics.auc(fpr_tf, tpr_tf)

# TF Federated regression

Here we optimize the same model architecture as in the TF case above, but train it in a federated (distributed) way.

In [None]:
def create_client_dataset(data, labels):
  """Exposes row-aligned `data`/`labels` as TFF client data, one row per client.

  Args:
    data: Feature rows, sliceable as `data[i:i+1, :]`.
    labels: Outcomes aligned with `data`, sliceable as `labels[i:i+1]`.

  Returns:
    The result of `tff.simulation.ClientData.from_clients_and_fn` with client
    ids `range(len(data))`; client `i` owns only row `i`.
  """

  def single_row_dataset(client_id):
    # A length-one slice (rather than an index) keeps the leading dimension.
    own_row = slice(client_id, client_id+1)
    return tf.data.Dataset.from_tensor_slices((data[own_row, :], labels[own_row]))

  return tff.simulation.ClientData.from_clients_and_fn(
      client_ids=range(len(data)),
      create_tf_dataset_for_client_fn=single_row_dataset)
  
def preprocess(dataset):
  """Returns `dataset` repeated once and grouped into batches of size one."""
  single_pass = dataset.repeat(1)
  return single_pass.batch(1)

def make_federated_data(client_data, client_ids):
  """Builds the per-client list of preprocessed datasets for one round.

  Args:
    client_data: Object exposing `create_tf_dataset_for_client(client_id)`.
    client_ids: Iterable of client ids to include, in order.

  Returns:
    A list with one `preprocess`-ed dataset per entry of `client_ids`.
  """
  federated_data = []
  for client_id in client_ids:
    raw_dataset = client_data.create_tf_dataset_for_client(client_id)
    federated_data.append(preprocess(raw_dataset))
  return federated_data

# Per-client views of both splits.
client_dataset_train = create_client_dataset(data_train, labels_train)
client_dataset_test = create_client_dataset(data_test, labels_test)

# One preprocessed client dataset serves as the template for the model's
# input spec.
_first_client_id = client_dataset_train.client_ids[0]
_first_client_raw = client_dataset_train.create_tf_dataset_for_client(_first_client_id)
preprocessed_example_dataset = preprocess(_first_client_raw)
print(preprocessed_example_dataset.element_spec)

def model_fn():
  """Builds a fresh TFF model wrapping the one-layer Keras regression.

  Returns:
    The result of `tff.learning.from_keras_model` for a new
    `create_keras_model()` instance, using binary cross-entropy loss,
    accuracy/AUC metrics, and the element spec of
    `preprocessed_example_dataset` as input spec.
  """
  keras_model = create_keras_model()
  tracked_metrics = [
      tf.keras.metrics.BinaryAccuracy(name='accuracy'),
      tf.keras.metrics.AUC(name='auc'),
  ]
  return tff.learning.from_keras_model(
      keras_model,
      input_spec=preprocessed_example_dataset.element_spec,
      loss=tf.keras.losses.BinaryCrossentropy(),
      metrics=tracked_metrics)
  
# Create TFF iterative process.
iterative_process = tff.learning.build_federated_averaging_process(
    model_fn,
    client_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=1.0),
    server_optimizer_fn=lambda: tf.keras.optimizers.Nadam(learning_rate=0.5),
    use_experimental_simulation_loop = True
)

# Keras twin of the federated model, used only to run predictions.
tff_model = create_keras_model()
# Best validation AUC seen per participation percentage.
tff_auc = defaultdict(int)

# Test various sizes of subsets of eligible devices participating in each round.
for participation in list(range(1, NUM_CLIENTS, 100)) + [NUM_CLIENTS]:
  # Start every participation level from freshly initialized server state.
  # Carrying the state over would let later levels inherit the training done
  # at earlier levels, so the per-level AUCs would not be comparable.
  state = iterative_process.initialize()
  participation_pct = 100*participation/float(NUM_CLIENTS)

  for round_num in range(NUM_ROUNDS):
    # Fresh random subset of clients (without replacement) for this round.
    sampled_client_ids = np.random.choice(
        range(NUM_CLIENTS), size=participation, replace=False)
    federated_train_data = make_federated_data(client_dataset_train, sampled_client_ids)
    state, metrics = iterative_process.next(state, federated_train_data)
    print(participation, round_num, str(metrics))

    # Evaluate the current server weights on the held-out split.
    state.model.assign_weights_to(tff_model)
    labels_proba = tff_model.predict(dataset_test)
    fpr, tpr, threshold = sklearn.metrics.roc_curve(labels_test, labels_proba)
    auc_value = sklearn.metrics.auc(fpr, tpr)
    test_loss = tf.keras.losses.binary_crossentropy(labels_test, np.reshape(labels_proba, [-1]))
    print('validation auc={}, loss={}'.format(auc_value, test_loss))
    tff_auc[participation_pct] = max(tff_auc[participation_pct], auc_value)

In [None]:
# Copy the most recent federated server weights into the Keras model and
# compute its ROC curve and AUC on the held-out split.
state.model.assign_weights_to(tff_model)
labels_proba = tff_model.predict(dataset_test)
_tff_roc = sklearn.metrics.roc_curve(labels_test, labels_proba)
fpr_tff_sgd, tpr_tff_sgd, threshold_tff_sgd = _tff_roc
roc_auc_tff_sgd = sklearn.metrics.auc(fpr_tff_sgd, tpr_tff_sgd)

# TFF regression with DP

In [None]:
print(preprocessed_example_dataset.element_spec)

# Best validation AUC seen per participation percentage (DP variant).
tff_dp_auc = defaultdict(lambda:0)

# Test various sizes of subsets of eligible devices participating in each round.
participation_levels = list(range(1, NUM_CLIENTS, 100)) + [NUM_CLIENTS]
for participation in participation_levels:
  participation_pct = 100*participation/float(NUM_CLIENTS)

  # DP process depends on participation rate to select noise scale.
  dp_query = tff.utils.build_dp_query(
      clip=0.1,
      noise_multiplier=29.3,
      expected_total_weight=participation,
      adaptive_clip_learning_rate=0,
      target_unclipped_quantile=0.5,
      clipped_count_budget_allocation=0.05,
      expected_clients_per_round=participation,
      per_vector_clipping=True,
      geometric_clip_update=True,
      model=model_fn())

  weights_type = tff.learning.framework.weights_type_from_model(model_fn)
  dp_aggregation_process = tff.utils.build_dp_aggregate_process(
      weights_type.trainable, dp_query)

  # Create TFF iterative process with the DP aggregator plugged in.
  iterative_process = tff.learning.build_federated_averaging_process(
      model_fn,
      client_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=1.0),
      server_optimizer_fn=lambda: tf.keras.optimizers.Nadam(learning_rate=0.5),
      use_experimental_simulation_loop = True,
      aggregation_process=dp_aggregation_process,
  )

  # Each participation level starts from freshly initialized server state.
  state = iterative_process.initialize()

  for round_num in range(10):
    sampled_client_ids = np.random.choice(
        range(NUM_CLIENTS), size=participation, replace=False)
    federated_train_data = make_federated_data(client_dataset_train, sampled_client_ids)
    state, metrics = iterative_process.next(state, federated_train_data)
    print(participation, round_num, str(metrics))

    tff_dp_model = create_keras_model() # reassigning weights to the same model was causing a bug
    state.model.assign_weights_to(tff_dp_model)
    labels_proba = tff_dp_model.predict(dataset_test)
    fpr, tpr, threshold = sklearn.metrics.roc_curve(labels_test, labels_proba)
    auc_value = sklearn.metrics.auc(fpr, tpr)
    test_loss = tf.keras.losses.binary_crossentropy(labels_test, np.reshape(labels_proba, [-1]))
    print('validation auc={}, loss={}'.format(auc_value, test_loss))
    tff_dp_auc[participation_pct] = max(tff_dp_auc[participation_pct], auc_value)

In [None]:
# Copy the most recent DP federated server weights into the Keras model and
# compute its ROC curve and AUC on the held-out split.
state.model.assign_weights_to(tff_dp_model)
labels_proba = tff_dp_model.predict(dataset_test)
_tff_dp_roc = sklearn.metrics.roc_curve(labels_test, labels_proba)
fpr_tff_dp_sgd, tpr_tff_dp_sgd, threshold_tff_dp_sgd = _tff_dp_roc
roc_auc_tff_dp_sgd = sklearn.metrics.auc(fpr_tff_dp_sgd, tpr_tff_dp_sgd)

# TF regression with differential privacy

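The optimizer wrapper below is needed to make TF Privacy work in TF 2.0+ (eager mode): it subclasses an existing optimizer so that gradients are computed per microbatch from a gradient tape and accumulated through a Gaussian DP query.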

In [None]:
from absl import logging
import collections

from tensorflow_privacy.privacy.analysis import privacy_ledger
from tensorflow_privacy.privacy.dp_query import gaussian_query

def make_optimizer_class(cls):
  """Constructs a DP optimizer class from an existing one.

  Args:
    cls: Optimizer class to subclass. A warning is logged when its
      `compute_gradients` is not the one inherited from
      `tf.compat.v1.train.Optimizer`, since it is replaced below.

  Returns:
    `DPOptimizerClass`, a subclass of `cls` whose `compute_gradients` needs a
    gradient tape and accumulates per-microbatch gradients through a
    `GaussianSumQuery`.
  """
  # Compare code objects to detect whether `cls` overrides compute_gradients.
  parent_code = tf.compat.v1.train.Optimizer.compute_gradients.__code__
  child_code = cls.compute_gradients.__code__
  GATE_OP = tf.compat.v1.train.Optimizer.GATE_OP  # pylint: disable=invalid-name
  if child_code is not parent_code:
    logging.warning(
        'WARNING: Calling make_optimizer_class() on class %s that overrides '
        'method compute_gradients(). Check to ensure that '
        'make_optimizer_class() does not interfere with overridden version.',
        cls.__name__)

  class DPOptimizerClass(cls):
    """Differentially private subclass of given class cls."""

    # Not referenced anywhere else in this class body.
    _GlobalState = collections.namedtuple(
      '_GlobalState', ['l2_norm_clip', 'stddev'])
    
    def __init__(
        self,
        dp_sum_query,
        num_microbatches=None,
        unroll_microbatches=False,
        *args,  # pylint: disable=keyword-arg-before-vararg, g-doc-args
        **kwargs):
      """Initialize the DPOptimizerClass.

      Args:
        dp_sum_query: DPQuery object, specifying differential privacy
          mechanism to use.
        num_microbatches: How many microbatches into which the minibatch is
          split. If None, will default to the size of the minibatch, and
          per-example gradients will be computed.
        unroll_microbatches: If true, processes microbatches within a Python
          loop instead of a tf.while_loop. Can be used if using a tf.while_loop
          raises an exception. NOTE(review): the value is stored, but
          `compute_gradients` below always uses a Python loop.
        *args: Forwarded to the constructor of `cls`.
        **kwargs: Forwarded to the constructor of `cls`.
      """
      super(DPOptimizerClass, self).__init__(*args, **kwargs)
      self._dp_sum_query = dp_sum_query
      self._num_microbatches = num_microbatches
      self._global_state = self._dp_sum_query.initial_global_state()
      self._unroll_microbatches = unroll_microbatches

    def compute_gradients(self,
                          loss,
                          var_list,
                          gate_gradients=GATE_OP,
                          aggregation_method=None,
                          colocate_gradients_with_ops=False,
                          grad_loss=None,
                          gradient_tape=None,
                          curr_noise_mult=0,
                          curr_norm_clip=1):
      """Computes microbatch-averaged gradients via a Gaussian sum query.

      Args:
        loss: Zero-argument callable returning the loss tensor; its leading
          dimension sets the microbatch count when none was configured.
        var_list: Variables to differentiate with respect to.
        gate_gradients: Accepted but not used in this body.
        aggregation_method: Accepted but not used in this body.
        colocate_gradients_with_ops: Accepted but not used in this body.
        grad_loss: Accepted but not used in this body.
        gradient_tape: Tape used to compute each microbatch gradient. Required.
        curr_noise_mult: Noise multiplier for this call. The query's stddev is
          `curr_norm_clip * curr_noise_mult`; when it is not positive the
          noising step is skipped.
        curr_norm_clip: First argument given to the query for this call
          (presumably the L2 clipping norm -- confirm against
          `GaussianSumQuery`).

      Returns:
        The accumulated gradients divided by the number of microbatches.
        NOTE(review): despite the local name `grads_and_vars`, the result is
        not paired with `var_list` -- confirm what callers expect.

      Raises:
        ValueError: If `gradient_tape` is missing (falsy).
      """

      # The query and global state given to __init__ are replaced on every
      # call by ones built from the per-call clip and noise arguments.
      self._dp_sum_query = gaussian_query.GaussianSumQuery(curr_norm_clip, 
                                                           curr_norm_clip*curr_noise_mult)
      self._global_state = self._dp_sum_query.make_global_state(curr_norm_clip, 
                                                                curr_norm_clip*curr_noise_mult)
      

      # TF is running in Eager mode, check we received a vanilla tape.
      if not gradient_tape:
        raise ValueError('When in Eager mode, a tape needs to be passed.')

      vector_loss = loss()
      # Default: one microbatch per entry along the loss's leading dimension.
      if self._num_microbatches is None:
        self._num_microbatches = tf.shape(input=vector_loss)[0]
      sample_state = self._dp_sum_query.initial_sample_state(var_list)
      # One row of losses per microbatch.
      microbatches_losses = tf.reshape(vector_loss, [self._num_microbatches, -1])
      sample_params = (self._dp_sum_query.derive_sample_params(self._global_state))

      def process_microbatch(i, sample_state):
        """Process one microbatch (record) with privacy helper."""
        # Mean loss of microbatch i -> its gradient -> accumulated in the query.
        microbatch_loss = tf.reduce_mean(input_tensor=tf.gather(microbatches_losses, [i]))
        grads = gradient_tape.gradient(microbatch_loss, var_list)
        sample_state = self._dp_sum_query.accumulate_record(sample_params, sample_state, grads)
        return sample_state
    
      for idx in range(self._num_microbatches):
        sample_state = process_microbatch(idx, sample_state)

      # Noise is requested from the query only for a positive multiplier;
      # otherwise the raw accumulated sums are used.
      if curr_noise_mult > 0:
        grad_sums, self._global_state = (self._dp_sum_query.get_noised_result(sample_state, self._global_state))
      else:
        grad_sums = sample_state

      def normalize(v):
        # Turn a sum over microbatches into an average.
        return v / tf.cast(self._num_microbatches, tf.float32)

      final_grads = tf.nest.map_structure(normalize, grad_sums)
      grads_and_vars = final_grads
    
      return grads_and_vars

  return DPOptimizerClass


def make_gaussian_optimizer_class(cls):
  """Constructs a DP optimizer class that uses a Gaussian sum query.

  Args:
    cls: Optimizer class to wrap; passed through `make_optimizer_class`.

  Returns:
    A subclass of `make_optimizer_class(cls)` whose constructor takes
    `l2_norm_clip` and `noise_multiplier` instead of a ready-made query.
  """
  dp_base_class = make_optimizer_class(cls)

  class DPGaussianOptimizerClass(dp_base_class):
    """DP optimizer whose query is a `GaussianSumQuery`."""

    def __init__(
        self,
        l2_norm_clip,
        noise_multiplier,
        num_microbatches=None,
        ledger=None,
        unroll_microbatches=False,
        *args,  # pylint: disable=keyword-arg-before-vararg
        **kwargs):
      """Builds the Gaussian query and hands it to the DP base class.

      Args:
        l2_norm_clip: First argument of the `GaussianSumQuery`.
        noise_multiplier: Scales `l2_norm_clip` to obtain the query's stddev.
        num_microbatches: Forwarded to the DP base class.
        ledger: Optional ledger; when truthy the query is wrapped in
          `privacy_ledger.QueryWithLedger`.
        unroll_microbatches: Forwarded to the DP base class.
        *args: Forwarded to the DP base class.
        **kwargs: Forwarded to the DP base class.
      """
      noise_stddev = l2_norm_clip * noise_multiplier
      query = gaussian_query.GaussianSumQuery(l2_norm_clip, noise_stddev)
      if ledger:
        query = privacy_ledger.QueryWithLedger(query, ledger=ledger)

      super().__init__(
          query,
          num_microbatches,
          unroll_microbatches,
          *args,
          **kwargs)

    @property
    def ledger(self):
      """The `ledger` attribute of the current DP query."""
      return self._dp_sum_query.ledger

  return DPGaussianOptimizerClass

In [None]:
from tensorflow_privacy.privacy.analysis import compute_dp_sgd_privacy
from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPGradientDescentGaussianOptimizer
from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPAdamGaussianOptimizer

# DP-SGD hyper-parameters.
l2_norm_clip, noise_multiplier = 0.1, 59.3
num_microbatches, learning_rate = 1, 0.5

# Wrap the TF1 gradient-descent optimizer with the Gaussian DP class factory.
GradientDescentOptimizer = tf.compat.v1.train.GradientDescentOptimizer
DPGradientDescentGaussianOptimizer_NEW = make_gaussian_optimizer_class(GradientDescentOptimizer)

_dp_optimizer_options = dict(
    l2_norm_clip=l2_norm_clip,
    noise_multiplier=noise_multiplier,
    num_microbatches=num_microbatches,
    learning_rate=learning_rate,
)
dp_optimizer = DPGradientDescentGaussianOptimizer_NEW(**_dp_optimizer_options)

dp_loss = tf.keras.losses.BinaryCrossentropy()

# Same one-layer model as the non-private baseline, trained with the DP optimizer.
tf_dp_model = create_keras_model()
_dp_metrics = [
    tf.keras.metrics.BinaryAccuracy(name='accuracy'),
    tf.keras.metrics.AUC(name='auc'),
]
tf_dp_model.compile(optimizer=dp_optimizer, loss=dp_loss, metrics=_dp_metrics)
tf_dp_model.fit(
    dataset_train,
    validation_data=dataset_test,
    epochs=NUM_ROUNDS,
    batch_size=NUM_PARTICIPATING_PER_ROUND,
    verbose=1,
    use_multiprocessing=True)

# ROC curve and AUC on the held-out split.
labels_proba = tf_dp_model.predict(dataset_test)
_tf_dp_roc = sklearn.metrics.roc_curve(labels_test, labels_proba)
fpr_tf_dp, tpr_tf_dp, threshold = _tf_dp_roc
roc_auc_tf_dp = sklearn.metrics.auc(fpr_tf_dp, tpr_tf_dp)

# Privacy accounting for the configuration above (left as the cell's last
# expression so the notebook displays the result).
compute_dp_sgd_privacy.compute_dp_sgd_privacy(
    n=n_train,
    batch_size=NUM_PARTICIPATING_PER_ROUND,
    noise_multiplier=noise_multiplier,
    epochs=NUM_ROUNDS,
    delta=1/n_train)

# Predictive power comparison of all models

In [None]:
# Overlay the ROC curves of all trained models on one figure.
figure(num=None, figsize=(8, 6), dpi=150, facecolor='w', edgecolor='k')
plt.title('ROC')

# (false-positive rates, true-positive rates, legend text) per model, in
# plotting order.
_roc_curves = [
    (fpr_skl_liblinear, tpr_skl_liblinear,
     'Sklearn LR liblinear AUC = %0.3f' % roc_auc_skl_liblinear),
    (fpr_skl_sag, tpr_skl_sag,
     'Sklearn LR SAG AUC = %0.3f' % roc_auc_skl_sag),
    (fpr_tf, tpr_tf,
     'TF Centralized LR AUC = %0.3f' % roc_auc_tf),
    (fpr_tf_dp, tpr_tf_dp,
     'TF Centralized LR with DP AUC = %0.3f' % roc_auc_tf_dp),
    (fpr_tff_sgd, tpr_tff_sgd,
     'TF Federated LR SGDM AUC = %0.3f' % roc_auc_tff_sgd),
    (fpr_tff_dp_sgd, tpr_tff_dp_sgd,
     'TF Federated LR with DP SGDM AUC = %0.3f' % roc_auc_tff_dp_sgd),
]
for _curve_fpr, _curve_tpr, _curve_label in _roc_curves:
  plt.plot(_curve_fpr, _curve_tpr, label = _curve_label)

plt.legend(loc = 'lower right')
# Chance-level diagonal, drawn after the legend call.
plt.plot([0, 1], [0, 1],'r--')
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()

In [None]:
# Plot the best federated AUC per participation percentage.
figure(num=None, figsize=(8, 6), dpi=150, facecolor='w', edgecolor='k')
plt.title('AUC as a function of participation of devices')

# Order the (percentage, auc) pairs by percentage, then by AUC.
s = sorted(tff_auc.items(), key = lambda kv:(kv[0], kv[1]))
x_data = [pct for pct, _ in s]
y_data = [best_auc for _, best_auc in s]

plt.plot(x_data, y_data, label = 'TF Federated LR SGDM')
plt.legend(loc = 'lower right')
plt.ylabel('AUC achieved')
plt.xlabel('% of participating devices (random subset in each round)')
plt.show()

# Model parameters comparison

In [None]:
# Gather each model's parameters as one flat vector with the intercept/bias
# first, followed by the feature weights.
output_sklearn = np.insert(sk_model.coef_[0], 0, sk_model.intercept_)

_tf_weights = tf_model.get_weights()
output_tf = np.insert(_tf_weights[0], 0, _tf_weights[1])

_tff_weights = tff_model.get_weights()
output_tff = np.insert(_tff_weights[0], 0, _tff_weights[1])

for _parameter_vector in (output_sklearn, output_tf, output_tff):
  print(_parameter_vector)

# Performance grid eval

In [None]:
import time
from sklearn import datasets

def create_keras_model_benchmark(input_dim):
  """Builds the one-layer sigmoid regression for `input_dim` features.

  Args:
    input_dim: Number of input features.

  Returns:
    An uncompiled `tf.keras.models.Sequential` with a single sigmoid `Dense`
    unit and an L2 kernel penalty of 0.01.
  """
  logistic_unit = tf.keras.layers.Dense(
      units=1,
      activation='sigmoid',
      input_shape=(input_dim,),
      kernel_regularizer=tf.keras.regularizers.l2(0.01),
  )
  return tf.keras.models.Sequential([logistic_unit])

def model_builder(input_dim, input_spec):
  """Builds a TFF model around a one-layer sigmoid regression.

  Args:
    input_dim: Number of input features of the Keras model.
    input_spec: Element spec of the client datasets, passed to TFF unchanged.

  Returns:
    The result of `tff.learning.from_keras_model` with binary cross-entropy
    loss and accuracy/AUC metrics.
  """
  logistic_unit = tf.keras.layers.Dense(
      units=1,
      activation='sigmoid',
      input_shape=(input_dim,),
      kernel_regularizer=tf.keras.regularizers.l2(0.01),
  )
  keras_model = tf.keras.models.Sequential([logistic_unit])
  tracked_metrics = [
      tf.keras.metrics.BinaryAccuracy(name='accuracy'),
      tf.keras.metrics.AUC(name='auc'),
  ]
  return tff.learning.from_keras_model(
      keras_model,
      input_spec=input_spec,
      loss=tf.keras.losses.BinaryCrossentropy(),
      metrics=tracked_metrics)


# Synthetic dataset
MAX_NUM_EXAMPLES = 500
INCREMENT_EXAMPLES = 100
MAX_NUM_FEATURES = 200
INCREMENT_FEATURES = 50

# Grid of problem sizes to time: rows are example counts, columns are
# feature counts.
example_sizes = range(INCREMENT_EXAMPLES, MAX_NUM_EXAMPLES+1, INCREMENT_EXAMPLES)
feature_sizes = range(INCREMENT_FEATURES, MAX_NUM_FEATURES+1, INCREMENT_FEATURES)

# Size the result grid from the ranges themselves so it always has exactly one
# cell per timed configuration.
eval_data = np.zeros((len(example_sizes), len(feature_sizes)))

# The loop indices are deliberately not named `f`: that name is the
# module-level temporary file handle, and rebinding it would drop the handle.
for row, examples_size in enumerate(example_sizes):
  for col, features_size in enumerate(feature_sizes):
    dataset, labels = sklearn.datasets.make_classification(n_samples=examples_size, n_features=features_size, n_classes=2, weights=[0.75])
    n_train = round(TRAIN_PROPORTION * np.size(dataset, 0))
    data_train = dataset[:n_train]
    labels_train =  labels[:n_train]
    data_test = dataset[n_train:]
    labels_test =  labels[n_train:]

    client_dataset_train = create_client_dataset(data_train, labels_train)
    client_dataset_test = create_client_dataset(data_test, labels_test)
    preprocessed_example_dataset_bench = preprocess(client_dataset_train.create_tf_dataset_for_client(client_dataset_train.client_ids[0]))

    # Create TFF iterative process.
    iterative_process = tff.learning.build_federated_averaging_process(
        model_fn=lambda: model_builder(input_dim=features_size, input_spec=preprocessed_example_dataset_bench.element_spec),
        client_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=1.0),
        server_optimizer_fn=lambda: tf.keras.optimizers.Nadam(learning_rate=0.5),
        use_experimental_simulation_loop = True
    )

    state = iterative_process.initialize()
    tff_model_bench = create_keras_model_benchmark(features_size)
    # Every training client participates in the timed round.
    federated_train_data = make_federated_data(client_dataset_train, range(n_train))

    # Time exactly one federated round.
    # NOTE(review): `time.process_time` reports CPU time of the process, not
    # wall-clock time -- confirm this is the intended "runtime" metric.
    start_t = time.process_time()
    state, metrics = iterative_process.next(state, federated_train_data)
    elapsed_time = time.process_time() - start_t
    eval_data[row][col] = elapsed_time

    print(examples_size, features_size, elapsed_time, str(metrics))
    # Sanity-check the model produced by the timed round on the test split.
    state.model.assign_weights_to(tff_model_bench)
    labels_proba = tff_model_bench.predict(data_test)
    fpr, tpr, threshold = sklearn.metrics.roc_curve(labels_test, labels_proba)
    test_loss = tf.keras.losses.binary_crossentropy(labels_test, np.reshape(labels_proba, [-1]))
    print('validation auc={}, loss={}'.format(sklearn.metrics.auc(fpr, tpr), test_loss))

In [None]:
import matplotlib.pyplot as plt
# Heatmap of the timing grid: rows are example counts, columns feature counts.
y_axis = list(range(INCREMENT_EXAMPLES, MAX_NUM_EXAMPLES+1, INCREMENT_EXAMPLES))
x_axis = list(range(INCREMENT_FEATURES, MAX_NUM_FEATURES+1, INCREMENT_FEATURES))

plt.figure(figsize=(6, 6), dpi=150)
plt.title('Runtime [s] as a function of number of clients and features')

color_map = plt.imshow(eval_data)
color_map.set_cmap("Blues_r")
plt.colorbar()

plt.ylabel('# Examples')
plt.xlabel('# Features')

# Label each grid cell position with the size it corresponds to.
_row_positions = range(len(y_axis))
_col_positions = range(len(x_axis))
plt.yticks(ticks=_row_positions, labels=y_axis)
plt.xticks(ticks=_col_positions, labels=x_axis)


# Runtime as a function of number of clients

In [None]:
import time
from sklearn import datasets


def create_keras_model_benchmark(input_dim):
  """Returns an uncompiled one-unit sigmoid model over `input_dim` features.

  Args:
    input_dim: Number of input features.

  Returns:
    A `tf.keras.models.Sequential` with one `Dense` layer (1 unit, sigmoid
    activation, L2 kernel penalty of 0.01).
  """
  benchmark_model = tf.keras.models.Sequential([
      tf.keras.layers.Dense(
          units=1,
          activation='sigmoid',
          input_shape=(input_dim,),
          kernel_regularizer=tf.keras.regularizers.l2(0.01),
      ),
  ])
  return benchmark_model

def model_builder(input_dim, input_spec):
  """Wraps a fresh one-unit sigmoid Keras model as a TFF model.

  Args:
    input_dim: Number of input features of the Keras model.
    input_spec: Element spec of the client datasets, passed to TFF unchanged.

  Returns:
    The result of `tff.learning.from_keras_model`, configured with binary
    cross-entropy loss plus binary-accuracy and AUC metrics.
  """
  regression_layer = tf.keras.layers.Dense(
      units=1,
      activation='sigmoid',
      input_shape=(input_dim,),
      kernel_regularizer=tf.keras.regularizers.l2(0.01),
  )
  keras_model = tf.keras.models.Sequential([regression_layer])

  accuracy_metric = tf.keras.metrics.BinaryAccuracy(name='accuracy')
  auc_metric = tf.keras.metrics.AUC(name='auc')
  return tff.learning.from_keras_model(
      keras_model,
      input_spec=input_spec,
      loss=tf.keras.losses.BinaryCrossentropy(),
      metrics=[accuracy_metric, auc_metric])


# Synthetic dataset
MAX_NUM_EXAMPLES = 10000
INCREMENT_EXAMPLES = 1000
MAX_NUM_FEATURES = 60
INCREMENT_FEATURES = 60

# Problem sizes to time: rows are example counts, columns are feature counts.
example_sizes = range(INCREMENT_EXAMPLES, MAX_NUM_EXAMPLES+1, INCREMENT_EXAMPLES)
feature_sizes = range(INCREMENT_FEATURES, MAX_NUM_FEATURES+1, INCREMENT_FEATURES)

# Size the result grid from the ranges themselves so it always has exactly one
# cell per timed configuration.
eval_data = np.zeros((len(example_sizes), len(feature_sizes)))

# The loop indices are deliberately not named `f`: that name is the
# module-level temporary file handle, and rebinding it would drop the handle.
for row, examples_size in enumerate(example_sizes):
  for col, features_size in enumerate(feature_sizes):
    dataset, labels = sklearn.datasets.make_classification(n_samples=examples_size, n_features=features_size, n_classes=2, weights=[0.75])
    n_train = round(TRAIN_PROPORTION * np.size(dataset, 0))
    data_train = dataset[:n_train]
    labels_train =  labels[:n_train]
    data_test = dataset[n_train:]
    labels_test =  labels[n_train:]

    client_dataset_train = create_client_dataset(data_train, labels_train)
    client_dataset_test = create_client_dataset(data_test, labels_test)
    preprocessed_example_dataset_bench = preprocess(client_dataset_train.create_tf_dataset_for_client(client_dataset_train.client_ids[0]))

    # Create TFF iterative process.
    iterative_process = tff.learning.build_federated_averaging_process(
        model_fn=lambda: model_builder(input_dim=features_size, input_spec=preprocessed_example_dataset_bench.element_spec),
        client_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=1.0),
        server_optimizer_fn=lambda: tf.keras.optimizers.Nadam(learning_rate=0.5),
        use_experimental_simulation_loop = True
    )

    state = iterative_process.initialize()
    tff_model_bench = create_keras_model_benchmark(features_size)
    # Every training client participates in the timed round.
    federated_train_data = make_federated_data(client_dataset_train, range(n_train))

    # Time exactly one federated round.
    # NOTE(review): `time.process_time` reports CPU time of the process, not
    # wall-clock time -- confirm this is the intended "runtime" metric.
    start_t = time.process_time()
    state, metrics = iterative_process.next(state, federated_train_data)
    elapsed_time = time.process_time() - start_t
    eval_data[row][col] = elapsed_time
    print(examples_size, features_size, elapsed_time)



In [None]:
# Line plot of the measured round time against the number of generated examples.
figure(num=None, figsize=(8, 6), dpi=150, facecolor='w', edgecolor='k')

_first_size, _last_size = INCREMENT_EXAMPLES, MAX_NUM_EXAMPLES+1
x_axis = list(range(_first_size, _last_size, INCREMENT_EXAMPLES))

plt.plot(x_axis, eval_data)
plt.xlabel('Number of training clients')
plt.ylabel('Runtime [s]')
plt.show()