<a href="https://colab.research.google.com/github/haaslogan1/Federated-Learning/blob/main/Q2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
!pip install h5py
!pip install typing-extensions
!pip install wheel
!pip install --quiet --upgrade tensorflow-federated
!pip install --quiet --upgrade nest-asyncio

import nest_asyncio
nest_asyncio.apply()
import collections
import numpy as np
import tensorflow as tf
import tensorflow_federated as tff



In [8]:
SEED = 200230801  #TODO: set seed to stuent ID number
np.random.seed(200230801) #TODO: random number generator seed set to stuent ID number

In [2]:
# preprocess the input data 
def preprocess(dataset, epoch):
  def batch_format_fn(element):
    """Flatten a batch `pixels` and return the features as an `OrderedDict`."""
    return collections.OrderedDict(
        x=tf.reshape(element['pixels'], [-1, 784]),
        y=tf.reshape(element['label'], [-1, 1]))

  return dataset.repeat(epoch).shuffle(100, seed=SEED).batch(
      20).map(batch_format_fn).prefetch(10)

# combine data from multiple clients
def make_federated_data(client_data, client_ids, epoch):
  return [
      preprocess(client_data.create_tf_dataset_for_client(x), epoch)
      for x in client_ids
  ]

In [3]:
# download the MNIST data 
emnist_train, emnist_test = tff.simulation.datasets.emnist.load_data()
print ("Total number of clients: ",len(emnist_train.client_ids))

Downloading emnist_all.sqlite.lzma: 100%|██████████| 170507172/170507172 [00:43<00:00, 3967718.46it/s]


Total number of clients:  3383


In [9]:
# determine the sample data input data structure for ML model 
example_dataset = emnist_train.create_tf_dataset_for_client(emnist_train.client_ids[0])
preprocessed_example_dataset = preprocess(example_dataset, 0)

# Neural network model
def create_keras_model():
  return tf.keras.models.Sequential([
      tf.keras.layers.InputLayer(input_shape=(784,)),
      tf.keras.layers.Dense(10, kernel_initializer='zeros'),
      tf.keras.layers.Softmax(),
  ])
  
def model_fn():
  # We _must_ create a new model here, and _not_ capture it from an external
  # scope. TFF will call this within different graph contexts.
  keras_model = create_keras_model()
  return tff.learning.from_keras_model(
      keras_model,
      input_spec=preprocessed_example_dataset.element_spec,
      loss=tf.keras.losses.SparseCategoricalCrossentropy(),
      metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])

In [None]:
NUM_CLIENTS = 5 #TODO: change number of clients as needed
NUM_EPOCHS = 5 #TODO: change the number of training epoch for local training by each client

## TODO: you need to iteratively change NUM_CLIENTS for part 'a'
## TODO: you need to iteratively change NUM_EPOCHS for part 'b'

its = [5, 50, 100]

for it in its:

  NUM_CLIENTS = it

  print('There are ' + str(NUM_CLIENTS) + ' clients')
  print('There are ' + str(NUM_EPOCHS) + ' clients')

  sample_clients = np.random.choice(emnist_train.client_ids, NUM_CLIENTS)
  print ("Client IDs selected: ", sample_clients)

  # conside data from only the selected clients
  federated_train_data = make_federated_data(emnist_train, sample_clients, NUM_EPOCHS)
  print(f'Number of client datasets considered: {len(sample_clients)}')

  # Initialize the iterative training object with the right learning parameter
  iterative_process = tff.learning.build_federated_averaging_process(
    model_fn,
    client_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=0.01),
    server_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=1.0))

  # initialize the parameters of the ML model (you need to initialize this each time you change the client number or epoch numer)
  state = iterative_process.initialize()

  # total number of server and client interactions
  NUM_ROUNDS = 11
  for round_num in range(1, NUM_ROUNDS):
    state, metrics = iterative_process.next(state, federated_train_data)
    print('round {:2d}, training accuracy= {}%'.format(round_num, metrics['train']['sparse_categorical_accuracy']*100))

  # evalute the latest converged model 
  evaluation = tff.learning.build_federated_evaluation(model_fn)
  federated_test_data = make_federated_data(emnist_test, sample_clients, 5)
  test_metrics = evaluation(state.model, federated_test_data)
  print('Test Accuracy: {}%'.format(str(test_metrics['eval']['sparse_categorical_accuracy']*100)))

for it in its:

  NUM_EPOCHS = it

  print('There are ' + str(NUM_CLIENTS) + ' clients')
  print('There are ' + str(NUM_EPOCHS) + ' clients')

  sample_clients = np.random.choice(emnist_train.client_ids, NUM_CLIENTS)
  print ("Client IDs selected: ", sample_clients)

  # conside data from only the selected clients
  federated_train_data = make_federated_data(emnist_train, sample_clients, NUM_EPOCHS)
  print(f'Number of client datasets considered: {len(sample_clients)}')

  # Initialize the iterative training object with the right learning parameter
  iterative_process = tff.learning.build_federated_averaging_process(
    model_fn,
    client_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=0.01),
    server_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=1.0))

  # initialize the parameters of the ML model (you need to initialize this each time you change the client number or epoch numer)
  state = iterative_process.initialize()

  # total number of server and client interactions
  NUM_ROUNDS = 11
  for round_num in range(1, NUM_ROUNDS):
    state, metrics = iterative_process.next(state, federated_train_data)
    print('round {:2d}, training accuracy= {}%'.format(round_num, metrics['train']['sparse_categorical_accuracy']*100))

  # evalute the latest converged model 
  evaluation = tff.learning.build_federated_evaluation(model_fn)
  federated_test_data = make_federated_data(emnist_test, sample_clients, 5)
  test_metrics = evaluation(state.model, federated_test_data)
  print('Test Accuracy: {}%'.format(str(test_metrics['eval']['sparse_categorical_accuracy']*100)))

There are 5 clients
There are 5 clients
Client IDs selected:  ['f0981_17' 'f3185_38' 'f0787_21' 'f2410_85' 'f1660_07']
Number of client datasets considered: 5
round  1, training accuracy= 13.252031803131104%
round  2, training accuracy= 13.29268366098404%
round  3, training accuracy= 15.365853905677795%
round  4, training accuracy= 20.650406181812286%
round  5, training accuracy= 21.422764658927917%
round  6, training accuracy= 27.398374676704407%
round  7, training accuracy= 27.560976147651672%
round  8, training accuracy= 31.747967004776%
round  9, training accuracy= 36.544716358184814%
round 10, training accuracy= 35.650405287742615%
Test Accuracy: 55.17241358757019%
There are 50 clients
There are 5 clients
Client IDs selected:  ['f4055_44' 'f0593_46' 'f2304_67' 'f1960_26' 'f1857_48' 'f2076_31'
 'f1302_15' 'f4001_45' 'f3572_03' 'f0771_13' 'f3620_07' 'f1376_43'
 'f0918_47' 'f0779_27' 'f3928_39' 'f1735_22' 'f4070_43' 'f1678_19'
 'f2289_62' 'f3909_46' 'f4034_28' 'f3549_47' 'f1007_22' '