<a href="https://colab.research.google.com/github/UmarBalak/Federated-Learning-with-TensorFlow/blob/main/TensorFlow_Fedetared_Learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install tensorflow_federated



In [None]:
import collections
import tensorflow as tf
import tensorflow_federated as tff
import numpy as np
np.random.seed(0)

# ***Federated EMNIST***

In [None]:
# Load the EMNIST data through TFF's simulation dataset loader; the call returns
# a pair that is bound to `emnist_train` and `emnist_test`.
emnist_train, emnist_test = tff.simulation.datasets.emnist.load_data()

In [None]:
# Number of client ids available in the training data.
len(emnist_train.client_ids)

In [None]:
# Display the structure of a single element of the training data.
emnist_train.element_type_structure

In [None]:
# Build the dataset belonging to the first listed training client.
example_dataset = emnist_train.create_tf_dataset_for_client(
    emnist_train.client_ids[0])

# Take the first element of that client's dataset.
example_element = next(iter(example_dataset))

# Show that element's label as a NumPy value.
example_element['label'].numpy()

In [None]:
from matplotlib import pyplot as plt
# Render the example element's pixels with a gray colormap and no grid lines.
plt.imshow(example_element['pixels'].numpy(), cmap='gray', aspect='equal')
plt.grid(False)
# Assign to `_` so the return value of show() is not echoed as cell output.
_ = plt.show()

In [None]:
## Example MNIST digits for one client
figure = plt.figure(figsize=(20, 4))

# Lay out up to 40 examples on a 4x10 grid. Subplot indices are 1-based, so
# enumerate from 1 instead of maintaining a manual counter.
for j, example in enumerate(example_dataset.take(40), start=1):
  plt.subplot(4, 10, j)
  plt.imshow(example['pixels'].numpy(), cmap='gray', aspect='equal')
  plt.axis('off')

In [None]:
# Number of examples per label for a sample of clients
f = plt.figure(figsize=(12, 7))
f.suptitle('Label Counts for a Sample of Clients')
# One subplot per client, for the first six client ids.
for i in range(6):
  client_dataset = emnist_train.create_tf_dataset_for_client(
      emnist_train.client_ids[i])
  # Maps each label value to the list of occurrences seen for this client.
  plot_data = collections.defaultdict(list)
  for example in client_dataset:
    # Append counts individually per label to make plots
    # more colorful instead of one color per plot.
    label = example['label'].numpy()
    plot_data[label].append(label)
  plt.subplot(2, 3, i+1)
  plt.title('Client {}'.format(i))
  # Draw a separate histogram for each label value 0-9 on the same axes.
  for j in range(10):
    plt.hist(
        plot_data[j],
        density=False,
        bins=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])

In [None]:
# Each client has different mean images, meaning each client will be nudging
# the model in their own directions locally.

for i in range(5):
  client_dataset = emnist_train.create_tf_dataset_for_client(
      emnist_train.client_ids[i])
  # Group this client's images by label so they can be averaged per digit.
  plot_data = collections.defaultdict(list)
  for example in client_dataset:
    plot_data[example['label'].numpy()].append(example['pixels'].numpy())
  f = plt.figure(i, figsize=(12, 5))
  f.suptitle("Client #{}'s Mean Image Per Label".format(i))
  for j in range(10):
    plt.subplot(2, 5, j+1)
    plt.axis('off')
    # A client may have no examples of some digit. Averaging an empty list
    # yields nan, which cannot be reshaped to 28x28, so leave that slot blank.
    if not plot_data[j]:
      continue
    mean_img = np.mean(plot_data[j], 0)
    plt.imshow(mean_img.reshape((28, 28)))
    plt.axis('off')

In [None]:
# Tunable settings for the federated training data pipeline.
NUM_CLIENTS = 10  # how many client ids are sampled for training
NUM_EPOCHS = 5  # times each client dataset is repeated in preprocess()
BATCH_SIZE = 20  # examples per batch in preprocess()
SHUFFLE_BUFFER = 100  # shuffle buffer size in preprocess()
PREFETCH_BUFFER = 10  # number of batches prefetched in preprocess()

def preprocess(dataset):
  """Repeat, shuffle, batch and reformat one client's dataset.

  Args:
    dataset: a dataset whose elements have `pixels` and `label` entries.

  Returns:
    A dataset of `OrderedDict(x=..., y=...)` batches, repeated `NUM_EPOCHS`
    times, shuffled with a fixed seed, batched by `BATCH_SIZE` and prefetched.
  """

  def batch_format_fn(element):
    """Reshape a batch into 784-wide feature rows `x` and a label column `y`."""
    features = tf.reshape(element['pixels'], [-1, 784])
    labels = tf.reshape(element['label'], [-1, 1])
    return collections.OrderedDict(x=features, y=labels)

  repeated = dataset.repeat(NUM_EPOCHS)
  shuffled = repeated.shuffle(SHUFFLE_BUFFER, seed=1)
  batched = shuffled.batch(BATCH_SIZE)
  formatted = batched.map(batch_format_fn)
  return formatted.prefetch(PREFETCH_BUFFER)

In [None]:
# Run the example client's dataset through the preprocessing pipeline.
preprocessed_example_dataset = preprocess(example_dataset)

# Take the first batch and convert every tensor in it to a NumPy array.
sample_batch = tf.nest.map_structure(lambda x: x.numpy(),
                                     next(iter(preprocessed_example_dataset)))

# Display the batch as the cell output.
sample_batch

In [None]:
def make_federated_data(client_data, client_ids):
  """Return one preprocessed dataset per client id, in the order given.

  Args:
    client_data: object exposing `create_tf_dataset_for_client(client_id)`.
    client_ids: iterable of client ids to build datasets for.

  Returns:
    A list with the output of `preprocess` for each client's dataset.
  """
  federated_data = []
  for client_id in client_ids:
    client_dataset = client_data.create_tf_dataset_for_client(client_id)
    federated_data.append(preprocess(client_dataset))
  return federated_data

In [None]:
# Use the first NUM_CLIENTS client ids as the training cohort.
sample_clients = emnist_train.client_ids[0:NUM_CLIENTS]

# One preprocessed dataset per selected client.
federated_train_data = make_federated_data(emnist_train, sample_clients)

print(f'Number of client datasets: {len(federated_train_data)}')
print(f'First dataset: {federated_train_data[0]}')

In [None]:
def create_keras_model():
  """Build a softmax classifier: 784 inputs, one zero-initialised Dense(10)."""
  model = tf.keras.models.Sequential()
  model.add(tf.keras.layers.InputLayer(input_shape=(784,)))
  model.add(tf.keras.layers.Dense(10, kernel_initializer='zeros'))
  model.add(tf.keras.layers.Softmax())
  return model

In [None]:
def model_fn():
  """Wrap a newly built Keras model as a TFF model.

  The input spec is taken from `preprocessed_example_dataset`; the loss and
  metric are the sparse categorical cross-entropy and accuracy from Keras.
  """
  # We _must_ create a new model here, and _not_ capture it from an external
  # scope. TFF will call this within different graph contexts.
  keras_model = create_keras_model()
  loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
  accuracy_metric = tf.keras.metrics.SparseCategoricalAccuracy()
  return tff.learning.models.from_keras_model(
      keras_model,
      input_spec=preprocessed_example_dataset.element_spec,
      loss=loss_fn,
      metrics=[accuracy_metric])

In [None]:
# Build the weighted federated averaging process from `model_fn`, with SGD
# (learning rate 0.02) for clients and SGD (learning rate 1.0) for the server.
# Optimizers are passed as zero-argument factories rather than instances.
training_process = tff.learning.algorithms.build_weighted_fed_avg(
    model_fn,
    client_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=0.02),
    server_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=1.0))

In [None]:
# Print the formatted type signature of the process's `initialize` computation.
print(training_process.initialize.type_signature.formatted_representation())

In [None]:
# Create the initial training state.
train_state = training_process.initialize()

In [None]:
# Run one round of training and keep the updated state and the round's metrics.
result = training_process.next(train_state, federated_train_data)
train_state = result.state
train_metrics = result.metrics
print('round  1, metrics={}'.format(train_metrics))

In [None]:
# Round 1 was run separately above, so continue from round 2. Because
# range(2, NUM_ROUNDS) excludes its end, this runs rounds 2 through 10.
NUM_ROUNDS = 11
for round_num in range(2, NUM_ROUNDS):
  # Each round starts from the state produced by the previous one.
  result = training_process.next(train_state, federated_train_data)
  train_state = result.state
  train_metrics = result.metrics
  print('round {:2d}, metrics={}'.format(round_num, train_metrics))

# ***New Dataset: CIFAR-100***

In [None]:
# Load the CIFAR-100 data through TFF's simulation dataset loader; the call
# returns a pair that is bound to `cifar_train` and `cifar_test`.
cifar_train, cifar_test = tff.simulation.datasets.cifar100.load_data()

In [None]:
# Number of client ids available in the CIFAR-100 training data.
len(cifar_train.client_ids)

In [None]:
# Display the structure of a single element of the CIFAR-100 training data.
cifar_train.element_type_structure

In [None]:
# Rebind `example_dataset` (previously an EMNIST client) to the dataset of the
# first listed CIFAR-100 training client.
example_dataset = cifar_train.create_tf_dataset_for_client(
    cifar_train.client_ids[0])

# Take the first element of that client's dataset.
example_element = next(iter(example_dataset))

# Show that element's label as a NumPy value.
example_element['label'].numpy()

In [None]:
from matplotlib import pyplot as plt
# Show the example CIFAR image without grid lines.
# NOTE(review): matplotlib ignores `cmap` for RGB(A) input, so 'gray' likely has
# no effect if this image has colour channels — confirm.
plt.imshow(example_element['image'].numpy(), cmap='gray', aspect='equal')
plt.grid(False)
# Assign to `_` so the return value of show() is not echoed as cell output.
_ = plt.show()

In [None]:
## Example CIFAR-100 images for one client
figure = plt.figure(figsize=(20, 4))

# Lay out up to 50 examples on a 5x10 grid. Subplot indices are 1-based, so
# enumerate from 1 instead of maintaining a manual counter.
for j, example in enumerate(example_dataset.take(50), start=1):
  plt.subplot(5, 10, j)
  plt.imshow(example['image'].numpy(), cmap='gray', aspect='equal')
  plt.axis('off')

In [None]:
# Number of examples per label for a sample of clients
f = plt.figure(figsize=(12, 7))
f.suptitle('Label Counts for a Sample of Clients')
# CIFAR-100 fine labels run from 0 to 99, so 101 bin edges give one bin per
# label, including the last one.
label_bins = list(range(101))
for i in range(6):
  # Read from the CIFAR-100 clients loaded in this section, not the EMNIST
  # clients used earlier in the notebook.
  client_dataset = cifar_train.create_tf_dataset_for_client(
      cifar_train.client_ids[i])
  plot_data = collections.defaultdict(list)
  for example in client_dataset:
    # Append counts individually per label to make plots
    # more colorful instead of one color per plot.
    label = example['label'].numpy()
    plot_data[label].append(label)
  plt.subplot(2, 3, i+1)
  plt.title('Client {}'.format(i))
  # Only draw labels this client actually has; iterating the keys covers every
  # label value instead of just the first ten.
  for label in sorted(plot_data):
    plt.hist(
        plot_data[label],
        density=False,
        bins=label_bins)

In [None]:
# Tunable settings for the CIFAR-100 pipeline. These re-declare the same
# names and values used in the EMNIST section earlier in the notebook.
NUM_CLIENTS = 10  # how many client ids are sampled for training
NUM_EPOCHS = 5  # times each client dataset is repeated in preprocess()
BATCH_SIZE = 20  # examples per batch in preprocess()
SHUFFLE_BUFFER = 100  # shuffle buffer size in preprocess()
PREFETCH_BUFFER = 10  # number of batches prefetched in preprocess()

def preprocess(dataset):
  """Repeat, shuffle, batch and reformat one CIFAR-100 client's dataset.

  Args:
    dataset: a dataset whose elements have `image` and `label` entries.

  Returns:
    A dataset of `OrderedDict(x=..., y=...)` batches where `x` holds float32
    images of shape `[batch, 32, 32, 3]` scaled to [0, 1] and `y` holds the
    labels as a `[batch, 1]` column.
  """

  def batch_format_fn(element):
    """Convert a batch of examples into model-ready `x` and `y` tensors."""
    # Cast and scale so the model receives floating-point inputs.
    images = tf.cast(element['image'], tf.float32) / 255.0
    return collections.OrderedDict(
        # Keep one row per example. The previous target shape `[-1, 61440]`
        # (61440 = 20 * 32 * 32 * 3) folded a whole 20-image batch into a
        # single row, leaving `x` with 1 row against 20 labels in `y`.
        x=tf.reshape(images, [-1, 32, 32, 3]),
        y=tf.reshape(element['label'], [-1, 1]))

  return dataset.repeat(NUM_EPOCHS).shuffle(SHUFFLE_BUFFER, seed=1).batch(
      BATCH_SIZE).map(batch_format_fn).prefetch(PREFETCH_BUFFER)

In [None]:
# Run the example CIFAR client's dataset through the preprocessing pipeline.
preprocessed_example_dataset = preprocess(example_dataset)

# Take the first batch and convert every tensor in it to a NumPy array.
sample_batch = tf.nest.map_structure(lambda x: x.numpy(),
                                     next(iter(preprocessed_example_dataset)))

# Display the batch as the cell output.
sample_batch

In [None]:
def make_federated_data(client_data, client_ids):
  """Build the list of preprocessed datasets for the given client ids.

  Args:
    client_data: object exposing `create_tf_dataset_for_client(client_id)`.
    client_ids: iterable of client ids to build datasets for.

  Returns:
    A list with the output of `preprocess` for each client, in input order.
  """

  def preprocessed_for(client_id):
    return preprocess(client_data.create_tf_dataset_for_client(client_id))

  return list(map(preprocessed_for, client_ids))

In [None]:
# Use the first NUM_CLIENTS CIFAR-100 client ids as the training cohort.
sample_clients = cifar_train.client_ids[0:NUM_CLIENTS]

# One preprocessed dataset per selected client.
federated_train_data = make_federated_data(cifar_train, sample_clients)

print(f'Number of client datasets: {len(federated_train_data)}')
print(f'First dataset: {federated_train_data[0]}')

In [None]:
def create_keras_model():
  """Build a linear softmax classifier for 32x32x3 images and 100 classes.

  Returns:
    A `Sequential` model that flattens each image, applies one zero-initialised
    Dense layer with one unit per CIFAR-100 label, and normalises with softmax.
  """
  return tf.keras.models.Sequential([
      tf.keras.layers.InputLayer(input_shape=(32, 32, 3)),
      # Dense acts on the last axis only, so flatten the image first to get one
      # feature vector (and therefore one prediction) per example.
      tf.keras.layers.Flatten(),
      # One output unit per class; the previous 64 units could not represent
      # labels 64-99.
      tf.keras.layers.Dense(100, kernel_initializer='zeros'),
      # The first positional argument of Softmax is `axis`, not a unit count,
      # so `Softmax(100)` asked for a non-existent axis. Use the default axis.
      tf.keras.layers.Softmax(),
  ])

In [None]:
def model_fn():
  """Wrap a newly built Keras model as a TFF model for the CIFAR-100 data.

  The input spec is taken from `preprocessed_example_dataset`.
  """
  # We _must_ create a new model here, and _not_ capture it from an external
  # scope. TFF will call this within different graph contexts.
  keras_model = create_keras_model()
  return tff.learning.models.from_keras_model(
      keras_model,
      input_spec=preprocessed_example_dataset.element_spec,
      # preprocess() emits labels as a `[batch, 1]` column of class ids, not
      # one-hot vectors, so the sparse loss and metric are the matching ones.
      loss=tf.keras.losses.SparseCategoricalCrossentropy(),
      metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])

In [None]:
# Build the weighted federated averaging process from `model_fn`, with SGD
# (learning rate 0.02) for clients and SGD (learning rate 1.0) for the server.
# Optimizers are passed as zero-argument factories rather than instances.
training_process = tff.learning.algorithms.build_weighted_fed_avg(
    model_fn,
    client_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=0.02),
    server_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=1.0))

# ***cifar10 running***

In [None]:
import tensorflow as tf
import tensorflow_federated as tff
import numpy as np
import collections

In [None]:
# Load CIFAR-10 data
(train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.cifar10.load_data()

# Reduce the dataset to 30%
# int() truncates, so the sizes are whole numbers of examples.
train_size = int(0.3 * len(train_images))
test_size = int(0.3 * len(test_images))

# Keep only the leading slice of each array; no shuffling happens here.
train_images = train_images[:train_size]
train_labels = train_labels[:train_size]
test_images = test_images[:test_size]
test_labels = test_labels[:test_size]

# Function to preprocess the data
def preprocess(dataset):
    """Normalise, shuffle and batch a client dataset.

    Args:
        dataset: a dataset whose elements have `image` and `label` entries.

    Returns:
        A dataset of `OrderedDict(x=..., y=...)` batches of 20, where `x` is
        the image cast to float32 and divided by 255 and `y` is the label cast
        to int32.
    """
    def batch_format_fn(element):
        # Applied before batching, so despite the name this sees one example.
        scaled_image = tf.cast(element['image'], tf.float32) / 255.0  # Normalize the input images
        int_label = tf.cast(element['label'], tf.int32)
        return collections.OrderedDict(x=scaled_image, y=int_label)

    formatted = dataset.map(batch_format_fn)
    shuffled = formatted.shuffle(100)
    return shuffled.batch(20)

# Function to create federated data
def make_federated_data(client_data, client_ids):
    """Return one preprocessed dataset per client id, in the order given.

    Args:
        client_data: object exposing `create_tf_dataset_for_client(client_id)`.
        client_ids: iterable of client ids to build datasets for.

    Returns:
        A list with the output of `preprocess` for each client's dataset.
    """
    federated_data = []
    for client_id in client_ids:
        client_dataset = client_data.create_tf_dataset_for_client(client_id)
        federated_data.append(preprocess(client_dataset))
    return federated_data

In [None]:
# Create client data
def create_client_data(images, labels, num_clients=10):
    """Split one in-memory dataset into `num_clients` simulated clients.

    Args:
        images: array of images, sliced along its first axis.
        labels: array of labels aligned with `images`.
        num_clients: number of shards, and therefore clients, to create.

    Returns:
        A TFF `ClientData` with ids "0" .. str(num_clients - 1), where client
        `k` is served shard `k` of the combined dataset.
    """
    dataset = tf.data.Dataset.from_tensor_slices({'image': images, 'label': labels})

    def dataset_for_client(client_id):
        # Client ids are the shard indices rendered as strings.
        return dataset.shard(num_clients, int(client_id))

    shard_ids = list(map(str, range(num_clients)))
    return tff.simulation.datasets.ClientData.from_clients_and_tf_fn(
        client_ids=shard_ids,
        serializable_dataset_fn=dataset_for_client)

# Build simulated client data for the train and test arrays, using the default
# number of clients.
train_data = create_client_data(train_images, train_labels)
test_data = create_client_data(test_images, test_labels)

# Select a subset of clients for training
# NOTE(review): create_client_data defaults to 10 clients and size=10 is drawn
# without replacement, so this selects every client, just in random order.
client_ids = np.random.choice(train_data.client_ids, size=10, replace=False)
federated_train_data = make_federated_data(train_data, client_ids)


In [None]:
def create_keras_model():
    """Build a small CNN for 32x32x3 inputs with a 10-way softmax output.

    Two Conv2D + MaxPooling2D stages (32 then 64 filters) feed a flattened
    Dense(128) layer followed by the Dense(10) softmax classifier.
    """
    feature_extractor = [
        tf.keras.layers.Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(32, 32, 3)),
        tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
        tf.keras.layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    ]
    classifier_head = [
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax'),
    ]
    return tf.keras.models.Sequential(feature_extractor + classifier_head)


In [None]:
def model_fn():
    """Wrap a newly built Keras model as a TFF model.

    The input spec comes from the first federated training dataset; the loss
    and metric are the sparse categorical cross-entropy and accuracy.
    """
    keras_model = create_keras_model()
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
    accuracy_metric = tf.keras.metrics.SparseCategoricalAccuracy()
    return tff.learning.models.from_keras_model(
        keras_model,
        input_spec=federated_train_data[0].element_spec,
        loss=loss_fn,
        metrics=[accuracy_metric])


In [None]:
# Build the weighted federated averaging process from `model_fn`, with SGD
# (learning rate 0.02) for clients and SGD (learning rate 1.0) for the server.
# Optimizers are passed as zero-argument factories rather than instances.
iterative_process = tff.learning.algorithms.build_weighted_fed_avg(
    model_fn=model_fn,
    client_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=0.02),
    server_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=1.0))


In [None]:
# Start from the process's initial state.
state = iterative_process.initialize()

# Run rounds 1 through 10, feeding each round's state into the next.
for round_num in range(1, 11):
    # Read the step output by attribute, as the earlier training cells in this
    # notebook do, rather than relying on it being unpackable as a tuple.
    result = iterative_process.next(state, federated_train_data)
    state = result.state
    metrics = result.metrics
    print(f'round {round_num}, metrics={metrics}')


# **cifar10 new testing**

In [1]:
import tensorflow as tf
import tensorflow_federated as tff
import numpy as np
from sklearn.metrics import f1_score

In [25]:
# Load CIFAR-10 dataset
(cifar_train_images, cifar_train_labels), (cifar_test_images, cifar_test_labels) = tf.keras.datasets.cifar10.load_data()

# Normalize the images to [0, 1] range
# The division produces new floating-point arrays that replace the originals.
cifar_train_images, cifar_test_images = cifar_train_images / 255.0, cifar_test_images / 255.0

# Create a function to preprocess the data
def preprocess(dataset, batch_size):
    """Shuffle, batch and convert a client dataset into `(pixels, label)` tuples.

    Args:
        dataset: a dataset whose elements have `pixels` and `label` entries.
        batch_size: number of examples per batch.

    Returns:
        A dataset of `(pixels, label)` batches, shuffled with a 10000-element
        buffer and prefetched 10 batches ahead.
    """
    def batch_format_fn(element):
        # Applied after batching, so both values carry a leading batch axis.
        pixels, label = element['pixels'], element['label']
        return (pixels, label)

    batched = dataset.shuffle(10000).batch(batch_size)
    as_tuples = batched.map(batch_format_fn)
    return as_tuples.prefetch(10)

def create_federated_data(client_data, client_ids, batch_size=32):
    """Return one preprocessed dataset per client id, in the order given.

    Args:
        client_data: object exposing `create_tf_dataset_for_client(client_id)`.
        client_ids: iterable of client ids to build datasets for.
        batch_size: batch size forwarded to `preprocess`.

    Returns:
        A list with the output of `preprocess` for each client's dataset.
    """
    federated = []
    for client_id in client_ids:
        raw_dataset = client_data.create_tf_dataset_for_client(client_id)
        federated.append(preprocess(raw_dataset, batch_size))
    return federated

# Define a function to convert numpy arrays to tf.data.Dataset
def create_tf_dataset_for_client_fn(client_id):
    """Return one client's contiguous slice of the CIFAR-10 training arrays.

    The first 5000 training examples are divided evenly between the
    `num_clients` clients (read from the notebook's global scope); client `k`
    receives the k-th consecutive chunk.

    Args:
        client_id: the client's index, as anything `int()` accepts.

    Returns:
        A `tf.data.Dataset` of dicts with `pixels` and `label` entries.
    """
    shard_size = 5000 // num_clients
    first = int(client_id) * shard_size
    last = first + shard_size
    return tf.data.Dataset.from_tensor_slices({
        'pixels': cifar_train_images[first:last],
        'label': cifar_train_labels[first:last],
    })

# Create federated data
# `num_clients` is also read by create_tf_dataset_for_client_fn to size each
# client's slice, so it must be defined before that function is called.
num_clients = 10
# Client ids are the client indices rendered as strings.
client_ids = [str(i) for i in range(num_clients)]
client_data = tff.simulation.datasets.ClientData.from_clients_and_tf_fn(
    client_ids=client_ids,
    serializable_dataset_fn=create_tf_dataset_for_client_fn
)

# One preprocessed dataset per client, using the default batch size.
federated_train_data = create_federated_data(client_data, client_ids)

In [30]:
from tensorflow.keras import layers

def create_keras_model():
    """Build a three-block CNN for 32x32x3 inputs with a 10-way softmax output.

    Each block has two 3x3 'same'-padded, L2-regularised ReLU convolutions with
    batch normalisation, followed by max pooling; dropout is interleaved as
    listed below. A regularised Dense(256) layer precedes the classifier.
    """
    weight_decay = 1e-4

    def l2_penalty():
        # A fresh regulariser per layer, as in a layer-by-layer definition.
        return tf.keras.regularizers.l2(weight_decay)

    def conv(filters, name, **extra):
        # Every convolution shares kernel size, padding, penalty and activation.
        return layers.Conv2D(
            filters, (3, 3), padding='same', kernel_regularizer=l2_penalty(),
            activation='relu', name=name, **extra)

    stack = [
        # 1st Block
        conv(64, "conv2d_1", input_shape=(32, 32, 3)),
        layers.BatchNormalization(name="batch_normalization_1"),
        layers.Dropout(0.3, name="dropout_1"),
        conv(64, "conv2d_2"),
        layers.BatchNormalization(name="batch_normalization_2"),
        layers.MaxPooling2D(pool_size=(2, 2), name="max_pooling2d_1"),

        # 2nd Block
        conv(128, "conv2d_3"),
        layers.BatchNormalization(name="batch_normalization_3"),
        layers.Dropout(0.4, name="dropout_2"),
        conv(128, "conv2d_4"),
        layers.BatchNormalization(name="batch_normalization_4"),
        layers.MaxPooling2D(pool_size=(2, 2), name="max_pooling2d_2"),

        # 3rd Block
        conv(128, "conv2d_5"),
        layers.BatchNormalization(name="batch_normalization_5"),
        conv(128, "conv2d_6"),
        layers.BatchNormalization(name="batch_normalization_6"),
        layers.MaxPooling2D(pool_size=(2, 2), name="max_pooling2d_3"),
        layers.Dropout(0.4, name="dropout_3"),

        # Fully Connected Layers
        layers.Flatten(name="flatten_1"),
        layers.Dense(256, kernel_regularizer=l2_penalty(), activation='relu', name="dense_1"),
        layers.BatchNormalization(name="batch_normalization_7"),
        layers.Dropout(0.5, name="dropout_4"),
        layers.Dense(10, activation='softmax', name="dense_2"),
    ]
    return tf.keras.Sequential(stack)

def model_fn():
    """Wrap a newly built Keras model as a TFF model.

    The input spec comes from the first federated training dataset; the loss
    and metric are the sparse categorical cross-entropy and accuracy.
    """
    keras_model = create_keras_model()
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
    accuracy_metric = tf.keras.metrics.SparseCategoricalAccuracy()
    return tff.learning.models.from_keras_model(
        keras_model,
        input_spec=federated_train_data[0].element_spec,
        loss=loss_fn,
        metrics=[accuracy_metric])


In [31]:
# Build the weighted federated averaging process from `model_fn`, with SGD
# (learning rate 0.01) for clients and SGD (learning rate 1.0) for the server.
# Optimizers are passed as zero-argument factories rather than instances.
iterative_process = tff.learning.algorithms.build_weighted_fed_avg(
    model_fn=model_fn,
    client_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=0.01),
    server_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=1.0)
)


Instructions for updating:
Colocations handled automatically by placer.


In [None]:
# Start from the process's initial state.
state = iterative_process.initialize()

for round_num in range(1, 11):  # Run for 10 rounds
    # Read the step output by attribute, as the earlier training cells in this
    # notebook do, rather than relying on it being unpackable as a tuple.
    result = iterative_process.next(state, federated_train_data)
    state = result.state
    metrics = result.metrics
    print(f'Round {round_num}, Metrics={metrics}')


Round 1, Metrics=OrderedDict([('distributor', ()), ('client_work', OrderedDict([('train', OrderedDict([('sparse_categorical_accuracy', 0.1262), ('loss', 3.4419503), ('num_examples', 5000), ('num_batches', 160)]))])), ('aggregator', OrderedDict([('mean_value', ()), ('mean_weight', ())])), ('finalizer', OrderedDict([('update_non_finite', 0)]))])
Round 2, Metrics=OrderedDict([('distributor', ()), ('client_work', OrderedDict([('train', OrderedDict([('sparse_categorical_accuracy', 0.1534), ('loss', 3.2115765), ('num_examples', 5000), ('num_batches', 160)]))])), ('aggregator', OrderedDict([('mean_value', ()), ('mean_weight', ())])), ('finalizer', OrderedDict([('update_non_finite', 0)]))])
Round 3, Metrics=OrderedDict([('distributor', ()), ('client_work', OrderedDict([('train', OrderedDict([('sparse_categorical_accuracy', 0.1742), ('loss', 3.0204482), ('num_examples', 5000), ('num_batches', 160)]))])), ('aggregator', OrderedDict([('mean_value', ()), ('mean_weight', ())])), ('finalizer', Order

In [None]:
def evaluate_model(model, test_data):
    """Compute accuracy and macro-averaged F1 for a model on a test dataset.

    Args:
        model: object with a `predict` method returning per-class scores.
        test_data: dataset yielding `(features, labels)` batches; it is
            iterated twice, once for predictions and once for labels.

    Returns:
        A tuple `(accuracy, f1)`.
    """
    features_only = test_data.map(lambda x, y: x)
    predictions = model.predict(features_only)

    label_batches = [labels for _, labels in test_data]
    y_true = np.concatenate(label_batches, axis=0)
    # The predicted class is the index of the highest score in each row.
    y_pred = np.argmax(predictions, axis=1)

    # Drop singleton axes from the labels so they compare element-wise.
    is_correct = y_pred == y_true.squeeze()
    return np.mean(is_correct), f1_score(y_true, y_pred, average='macro')

# Create and preprocess the test dataset
test_data = tf.data.Dataset.from_tensor_slices((cifar_test_images, cifar_test_labels)).batch(32)

# Load the final global model
global_model = create_keras_model()
# Copy the trained federated weights into the fresh Keras model. Previously the
# result of get_model_weights() was discarded, so the evaluation below ran on a
# newly initialised, untrained model.
model_weights = iterative_process.get_model_weights(state)
model_weights.assign_weights_to(global_model)

# Evaluate the model
accuracy, f1 = evaluate_model(global_model, test_data)
print(f'Accuracy: {accuracy:.4f}, F1 Score: {f1:.4f}')
