<a href="https://colab.research.google.com/github/aissahm/FedSeleKDistill/blob/main/FedSeleKDistill.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import random
import numpy as np
from tensorflow import keras

#given the dataset X, Y, the object with indexes for every client, returns the dataset of client identified with its client_id
def returnClientDataset(client_id, clients_data_obj, x, y):
  """Return the slice of a dataset that belongs to one client.

  Args:
    client_id: Key used to look the client up in `clients_data_obj`.
    clients_data_obj: Container whose entry for `client_id` holds an
      "indexes" collection of row positions.
    x: Sample array, indexed with the client's row positions.
    y: Label array, indexed with the same row positions.

  Returns:
    A two-element list `[x_subset, y_subset]`.
  """
  client_record = clients_data_obj[client_id]
  row_positions = np.array(client_record["indexes"])
  client_x = x[row_positions]
  client_y = y[row_positions]
  return [client_x, client_y]

In [None]:
import pickle

# NOTE(review): the helper functions in this notebook read `num_clients`, not
# `number_clients` — confirm which name is intended.
number_clients = 30

# Raw MNIST arrays as shipped with Keras
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# Pickled object holding, for every client, the indexes of its samples
clients_datasets_obj_filename = "/content/MNIST_30_clients_70percent_main_class.pickle"

# SECURITY: pickle.load can execute arbitrary code; only load files you trust.
# The context manager closes the file handle as soon as the object is read.
with open(clients_datasets_obj_filename, "rb") as clients_datasets_file:
  clients_datasets_obj = pickle.load(clients_datasets_file)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [None]:
# Display the record stored for client 0 (the helpers in this notebook read its "indexes" entry)
clients_datasets_obj[0]

In [None]:
# Problem dimensions: ten classes, 28x28 images with a single channel
num_classes = 10
input_shape = (28, 28, 1)

# Fetch MNIST, already divided into a training and a test split
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# Cast pixels to float32, rescale them to [0, 1] and append a trailing
# channel axis so every image has shape (28, 28, 1)
x_train = np.expand_dims(x_train.astype("float32") / 255, axis=-1)
x_test = np.expand_dims(x_test.astype("float32") / 255, axis=-1)

print("x_train shape:", x_train.shape)
print(x_train.shape[0], "train samples")
print(x_test.shape[0], "test samples")

# One-hot encode the integer class labels
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples


In [None]:
import numpy as np
from tensorflow import keras
import tensorflow as tf
from tensorflow.keras import layers, models, losses, optimizers
import random
from matplotlib import pyplot as plt

def knowledge_distillation_loss(y_true, y_pred):
  """Categorical cross-entropy between the labels and the predictions.

  Despite its name, this loss contains no distillation term: it only casts
  both inputs to float32 tensors and returns their cross-entropy.

  Args:
    y_true: Target class distribution (one-hot labels in this notebook).
    y_pred: Predicted class probabilities.

  Returns:
    The per-sample categorical cross-entropy tensor.
  """
  labels = tf.convert_to_tensor(y_true, dtype=tf.float32)
  probabilities = tf.convert_to_tensor(y_pred, dtype=tf.float32)
  # from_logits=False: y_pred is interpreted as probabilities, not raw scores
  return losses.categorical_crossentropy(labels, probabilities, from_logits=False)

def returnInitialGlobalModel():
  """Build and compile a freshly initialised CNN classifier.

  Reads the module-level `input_shape` and `num_classes`.

  Returns:
    A compiled `keras.Sequential` model: two Conv2D + MaxPooling2D stages,
    then Flatten, Dropout(0.5) and a softmax Dense layer.
  """
  architecture = [
      keras.Input(shape=input_shape),
      layers.Conv2D(32, kernel_size=(3, 3), activation="relu"),
      layers.MaxPooling2D(pool_size=(2, 2)),
      layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
      layers.MaxPooling2D(pool_size=(2, 2)),
      layers.Flatten(),
      layers.Dropout(0.5),
      layers.Dense(num_classes, activation="softmax"),
  ]
  network = keras.Sequential(architecture)
  network.compile(
      optimizer="adam",
      loss=knowledge_distillation_loss,
      metrics=["accuracy"],
  )
  return network

#returns a copy of the global model to client
def returnCopyGlobalModelToClient(globalmodel):
  """Create a new compiled model carrying the same weights as `globalmodel`.

  Args:
    globalmodel: Model whose weights are read with `get_weights()`.

  Returns:
    A model built by `returnInitialGlobalModel()` with those weights loaded.
  """
  replica = returnInitialGlobalModel()
  server_weights = globalmodel.get_weights()
  replica.set_weights(server_weights)
  return replica

#Given the weights after training and initial weights, returns the gradient from entire training
def computeClientGradientNoCompression(modelNotTrained, modelTrained):
  """Compute the per-array weight change produced by local training.

  Args:
    modelNotTrained: Model holding the weights before training.
    modelTrained: Model holding the weights after training.

  Returns:
    A list with one array per weight array, each equal to
    `initial - trained` (so subtracting it from the initial weights
    yields the trained weights).
  """
  initial_weights = modelNotTrained.get_weights()
  return [
      initial_weights[position] - trained_array
      for position, trained_array in enumerate(modelTrained.get_weights())
  ]

#add the client gradient to the global model
def addGradientNoCompression(modelNotTrained, gradient, client_weight):
  """Apply one client's scaled update to a model, in place.

  Every weight array `w[i]` of the model becomes
  `w[i] - gradient[i] * client_weight`.

  Args:
    modelNotTrained: Model to update; must expose `get_weights()` and
      `set_weights()`.
    gradient: Sequence with one array per weight array of the model.
    client_weight: Scalar factor applied to every gradient array.

  Raises:
    ValueError: If `gradient` does not contain exactly one entry per
      weight array, which would otherwise apply a partial update.
  """
  # Fetch the weights once: get_weights() copies every array
  current_weights = modelNotTrained.get_weights()
  if len(gradient) != len(current_weights):
    raise ValueError(
        "gradient has %d arrays but the model has %d weight arrays"
        % (len(gradient), len(current_weights))
    )
  updated_weights = [
      weight - (delta * client_weight)
      for weight, delta in zip(current_weights, gradient)
  ]
  modelNotTrained.set_weights(updated_weights)

#Generate a random list of clients considered of length num_clients_considered
def returnRandomSelectedClientsIDsList():
  """Draw distinct client ids at random for one round.

  Reads the module-level `num_clients` (ids are drawn from
  `0 .. num_clients - 1`) and `num_clients_considered` (how many to draw).

  Returns:
    A list of `num_clients_considered` distinct ids.
  """
  candidate_ids = range(num_clients)
  return random.sample(candidate_ids, num_clients_considered)

#return random clients
def returnRandomParticipatingClients(num_clients, num_participating_clients):
  """Draw distinct client ids at random.

  Args:
    num_clients: Size of the id pool; ids range over `0 .. num_clients - 1`.
    num_participating_clients: How many ids to draw.

  Returns:
    A list of `num_participating_clients` distinct ids.
  """
  all_client_ids = range(num_clients)
  return random.sample(all_client_ids, k=num_participating_clients)

#function that returns the evaluation result (loss and metrics) of the model on the data
def evaluateGlobalModel(client_model, x, y):
  """Evaluate a model on `(x, y)` without progress output.

  Args:
    client_model: Object exposing `evaluate(x, y, verbose=...)`.
    x: Samples to evaluate on.
    y: Targets matching `x`.

  Returns:
    Whatever `client_model.evaluate` returns; callers in this notebook
    read index 0 as the loss and index 1 as the accuracy.
  """
  evaluation = client_model.evaluate(x, y, verbose=0)
  return evaluation

def returnClientGradientAsVector(client_gradient):
  """Flatten a list of arrays into a single one-dimensional vector.

  Args:
    client_gradient: Sequence of arrays (one per weight array).

  Returns:
    A new 1-D NumPy array holding every element of every input array, in
    order, each array flattened in row-major order. An empty input yields
    an empty array.
  """
  flattened = [np.ravel(layer_delta) for layer_delta in client_gradient]
  if not flattened:
    # np.concatenate rejects an empty sequence
    return np.array([])
  # Concatenating whole arrays avoids creating one Python object per parameter
  return np.concatenate(flattened)

def returnClientLambdaValueKD(i, client_accuracy, server_side_accuracy):
  """Return the complement of a client's accuracy.

  Args:
    i: Not used by the computation.
    client_accuracy: Accuracy value to complement.
    server_side_accuracy: Not used by the computation.

  Returns:
    `1 - client_accuracy`.
  """
  accuracy_shortfall = 1 - client_accuracy
  return accuracy_shortfall

#function that returns the evaluation of the global model on the selected clients
def returnWorstLossesAmongChosenClients(classifierModel, chosenClients, datasetObject):
  """Score a model on each chosen client and rank clients by accuracy.

  Each client's data is taken from the module-level `x_train` / `y_train`.

  Args:
    classifierModel: Model to evaluate.
    chosenClients: Sequence of client ids to evaluate on.
    datasetObject: Per-client index object passed to `returnClientDataset`.

  Returns:
    A list of `{"clientID": ..., "accuracy": ...}` dicts ordered from the
    lowest accuracy to the highest.
  """
  scored_clients = []

  for chosen_id in chosenClients:
    features, labels = returnClientDataset(chosen_id, datasetObject, x_train, y_train)
    evaluation = evaluateGlobalModel(classifierModel, features, labels)
    # index 1 of the evaluation result is the accuracy metric
    scored_clients.append({"clientID": chosen_id, "accuracy": evaluation[1]})

  # worst-performing clients first
  return sorted(scored_clients, key=lambda entry: entry["accuracy"])

def returnAccuracyScoreForClient(clientID, clients_accuracy_list):
  """Look up the accuracy recorded for one client.

  Args:
    clientID: Id of the client to look for.
    clients_accuracy_list: Iterable of dicts with an "accuracy" entry and
      an id stored under "clientID" (the key written by
      returnWorstLossesAmongChosenClients) or under "client_id".

  Returns:
    The matching entry's accuracy, or -1 (after printing a message) when
    no entry matches.
  """
  for entry in clients_accuracy_list:
    # The accuracy list is built with the "clientID" key; "client_id" is
    # still accepted so entries using that spelling keep working.
    if "clientID" in entry:
      entry_id = entry["clientID"]
    elif "client_id" in entry:
      entry_id = entry["client_id"]
    else:
      continue
    if entry_id == clientID:
      return entry["accuracy"]
  print('accuracy not found for' , clientID )
  return -1

In [None]:
#main code running on server side
def runTrainingNoCompression(classifierModel, accuracyScoreTarget, clientsDatasetObject, lambda_val):
  """Run the federated training loop on `classifierModel` and report the results.

  Each of the `max_num_rounds` rounds:
    1. draws random clients and evaluates the current global model on each
       of them;
    2. keeps the `num_clients_selected` clients with the lowest accuracy;
    3. on each kept client, trains a student copy of the global model with a
       loss mixing cross-entropy and a KL-divergence term against the
       predictions of a teacher copy of the global model;
    4. subtracts every client's weight change, scaled by the client's share
       of the training set, from the global model;
    5. evaluates the global model on the central server data.
  After the last round it prints a summary, evaluates the model on every
  client's data and plots accuracy and loss per round.

  Args:
    classifierModel: Global model; its weights are updated in place.
    accuracyScoreTarget: Not read anywhere in this function.
    clientsDatasetObject: Per-client index object passed to
      returnClientDataset.
    lambda_val: Weight of the KL-divergence term; the cross-entropy term is
      weighted by `1 - lambda_val`.

  Returns:
    None. Results are printed and plotted.

  Module-level names read: central_server_data_x, central_server_data_y_true,
  clients_datasets_obj_filename, max_num_rounds, num_clients,
  num_clients_considered, num_clients_selected, num_iterations_per_round,
  validation_size, batch_size, temperature, x_train, y_train.
  """

  #We first evaluate the global model
  globalAccuracy = evaluateGlobalModel(classifierModel, central_server_data_x, central_server_data_y_true)

  print("Starting")
  print("dataset ", clients_datasets_obj_filename)
  print("Before training global model accuracy =", globalAccuracy)
  print()

  i = 0
  # accuracy_score is never read after this assignment
  accuracy_score = 0
  per_round_global_model_accuracy = []
  per_round_global_model_loss = []
  # never updated below, so the summary always prints 0 for it
  convergence_reached_at_round = 0

  # index 0 of the evaluation is the loss, index 1 the accuracy
  per_round_global_model_accuracy.append(globalAccuracy[1])
  per_round_global_model_loss.append(globalAccuracy[0])

  while i < max_num_rounds:

      #print("Round ", i + 1, " / Max rounds ", max_num_rounds)

      #select random clients
      random_clients = returnRandomSelectedClientsIDsList()

      #return the list of clients ordered from lowest to highest accuracy
      pretraining_loss_values_obj = returnWorstLossesAmongChosenClients(classifierModel, random_clients, clientsDatasetObject)

      print("Model evaluation on clients : ", pretraining_loss_values_obj)
      #print("Selected clients :", pretraining_loss_values_obj[:num_clients_selected])

      selected_clients_grad_list = []

      # round_lambda_values is never filled or read
      round_lambda_values = []

      ##################
      #At client
      ##################

      #training classifier on each client (only the lowest-accuracy ones are kept)
      for selected_client in pretraining_loss_values_obj[:num_clients_selected]:

          client_id = selected_client["clientID"]

          #training the global model on client's local data
          client_x, client_y = returnClientDataset(client_id, clientsDatasetObject, x_train, y_train)


          #####KD
          ###BEGINING KD

          # Build the teacher and student models
          teacher_model = returnCopyGlobalModelToClient(classifierModel)
          student_model = returnCopyGlobalModelToClient(classifierModel)

          # Use teacher model to generate "soft targets" for the student
          soft_target_train = teacher_model.predict(client_x)

          # Loss closure: captures soft_target_train, lambda_val and temperature
          def student_knowledge_distillation_loss(y_true, y_pred):
            y_true = tf.convert_to_tensor(y_true, dtype=tf.float32)

            # NOTE(review): y_soft is the teacher output for the client's whole
            # dataset, while y_pred only covers the current batch, so the rows
            # are not paired sample by sample. The shapes look broadcastable
            # only when the batch holds a single sample — confirm.
            y_soft = tf.convert_to_tensor(soft_target_train, dtype=tf.float32)
            y_pred = tf.convert_to_tensor(y_pred, dtype=tf.float32)

            loss_ce = losses.categorical_crossentropy(y_true, y_pred, from_logits=False)

            # NOTE(review): the model's last layer already applies softmax, so
            # this is a softmax over temperature-scaled probabilities rather
            # than over logits; the teacher targets are not temperature-scaled.
            # Confirm this is intended.
            loss_kd = tf.keras.losses.KLD(y_soft, tf.nn.softmax(y_pred / temperature))

            return (1- lambda_val)*loss_ce + lambda_val * loss_kd  # Adjust the weight for the distillation loss as needed


          #set student weights to central server weights and recompile
          student_model.compile(loss=student_knowledge_distillation_loss, optimizer="adam", metrics=["accuracy"])
          student_model.set_weights(teacher_model.get_weights())

          #training model on client data
          student_model.fit(client_x, client_y, epochs=num_iterations_per_round,
                              validation_split = validation_size ,verbose=0,  batch_size=batch_size)


          #getting the gradient from the client (global weights minus trained student weights)
          client_gradient = computeClientGradientNoCompression(classifierModel, student_model)

          #storing the gradients to be sent to central server
          selected_clients_grad_list.append( {"client_id": client_id, "clientgradient": client_gradient} )


      ##################
      #At central server
      ##################

      #updating the global model weights
      for client_gradient in selected_clients_grad_list:

          client_id = client_gradient["client_id"]
          client_x, client_y = returnClientDataset(client_id, clientsDatasetObject, x_train, y_train)
          # client's share of the full training set, not of this round's selected clients
          client_weight = client_y.shape[0] / y_train.shape[0]

          addGradientNoCompression(classifierModel, client_gradient["clientgradient"], client_weight)


      #evaluating the global model on data contained in central server
      globalModelEvaluation = evaluateGlobalModel(classifierModel, central_server_data_x, central_server_data_y_true)

      globalAccuracy = globalModelEvaluation[1]
      per_round_global_model_accuracy.append(globalAccuracy)
      per_round_global_model_loss.append(globalModelEvaluation[0])

      print("Round ", i,  ", global model accuracy score = ", globalAccuracy)
      print()

      i += 1


  #print the run summary
  print("Number of clients :", num_clients)
  print("Number of clients participating per round :", num_clients_considered)
  print("Number of clients training the model per round : ", num_clients_selected)
  print("Number of iterations per client per round : ", num_iterations_per_round)
  print("Number of rounds until convergence : ", convergence_reached_at_round)

  #accuracy for each client dataset
  clients_final_accuracy_list = []

  for i in range(0, num_clients):
    #get client dataset
    client_x, client_y = returnClientDataset(i, clientsDatasetObject, x_train, y_train)
    client_score = evaluateGlobalModel(classifierModel, client_x, client_y)
    clients_final_accuracy_list.append( client_score[1])
  print("clients_final_accuracy_list", clients_final_accuracy_list)

  print (per_round_global_model_accuracy)
  print (per_round_global_model_loss)

  #plot the results; index 0 of each series is the evaluation before any training
  plt.plot(per_round_global_model_accuracy)
  plt.ylabel('Accuracy per round')
  plt.show()

  plt.plot(per_round_global_model_loss)
  plt.ylabel('Loss per round')
  plt.show()

### **Setting the experiment parameters**

In [None]:
# Data held by the central server for scoring the global model: the MNIST test split
central_server_data_x = x_test
central_server_data_y_true = y_test

# Local training schedule used on every client
num_iterations_per_round = 4   # passed as `epochs` to fit()
validation_size = 0.15         # passed as `validation_split` to fit()
batch_size = 1

# Divisor applied to the student's predictions inside the distillation term
temperature = 5

# NOTE(review): max_num_rounds, num_clients, num_clients_considered,
# num_clients_selected and accuracy_score_target are read elsewhere in this
# notebook but are not assigned in any visible cell — confirm where they are set.

In [None]:
initial_global_model = returnInitialGlobalModel()

# Weight of the KL-divergence term in the student loss (cross-entropy gets
# 1 - lambda_val). It is a required argument of runTrainingNoCompression.
# NOTE(review): 0.5 is a placeholder that weights both terms equally; no value
# is set elsewhere in the visible notebook — tune it for the experiment.
lambda_val = 0.5

# NOTE(review): accuracy_score_target is not assigned in any visible cell and
# is not read by runTrainingNoCompression — confirm where it is defined.
runTrainingNoCompression(initial_global_model, accuracy_score_target, clients_datasets_obj, lambda_val)