<a href="https://colab.research.google.com/github/aissahm/CCSF-KD/blob/main/CCSF_KD_least_trained_clients_strategy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **CCSF-KD, least trained client strategy**

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense, Conv2D, MaxPooling2D
from tensorflow.keras.utils import to_categorical
import random as random
from tensorflow.keras import layers, models, losses, optimizers
import matplotlib.pyplot as plt
import numpy as np
import pickle
from sklearn.metrics import accuracy_score

In [None]:
def returnClientDataset(client_id, clients_data_obj, x, y):
  """Return the samples and labels that belong to one client.

  Args:
    client_id: key of the client inside clients_data_obj.
    clients_data_obj: mapping client id -> dict holding an "indexes" sequence
      of row positions into x and y.
    x: array of samples, indexed along its first axis.
    y: array of labels, indexed along its first axis.

  Returns:
    A two-element list [client_x, client_y].
  """
  # Force an integer dtype: an empty "indexes" list would otherwise become a
  # float64 array, which NumPy refuses to use as an index.
  dataset_indexes = np.asarray(clients_data_obj[client_id]["indexes"], dtype=np.intp)
  return [x[dataset_indexes], y[dataset_indexes]]

def returnClientGradientAsVector(client_gradient):
  """Flatten a list of per-layer arrays into a single 1-D vector.

  Args:
    client_gradient: iterable of NumPy arrays (one per weight tensor).

  Returns:
    A 1-D NumPy array holding every element of every input array, in order.
    An empty input yields an empty array.
  """
  # ravel + concatenate stays in NumPy instead of boxing every parameter as a
  # Python object through list.extend.
  flattened_layers = [np.ravel(weight) for weight in client_gradient]
  if not flattened_layers:
    # np.concatenate rejects an empty list; keep the historical empty result
    return np.array([])
  return np.concatenate(flattened_layers)

def returnClusterID(client_id, cluster_obj):
  """Return the key of the first cluster whose "cluster" list contains client_id.

  Returns -1 when no cluster lists the client.
  """
  owning_clusters = (
      cluster_key
      for cluster_key, cluster_info in cluster_obj.items()
      if client_id in cluster_info["cluster"]
  )
  return next(owning_clusters, -1)

def returnClientWeight(client_id, cluster_obj):
  """Return the "cluster_size" entry of the cluster that contains client_id.

  The cluster is located with returnClusterID; if the client belongs to no
  cluster that helper returns -1 and the lookup below is done with key -1.
  """
  owning_cluster = cluster_obj[returnClusterID(client_id, cluster_obj)]
  return owning_cluster["cluster_size"]

In [None]:
def knowledge_distillation_loss(y_true, y_pred):
  """Categorical cross-entropy between labels and predicted probabilities.

  Despite the name, no distillation term is computed here: both arguments are
  cast to float32 tensors and passed to categorical_crossentropy.
  """
  labels = tf.convert_to_tensor(y_true, dtype=tf.float32)
  probabilities = tf.convert_to_tensor(y_pred, dtype=tf.float32)
  # from_logits=False: y_pred is interpreted as probabilities, not raw scores
  return losses.categorical_crossentropy(labels, probabilities, from_logits=False)

def returnInitialGlobalModel():
  """Build and compile a freshly initialised CNN classifier.

  Reads the module-level names input_shape and num_classes. The network is two
  Conv2D + MaxPooling2D stages followed by Flatten, Dropout(0.5) and a softmax
  Dense layer; it is compiled with Adam, knowledge_distillation_loss and the
  accuracy metric.
  """
  cnn_layers = [
      keras.Input(shape=input_shape),
      layers.Conv2D(32, kernel_size=(3, 3), activation="relu"),
      layers.MaxPooling2D(pool_size=(2, 2)),
      layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
      layers.MaxPooling2D(pool_size=(2, 2)),
      layers.Flatten(),
      layers.Dropout(0.5),
      layers.Dense(num_classes, activation="softmax"),
  ]
  model = keras.Sequential(cnn_layers)
  model.compile(optimizer="adam", loss=knowledge_distillation_loss, metrics=["accuracy"])
  return model

def returnCopyGlobalModelToClient(globalmodel):
  """Return a new compiled model carrying a copy of globalmodel's weights.

  The copy is built with returnInitialGlobalModel, so only the weights are
  transferred from globalmodel.
  """
  global_weights = globalmodel.get_weights()
  clientmodel = returnInitialGlobalModel()
  clientmodel.set_weights(global_weights)
  return clientmodel

def computeClientGradient(modelNotTrained, modelTrained):
  """Return the per-tensor difference (weights before - weights after training).

  Args:
    modelNotTrained: model exposing get_weights(), holding the starting weights.
    modelTrained: model exposing get_weights(), holding the trained weights.

  Returns:
    A list with one entry per weight tensor of modelTrained.
  """
  starting_weights = modelNotTrained.get_weights()
  return [
      starting_weights[position] - trained_weight
      for position, trained_weight in enumerate(modelTrained.get_weights())
  ]

def addGradientNoCompression(modelNotTrained, gradient, clientweight):
  """Apply a scaled client update to a model in place.

  Every weight tensor w_i of modelNotTrained becomes
  w_i - gradient[i] * clientweight.

  Args:
    modelNotTrained: model exposing get_weights() / set_weights(); mutated.
    gradient: sequence with one entry per weight tensor, as produced by
      computeClientGradient.
    clientweight: scalar factor applied to the whole update.
  """
  # Read the weights once: get_weights() hands back a fresh copy of every
  # tensor, so a second call only duplicates that work.
  current_weights = modelNotTrained.get_weights()
  modelNotTrained.set_weights([
      weight - (gradient[position] * clientweight)
      for position, weight in enumerate(current_weights)
  ])

def returnRandomParticipatingClients(num_clients, num_participating_clients):
  """Draw num_participating_clients distinct client ids from 0..num_clients-1.

  Uses random.sample, so the ids are unique and the order is random.
  """
  all_client_ids = range(num_clients)
  return random.sample(all_client_ids, num_participating_clients)

def evaluateGlobalModel(glomodel, x, y):
  """Evaluate glomodel on (x, y) silently and return the result unchanged.

  The return value is whatever glomodel.evaluate produces; callers in this
  file read index 0 as the loss and index 1 as the accuracy.
  """
  evaluation = glomodel.evaluate(x, y, verbose=0)
  return evaluation

def returnLambdaValueKD():
  """Return the weight given to the distillation term of the student loss.

  The value is a fixed constant; it does not depend on any client statistic.
  """
  distillation_weight = 0.7
  return distillation_weight

In [None]:
def _trainStudentWithKD(globalModel, client_x, client_y, lambda_val, num_epochs, validation_split, batch_size):
  """Train a student copy of the global model on one client with a KD loss.

  The teacher is a copy of globalModel; its predictions on client_x are the
  soft targets. The student loss is
  (1 - lambda_val) * cross-entropy(hard labels) + lambda_val * KL(soft targets).
  Reads the module-level name `temperature`.

  Args:
    globalModel: current global model (not modified).
    client_x: the client's samples.
    client_y: the client's one-hot labels, shape (num_samples, num_classes).
    lambda_val: weight of the distillation term.
    num_epochs, validation_split, batch_size: forwarded to fit().

  Returns:
    The trained student model.
  """
  # Build the teacher and student models
  teacher_model = returnCopyGlobalModelToClient(globalModel)
  student_model = returnCopyGlobalModelToClient(globalModel)

  # Use teacher model to generate "soft targets" for the student
  soft_target_train = teacher_model.predict(client_x)

  # Pack the soft targets next to the hard labels so that Keras slices,
  # shuffles and validation-splits both together. Closing over the full
  # soft-target array instead would compare every batch's predictions with
  # the soft targets of the whole client dataset.
  num_label_columns = client_y.shape[1]
  packed_targets = np.concatenate([client_y, soft_target_train], axis=1)

  def student_knowledge_distillation_loss(y_true, y_pred):
    y_true = tf.convert_to_tensor(y_true, dtype=tf.float32)
    y_pred = tf.convert_to_tensor(y_pred, dtype=tf.float32)

    # undo the packing: first block = hard labels, second block = soft targets
    y_hard = y_true[:, :num_label_columns]
    y_soft = y_true[:, num_label_columns:]

    loss_ce = losses.categorical_crossentropy(y_hard, y_pred, from_logits=False)

    # NOTE(review): y_pred is already the output of the model's softmax layer,
    # so softmax is applied a second time here, and the teacher targets are not
    # temperature-scaled. Kept as in the original experiment - confirm intent.
    loss_kd = tf.keras.losses.KLD(y_soft, tf.nn.softmax(y_pred / temperature))

    return (1 - lambda_val) * loss_ce + lambda_val * loss_kd

  # Recompile the student with the distillation loss. No metric is attached:
  # the targets are packed (labels + soft targets) and the fit history is not
  # read anywhere.
  student_model.compile(loss=student_knowledge_distillation_loss, optimizer="adam")
  student_model.set_weights(teacher_model.get_weights())

  # training model on client data
  student_model.fit(client_x, packed_targets, epochs=num_epochs,
                    validation_split=validation_split, verbose=0, batch_size=batch_size)
  return student_model


def trainLeastTrainedClientsWithKD(num_participating_clients, clients_datasets_object, cluster_obj, representation_threshold, target_accuracy, xtrain, ytrain, xtest, ytest, classifierModel, training_rounds, num_epochs, validation_split, batch_size):
  """Run federated training with cluster-based, least-trained client selection and KD.

  Each iteration samples num_participating_clients random clients, keeps the
  least-trained one per cluster, and trains only if the summed cluster weights
  of the kept clients reach representation_threshold. Accepted rounds train a
  student per client, subtract each client's update (scaled by its share of
  ytrain) from classifierModel, and evaluate on (xtest, ytest). Training stops
  after training_rounds accepted rounds or once target_accuracy is reached.
  Progress is printed and accuracy/loss curves are plotted; nothing is returned.

  Reads the module-level names clients_datasets_obj_filename and temperature.

  Args:
    num_participating_clients: number of clients sampled per iteration.
    clients_datasets_object: mapping client id -> dict with an "indexes" entry;
      client ids are assumed to be 0..len-1.
    cluster_obj: mapping cluster id -> dict with "cluster" and "cluster_size".
    representation_threshold: minimum summed cluster weight to accept a round.
    target_accuracy: test accuracy at which training stops early.
    xtrain, ytrain: full training samples and one-hot labels.
    xtest, ytest: evaluation samples and one-hot labels.
    classifierModel: global model; updated in place.
    training_rounds: maximum number of accepted rounds.
    num_epochs, validation_split, batch_size: client-side fit() settings.
  """
  print("Dataset ", clients_datasets_obj_filename)
  initial_evalution = evaluateGlobalModel(classifierModel, xtest, ytest)
  global_accuracy = [initial_evalution[1]]
  global_loss = [initial_evalution[0]]
  print("Pretraining overall accuracy score = ", initial_evalution[1])
  print()
  num_clients = len(clients_datasets_object)

  num_clients_training_each_round = []

  # how many times each client has trained so far
  training_history_obj = {client_ID: {"training_num": 0} for client_ID in range(num_clients)}

  # The distillation weight is constant, so fetch it once; this also keeps the
  # per-round log line valid when a round has no selected client.
  lambda_val = returnLambdaValueKD()

  converging_round = 0
  i = 0
  # NOTE(review): i only advances on accepted rounds, so this loop does not end
  # if the sampled clients can never reach representation_threshold.
  while i < training_rounds:
    #select random clients
    participating_clients_IDs = returnRandomParticipatingClients(num_clients, num_participating_clients)

    #returns list of clients to train at each round, per cluster, and least trained whenever possible
    selected_clients_IDs = returnLeastTrainedClientsFromClusters(participating_clients_IDs, cluster_obj, training_history_obj)

    sum_clients_weights = 0
    for client_id in selected_clients_IDs:
      sum_clients_weights = sum_clients_weights + returnClientWeight(client_id, cluster_obj)

    # snapshot of the training counters before this round, for the log
    current_round_clients_num_training = {
        client_id: training_history_obj[client_id]["training_num"]
        for client_id in selected_clients_IDs
    }

    # global model trained only if the clients selected represents at least a minimum value of the global distribution
    if sum_clients_weights >= representation_threshold:
      i = i + 1

      print("selected clients ", selected_clients_IDs)
      print("training num", current_round_clients_num_training)

      #saving the counts of clients training at each round
      num_clients_training_each_round.append(len(selected_clients_IDs))

      ##################
      #At client
      ##################

      selected_clients_grad_list = []
      for client_id in selected_clients_IDs:
        #get client dataset
        client_x, client_y = returnClientDataset(client_id, clients_datasets_object, xtrain, ytrain)

        student_model = _trainStudentWithKD(classifierModel, client_x, client_y, lambda_val,
                                            num_epochs, validation_split, batch_size)

        #storing the gradients to be sent to central server
        selected_clients_grad_list.append({
            "client_id": client_id,
            "clientgradient": computeClientGradient(classifierModel, student_model),
            "num_samples": client_y.shape[0],
        })

        training_history_obj[client_id]["training_num"] = training_history_obj[client_id]["training_num"] + 1

      ##################
      #At central server
      ##################

      #updating the global model weights, each client weighted by its share of the training set
      for client_update in selected_clients_grad_list:
        client_weight = client_update["num_samples"] / ytrain.shape[0]
        addGradientNoCompression(classifierModel, client_update["clientgradient"], client_weight)

      #evaluating the global model on data contained in central server
      current_evalution = evaluateGlobalModel(classifierModel, xtest, ytest)
      global_accuracy.append(current_evalution[1])
      global_loss.append(current_evalution[0])

      print("Round :", i, ", selected clients :", len(selected_clients_IDs), "/", len(cluster_obj), " representing ", 100*sum_clients_weights, "% of global distribution, overall accuracy score = ", current_evalution[1])
      print("lambda_val = ", lambda_val)
      print()

      if i % 5 == 0 :
        print("accuracy = ", global_accuracy)
        print("Average number of clients training the model per round : ", num_clients_training_each_round)
        print()

      if current_evalution[1] >= target_accuracy:
        print("")
        print("Global model converged")
        print("Convergence accuracy score = ", current_evalution[1])
        converging_round = i
        break

  #plot the results
  print("Number of clients :", num_clients)
  print("Number of clients participating per round :", num_participating_clients)
  print("Number of clients training the model per round : ", num_clients_training_each_round)
  print("Average number of clients training the model per round : ", np.array(num_clients_training_each_round).mean())
  print("Number of iterations per client per round : ", num_epochs)
  # stays 0 when the target accuracy was never reached
  print("Number of rounds until convergence : ", converging_round)
  print("training_history_obj", training_history_obj)

  #accuracy for each client dataset
  clients_final_accuracy_list = []
  for client_ID in range(0, num_clients):
    #get client dataset
    client_x, client_y = returnClientDataset(client_ID, clients_datasets_object, xtrain, ytrain)
    client_score = evaluateGlobalModel(classifierModel, client_x, client_y)
    clients_final_accuracy_list.append(client_score[1])
  print("clients_final_accuracy_list", clients_final_accuracy_list)

  print (global_accuracy)
  print (global_loss)
  plt.plot(global_accuracy)
  plt.ylabel('Accuracy per round')
  plt.show()

  plt.plot(global_loss)
  plt.ylabel('Loss per round')
  plt.show()

In [None]:
def returnLeastTrainedClientsFromClusters(participating_clients_IDs, cluster_obj, clients_training_history_obj):
  """Pick one participating client per cluster, preferring the least trained.

  Clusters are visited in the order their first member appears in
  participating_clients_IDs. Within a cluster the participant with the lowest
  "training_num" wins; ties go to the participant that appears first.

  Returns:
    List of selected client ids, one per represented cluster.
  """
  def times_trained(some_client):
    return clients_training_history_obj[some_client]["training_num"]

  chosen_clients = []
  covered_clusters = []

  for candidate in participating_clients_IDs:
    cluster_key = returnClusterID(candidate, cluster_obj)
    if cluster_key in covered_clusters:
      continue

    cluster_members = returnClientsListsFromCluster(cluster_key, cluster_obj)
    participating_peers = [peer for peer in participating_clients_IDs if peer in cluster_members]

    # min() returns the first minimum, so listing the candidate first keeps it
    # on ties and also covers the case where no peer list was found.
    chosen_clients.append(min([candidate] + participating_peers, key=times_trained))
    covered_clusters.append(cluster_key)

  return chosen_clients

def returnSelectedClientsFromClusters(participating_clients_IDs, cluster_obj):
  """Keep the first participating client seen for each cluster.

  Returns:
    List of client ids, ordered by first appearance of their cluster in
    participating_clients_IDs.
  """
  first_client_per_cluster = {}
  for candidate in participating_clients_IDs:
    # setdefault only stores the candidate when its cluster is not covered yet
    first_client_per_cluster.setdefault(returnClusterID(candidate, cluster_obj), candidate)
  return list(first_client_per_cluster.values())

def returnClientsListsFromCluster(client_id, cluster_obj ):
  """Return the member list of one cluster.

  Args:
    client_id: key of the cluster inside cluster_obj (clusters are keyed by an
      id, despite the parameter name).
    cluster_obj: mapping cluster id -> dict with a "cluster" list.

  Returns:
    The cluster's "cluster" list, or an empty list when the key is absent.
  """
  # direct dictionary lookup instead of scanning every key for equality
  if client_id in cluster_obj:
    return cluster_obj[client_id]["cluster"]
  return []

## **Data pre-processing + client clustering**

In [None]:
import pickle

number_clients = 100

(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# pickled object describing which training rows belong to which client
clients_datasets_obj_filename = "MNIST_100_clients_90percent_main_class.pickle"

# Open the file in a with-block so the handle is closed once loading is done.
# NOTE: unpickling can execute arbitrary code; only load partition files from
# a trusted source.
with open(clients_datasets_obj_filename, "rb") as clients_datasets_file:
  clients_datasets_obj = pickle.load(clients_datasets_file)

In [None]:
# Model / data parameters
num_classes = 10
input_shape = (28, 28, 1)

# Reload the raw data so this cell starts from unprocessed arrays
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# Scale images to the [0, 1] range as float32 and append the channel axis
# so every image has shape (28, 28, 1)
x_train = np.expand_dims(x_train.astype("float32") / 255, -1)
x_test = np.expand_dims(x_test.astype("float32") / 255, -1)
print("x_train shape:", x_train.shape)
print(x_train.shape[0], "train samples")
print(x_test.shape[0], "test samples")

# convert class vectors to binary class matrices (one-hot rows)
y_train, y_test = (
    keras.utils.to_categorical(class_vector, num_classes)
    for class_vector in (y_train, y_test)
)

In [None]:
num_iterations_per_round = 1
validation_size = 0.1

central_server_model = returnInitialGlobalModel()
clients_gradients = {}

# Train one copy of the same initial model per client and record how far each
# client moved the weights.
for i in range(len(clients_datasets_obj)):
  client_x, client_y = returnClientDataset(i, clients_datasets_obj, x_train, y_train)

  # every client starts from the same central weights
  client_model = returnCopyGlobalModelToClient(central_server_model)
  client_model.fit(client_x, client_y, validation_split=validation_size,
                   epochs=num_iterations_per_round, verbose=0)

  clients_gradients[i] = {
      "client_id": i,
      # difference between the shared starting weights and the trained ones
      "client_gradient": computeClientGradient(central_server_model, client_model),
      "client_weights": client_model.get_weights(),
  }

### **Getting the cosine similarity matrix between clients**

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

num_profiled_clients = len(clients_gradients)

# Flatten each client's update exactly once; doing it inside the pair loop
# repeats the same flattening for every (i, j) combination.
flattened_client_gradients = [
    returnClientGradientAsVector(clients_gradients[i]["client_gradient"])
    for i in range(num_profiled_clients)
]

# square matrix, filled symmetrically below
similarity_matrix = []
for i in range(num_profiled_clients):
  similarity_matrix.append([0] * num_profiled_clients)

for i in range(num_profiled_clients):
  weights_i = flattened_client_gradients[i]
  similarity_matrix[i][i] = 1
  for j in range(i + 1, num_profiled_clients):
    weights_j = flattened_client_gradients[j]
    # cosine_similarity works on 2-D inputs and returns a 1x1 matrix here
    cosinesimilarityij = cosine_similarity([weights_i], [weights_j])
    similarity_matrix[i][j] = cosinesimilarityij[0][0]
    similarity_matrix[j][i] = cosinesimilarityij[0][0]

# the flattened copies are only needed to fill the matrix
del flattened_client_gradients

In [None]:
clients_clustered = {}

similarity_threshold = 0.8

all_clients_dataset_size = 0

# Greedy single pass: a client that is not yet in a cluster starts its own,
# then every later unclustered client whose similarity to client i exceeds the
# threshold joins the cluster client i belongs to.
num_profiled_clients = len(clients_gradients)
for i in range(num_profiled_clients):
  all_clients_dataset_size += len(clients_datasets_obj[i]["indexes"])

  if returnClusterID(i, clients_clustered) < 0:
    clients_clustered[i] = {
        "main_class": clients_datasets_obj[i]["main_class"],
        "cluster": [i],
        "cluster_size": len(clients_datasets_obj[i]["indexes"]),
    }

  cluster_id = returnClusterID(i, clients_clustered)
  for j in range(i + 1, num_profiled_clients):
    if similarity_matrix[i][j] > similarity_threshold and returnClusterID(j, clients_clustered) < 0:
      joined_cluster = clients_clustered[cluster_id]
      joined_cluster["cluster"].append(j)
      joined_cluster["cluster_size"] += len(clients_datasets_obj[j]["indexes"])

# turn the per-cluster sample counts into fractions of all client samples
for cluster_info in clients_clustered.values():
  cluster_info["cluster_size"] = cluster_info["cluster_size"] / all_clients_dataset_size


#resetting unused parameters
clients_gradients = {}
similarity_matrix = []

print("Total samples ", all_clients_dataset_size)
print("Clusters of clients")
print("num of clusters", len(clients_clustered))
#clients_clustered

### **Setting experiment parameters**

In [None]:
# Define temperature parameter for knowledge distillation
# (read as a module-level name by the student distillation loss during client training)
temperature = 5

# number of clients sampled at random per round (assigned again in the run cell below)
participating_clients = 25
# number of clusters found by the clustering cell; not referenced again in the visible code
selected_clients = len(clients_clustered)
# test accuracy at which training stops early
targeted_accuracy = 0.95
# upper bound on the number of accepted training rounds
max_training_rounds = 100
# local epochs per selected client per round (assigned again in the run cell below)
epochs_per_client = 2
# validation_split forwarded to the client-side fit call
validation_size = 0.1
# batch size forwarded to the client-side fit call
batch_size = 1

distribution_representation_min = 0.89 #threshold for the selected clients to represent the global distribution in Client Selection Algorithm (replaced by 0.91 in the run cell below)

In [None]:
# per-run settings; these assignments take precedence over the parameter cell above
epochs_per_client = 2
distribution_representation_min = 0.91
participating_clients = 25
# NOTE(review): this module-level value is not passed to trainLeastTrainedClientsWithKD,
# which uses its own local lambda_val taken from returnLambdaValueKD() (0.7).
lambda_val = 0.3

# train on a copy: the training function updates the model it receives in place,
# so central_server_model keeps its initial weights
starting_central_server_model = returnCopyGlobalModelToClient(central_server_model)

trainLeastTrainedClientsWithKD(participating_clients, clients_datasets_obj, clients_clustered, distribution_representation_min, targeted_accuracy, x_train, y_train, x_test, y_test, starting_central_server_model, max_training_rounds, epochs_per_client, validation_size, batch_size)