# centralized IDS

In [None]:
import pandas as pd # used for handling the dataset
import numpy as np # used for handling numbers
from collections import Counter
import random
import math
import warnings
warnings.filterwarnings('ignore')

from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix, precision_score, recall_score, f1_score
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder, MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split

# necessary package for DL
import time
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dense
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import layers

# added import
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, classification_report, confusion_matrix
import time
import sys
from typing import Dict, Optional, Tuple

from sklearn import svm
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import roc_curve, auc

warnings.filterwarnings('ignore')

In [None]:
# https://colab.research.google.com/drive/1hvVW5GMSc5qRmtCKZAImW3gZ2Qa-vx9V#scrollTo=jk1CNtc9aG55
from google.colab import drive
# Mount Google Drive at /content/drive; the dataset files read below live under that path.
drive.mount('/content/drive')

In [None]:
# The NSL-KDD text files have no header row. Without header=None pandas would
# use the first record of each file as column names and silently drop it
# (the real names are assigned right after loading).
df = pd.read_csv("/content/drive/MyDrive/KDDTrain+.txt", header=None)
test_df = pd.read_csv("/content/drive/MyDrive/KDDTest+.txt", header=None)

In [None]:
# Column names applied to both loaded frames, in file order.
# The last two entries ('attack', 'level') are the labelling columns that the
# preprocessing step removes from the feature set.
columns = [
    'duration', 'protocol_type', 'service', 'flag',
    'src_bytes', 'dst_bytes', 'land', 'wrong_fragment', 'urgent', 'hot',
    'num_failed_logins', 'logged_in', 'num_compromised', 'root_shell',
    'su_attempted', 'num_root', 'num_file_creations', 'num_shells',
    'num_access_files', 'num_outbound_cmds', 'is_host_login', 'is_guest_login',
    'count', 'srv_count',
    'serror_rate', 'srv_serror_rate', 'rerror_rate', 'srv_rerror_rate',
    'same_srv_rate', 'diff_srv_rate', 'srv_diff_host_rate',
    'dst_host_count', 'dst_host_srv_count',
    'dst_host_same_srv_rate', 'dst_host_diff_srv_rate',
    'dst_host_same_src_port_rate', 'dst_host_srv_diff_host_rate',
    'dst_host_serror_rate', 'dst_host_srv_serror_rate',
    'dst_host_rerror_rate', 'dst_host_srv_rerror_rate',
    'attack', 'level',
]

df.columns = columns
test_df.columns = columns

# Stack both files into one frame. ignore_index=True gives every row a unique
# label; otherwise the row labels of the two files repeat and the label Series
# below no longer lines up with the freshly indexed feature frames.
dataset = pd.concat([df, test_df], ignore_index=True)

# keep the original attack names for further use
df_attack_cat = dataset['attack']

# map 'normal' to 0 and every attack to 1
is_attack = dataset.attack.map(lambda a: 0 if a == 'normal' else 1)
dataset['attack_flag'] = is_attack

# remove the columns that were used to label the dataset
dataset = dataset.drop(columns=['level', 'attack'])

# categorical (object) attributes
categorical_cols = ['protocol_type', 'service', 'flag']
df_object = dataset[categorical_cols]

# binary label
df_y = dataset['attack_flag']

# numerical attributes: everything except the categorical columns and the label
df_numerical = dataset.drop(columns=categorical_cols + ['attack_flag'])

# Standardise the numerical attributes.
# NOTE(review): the scaler is fitted on all rows before the train/test split,
# so test-set statistics influence the scaling - confirm this is intended.
std = StandardScaler()
encoded_column = std.fit_transform(df_numerical)
df_num = pd.DataFrame(encoded_column, columns=df_numerical.columns)

# One-hot encode the categorical attributes
ohe = OneHotEncoder()
encoded_column = ohe.fit_transform(df_object).toarray()
df_obj = pd.DataFrame(encoded_column, columns=ohe.get_feature_names_out())

# final feature matrix: one-hot columns followed by the scaled numerical columns
df_preprocessed = pd.concat([df_obj, df_num], axis=1)

In [None]:
# Hold out 20 % of the rows for testing. A fixed seed makes every experiment in
# the notebook use the same split; stratifying keeps the attack/normal ratio
# identical in both parts.
df_train_X, df_test_X, df_train_y, df_test_y = train_test_split(
    df_preprocessed, df_y, test_size=0.2, random_state=42, stratify=df_y
)

# size of the training data (features + labels); the original label said
# "model" although no model exists at this point
taille_variable1 = sys.getsizeof(df_train_X)
taille_variable2 = sys.getsizeof(df_train_y)
taille_total = taille_variable1 + taille_variable2
print(f"La taille du training data est : {taille_total} octets")

In [None]:
# Centralized baseline: a fully connected binary classifier trained on the
# whole training split.
# Input width is taken from the data instead of a hard-coded 122, and the name
# no longer shadows the builtin input().
n_features = df_train_X.shape[1]
model = Sequential()
model.add(Dense(80, input_dim=n_features, activation='relu', kernel_initializer='he_normal'))
model.add(Dense(40, activation='relu'))
model.add(Dense(30, activation='relu'))
model.add(Dense(20, activation='relu'))
model.add(Dense(10, activation='relu'))
# sigmoid keeps the output in (0, 1), which binary_crossentropy and the 0.5
# decision threshold below both expect (ReLU is unbounded and can emit exactly 0)
model.add(Dense(1, activation='sigmoid'))
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=[
        "accuracy",
        tf.keras.metrics.AUC(),  # output is a probability, not a logit
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall(),
        tf.keras.metrics.TruePositives(),
        tf.keras.metrics.TrueNegatives(),
        tf.keras.metrics.FalsePositives(),
        tf.keras.metrics.FalseNegatives(),
    ],
)

# training, timed
start_time = time.time()
model.fit(df_train_X, df_train_y, epochs=50) #15
end_time = time.time()
execution_time = end_time - start_time
print(f"Execution time: {execution_time:.4f} seconds")

# Model size: sys.getsizeof(model) only measures the Python wrapper object, so
# sum the bytes of the weight arrays instead.
taille_variable = sum(w.nbytes for w in model.get_weights())
print(f"La taille de la variable est : {taille_variable} octets")

# `auc_score` rather than `auc`, so the imported sklearn.metrics.auc is not overwritten
loss, acc, auc_score, precision, recall, tp, tn, fp, fn = model.evaluate(df_test_X, df_test_y, verbose=2)

# Predicted probabilities on the test set
predictions_proba = model.predict(df_test_X).ravel()
# Class labels derived with a 0.5 threshold
predictions = np.where(predictions_proba >= 0.5, 1, 0)
# F1 score
f1 = f1_score(df_test_y, predictions)
print(f"F1 Score: {f1:.4f}")

# FL IDS

## Setup FL

In [None]:
pip install shap

In [None]:
pip install flwr["simulation"]

In [None]:
import flwr as fl

In [None]:
def print_results (list_acc, list_f1_score, list_auc, list_precision, list_recall, list_tp, list_tn, list_fp, list_fn):
  """Print every metric history under its own banner line, one value per line."""
  sections = (
      ("==============list_acc=================", list_acc),
      ("==============list_f1_score=================", list_f1_score),
      ("==============list_auc==============", list_auc),
      ("==============list_precision==============", list_precision),
      ("==============list_recall==============", list_recall),
      ("==============list_tp==============", list_tp),
      ("==============list_tn==============", list_tn),
      ("==============list_fp==============", list_fp),
      ("==============list_fn==============", list_fn),
  )
  for banner, values in sections:
    print(banner)
    for value in values:
      print(value)

In [None]:
class FlowerClient(fl.client.NumPyClient):
    """Flower NumPy client wrapping a Keras model and one client's data.

    Args:
        model: compiled Keras model trained and evaluated by this client.
        x_train, y_train: data passed to ``model.fit``.
        x_val, y_val: data passed to ``model.evaluate``.
        local_epochs: epochs run per ``fit`` call (default 1, the value that
            was previously hard-coded).
    """

    def __init__(self, model, x_train, y_train, x_val, y_val, local_epochs: int = 1) -> None:
        warnings.filterwarnings('ignore')
        self.model = model
        self.x_train, self.y_train = x_train, y_train
        self.x_val, self.y_val = x_val, y_val
        self.local_epochs = local_epochs

    def get_parameters(self, config=None):
        """Return the current model weights.

        ``config`` is accepted because Flower 1.x calls
        ``get_parameters(config)``; it defaults to None so a call without
        arguments keeps working.
        """
        warnings.filterwarnings('ignore')
        return self.model.get_weights()

    def fit(self, parameters, config):
        """Load ``parameters``, train locally and return (weights, sample count, {})."""
        warnings.filterwarnings('ignore')
        self.model.set_weights(parameters)
        self.model.fit(self.x_train, self.y_train, epochs=self.local_epochs, verbose=0)

        taille_totale = sys.getsizeof(self.x_train) + sys.getsizeof(self.y_train)
        print(f"La taille du training data est : {taille_totale} octets")

        # sys.getsizeof on the list of weights only measures the list object,
        # so add up the bytes of the arrays themselves.
        weights = self.model.get_weights()
        taille_variable3 = sum(w.nbytes for w in weights)
        print(f"La taille des weights du model est : {taille_variable3} octets")

        return weights, len(self.x_train), {}

    def evaluate(self, parameters, config):
        """Load ``parameters`` and return (loss, sample count, {"accuracy": acc})."""
        warnings.filterwarnings('ignore')
        self.model.set_weights(parameters)
        # only loss and accuracy are reported; the remaining compiled metrics are ignored
        loss, acc, *_ = self.model.evaluate(self.x_val, self.y_val, verbose=2)
        return loss, len(self.x_val), {"accuracy": acc}

In [None]:
def func_client_fn(model_fl, x_train_all, y_train_all, num_clients=None):
    """Build the ``client_fn`` factory used by the Flower simulation.

    Args:
        model_fl: Keras model handed to every created client.
        x_train_all: full training features, split into equal contiguous shards.
        y_train_all: full training labels, sharded the same way.
        num_clients: number of shards; when None the module-level
            ``NUM_CLIENTS`` is read at client-creation time (original behaviour).

    Returns:
        A function mapping a client id string to a ``FlowerClient`` holding
        only that client's shard, itself split 90/10 into train/validation.
    """

    def _rows(data, start, stop):
        # positional row slice for pandas objects and plain arrays alike
        return data.iloc[start:stop] if hasattr(data, "iloc") else data[start:stop]

    def client_fn(cid: str) -> fl.client.Client:

        print('########################################## cid= ',cid,' ##########################################')
        # Load data partition (divide dataset into distinct, equally sized partitions)
        n_parts = NUM_CLIENTS if num_clients is None else num_clients
        partition_size = len(x_train_all) // n_parts
        idx_from = int(cid) * partition_size
        idx_to = idx_from + partition_size
        x_part = _rows(x_train_all, idx_from, idx_to)
        y_part = _rows(y_train_all, idx_from, idx_to)

        # Split THIS client's shard. The original split the full dataset here,
        # which discarded the partition and gave every client ~90 % of all data.
        x_train_all_part, X_test_all, y_train_all_part, y_test_all = train_test_split(
            x_part, y_part, test_size=0.1
        )

        # Create and return client
        return FlowerClient(model_fl, x_train_all_part, y_train_all_part, X_test_all, y_test_all)

    return client_fn

In [None]:
def get_eval_fn(model, x_test_arr_val, y_test_arr_val, target_accuracy=0.9902):
    """Return an evaluation function for server-side evaluation.

    The returned callback loads the aggregated weights into ``model``,
    evaluates it on the given test data, appends accuracy, F1, AUC, precision,
    recall and the TP/TN/FP/FN counts to the module-level ``list_*``
    histories, and returns ``(loss, {"accuracy": accuracy})``.

    Args:
        model: compiled Keras model whose ``evaluate`` returns the loss
            followed by the eight metrics unpacked below.
        x_test_arr_val: test features.
        y_test_arr_val: test labels.
        target_accuracy: once accuracy exceeds this value the model is stored
            in the global ``model_xai`` and the run is stopped with
            ``sys.exit()`` (default 0.9902, the previously hard-coded value).
    """

    # The `evaluate` function will be called after every round
    def evaluate(server_round: int, parameters: fl.common.NDArrays, config: Dict[str, fl.common.Scalar],) -> Optional[Tuple[float, Dict[str, fl.common.Scalar]]]:
        # `global` only applies to the function it is written in. Declaring it
        # in get_eval_fn left the assignment below local to `evaluate`, so
        # model_xai never reached module scope.
        global model_xai

        model.set_weights(parameters)  # Update model with the latest parameters
        # `auc_score` rather than `auc` to avoid shadowing sklearn.metrics.auc
        loss, accuracy, auc_score, precision, recall, tp, tn, fp, fn = model.evaluate(x_test_arr_val, y_test_arr_val)

        # Predicted probabilities on the test set
        predictions_proba = model.predict(x_test_arr_val).ravel()

        # Class labels derived with a 0.5 threshold
        predictions = np.where(predictions_proba >= 0.5, 1, 0)

        # F1 score
        f1 = f1_score(y_test_arr_val, predictions)

        # The histories are only mutated (never rebound), so no `global` is needed for them
        list_acc.append(accuracy)
        list_f1_score.append(f1)
        list_auc.append(auc_score)
        list_precision.append(precision)
        list_recall.append(recall)
        list_tp.append(tp)
        list_tn.append(tn)
        list_fp.append(fp)
        list_fn.append(fn)

        if accuracy > target_accuracy:
            model_xai = model
            # Early stop: SystemExit aborts the remaining rounds of the simulation
            sys.exit()

        return loss, {"accuracy": accuracy}

    return evaluate

In [None]:
# Nine independent, empty metric histories; the server-side evaluation
# callback appends one value to each of them per call.
(list_f1_score, list_acc, list_auc, list_precision, list_recall,
 list_tp, list_tn, list_fp, list_fn) = ([] for _ in range(9))

def FL_process (x_fraction, NUM_CLIENTS, df_train_X, df_train_y, df_test_X, df_test_y):
    """Run a 50-round FedAvg simulation of the intrusion-detection classifier.

    Args:
        x_fraction: value passed to FedAvg as ``fraction_fit``.
        NUM_CLIENTS: number of simulated clients.
        df_train_X, df_train_y: training data handed to the client factory.
        df_test_X, df_test_y: data used by the server-side evaluation callback.

    Side effects:
        Stores the server model in the module-level ``model_xai``.
    """
    # Without this declaration the assignment below was a dead local and the
    # model was never visible outside the function.
    global model_xai

    model_cent = Sequential()
    # input width follows the data instead of a hard-coded 122
    model_cent.add(Dense(80, input_dim=df_train_X.shape[1], activation='relu', kernel_initializer='he_normal'))
    model_cent.add(Dense(40, activation='relu'))
    model_cent.add(Dense(30, activation='relu'))
    model_cent.add(Dense(20, activation='relu'))
    model_cent.add(Dense(10, activation='relu'))
    # sigmoid output: binary_crossentropy and the 0.5 threshold expect a probability
    model_cent.add(Dense(1, activation='sigmoid'))
    model_cent.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=[
            "accuracy",
            tf.keras.metrics.AUC(),  # output is a probability, not a logit
            tf.keras.metrics.Precision(),
            tf.keras.metrics.Recall(),
            tf.keras.metrics.TruePositives(),
            tf.keras.metrics.TrueNegatives(),
            tf.keras.metrics.FalsePositives(),
            tf.keras.metrics.FalseNegatives(),
        ],
    )

    model_xai = model_cent

    # Wait for 75 % of the clients, but never fewer than min_fit_clients (2)
    # and never more than exist; int(2 * 0.75) == 1 fell below min_fit_clients.
    min_available = min(NUM_CLIENTS, max(2, int(NUM_CLIENTS * 0.75)))

    # Create FedAvg strategy
    strategy = fl.server.strategy.FedAvg(
        fraction_fit=x_fraction,
        min_fit_clients=2,
        min_available_clients=min_available,
        evaluate_fn=get_eval_fn(model_cent, df_test_X, df_test_y),
        initial_parameters=fl.common.ndarrays_to_parameters(model_cent.get_weights()),
    )

    # Start simulation
    fl.simulation.start_simulation(
        client_fn=func_client_fn(model_cent, df_train_X, df_train_y),
        num_clients=NUM_CLIENTS,
        config=fl.server.ServerConfig(num_rounds=50),
        strategy=strategy,
    )

## test number of clients

### number of clients = 2 & fraction fit = 1 & local epochs = 1

In [None]:
NUM_CLIENTS = 2
# Empty the metric histories so re-running this cell reports only this run
# instead of appending to values from a previous execution.
for _history in (list_acc, list_f1_score, list_auc, list_precision, list_recall,
                 list_tp, list_tn, list_fp, list_fn):
    _history.clear()
start_time = time.time()
FL_process(1, NUM_CLIENTS, df_train_X, df_train_y, df_test_X, df_test_y)

In [None]:
# Wall-clock time elapsed since start_time was recorded, followed by the collected metric histories
end_time = time.time()
execution_time = end_time - start_time
print(f"Execution time: {execution_time:.4f} seconds")
print_results(
    list_acc, list_f1_score, list_auc, list_precision, list_recall,
    list_tp, list_tn, list_fp, list_fn,
)

### number of clients = 4 & fraction fit = 1 & local epochs = 1

In [None]:
NUM_CLIENTS = 4
# Empty the metric histories first: they are module-level lists that are only
# appended to, so without this the report would also contain the rounds of
# every earlier experiment.
for _history in (list_acc, list_f1_score, list_auc, list_precision, list_recall,
                 list_tp, list_tn, list_fp, list_fn):
    _history.clear()
start_time = time.time()
FL_process(1, NUM_CLIENTS, df_train_X, df_train_y, df_test_X, df_test_y)

In [None]:
# Execution time: wall-clock seconds since start_time was recorded, then the collected metric histories
end_time = time.time()
execution_time = end_time - start_time
print(f"Execution time: {execution_time:.4f} seconds")
print_results(
    list_acc, list_f1_score, list_auc, list_precision, list_recall,
    list_tp, list_tn, list_fp, list_fn,
)

### number of clients = 8 & fraction fit = 1 & local epochs = 1

In [None]:
NUM_CLIENTS = 8
# Empty the metric histories first: they are module-level lists that are only
# appended to, so without this the report would also contain the rounds of
# every earlier experiment.
for _history in (list_acc, list_f1_score, list_auc, list_precision, list_recall,
                 list_tp, list_tn, list_fp, list_fn):
    _history.clear()
start_time = time.time()
FL_process(1, NUM_CLIENTS, df_train_X, df_train_y, df_test_X, df_test_y)

In [None]:
# Execution time: wall-clock seconds since start_time was recorded, then the collected metric histories
end_time = time.time()
execution_time = end_time - start_time
print(f"Execution time: {execution_time:.4f} seconds")
print_results(
    list_acc, list_f1_score, list_auc, list_precision, list_recall,
    list_tp, list_tn, list_fp, list_fn,
)

### number of clients = 12 & fraction fit = 1 & local epochs = 1

In [None]:
NUM_CLIENTS = 12
# Empty the metric histories first: they are module-level lists that are only
# appended to, so without this the report would also contain the rounds of
# every earlier experiment.
for _history in (list_acc, list_f1_score, list_auc, list_precision, list_recall,
                 list_tp, list_tn, list_fp, list_fn):
    _history.clear()
start_time = time.time()
FL_process(1, NUM_CLIENTS, df_train_X, df_train_y, df_test_X, df_test_y)

In [None]:
# Wall-clock time elapsed since start_time was recorded, followed by the collected metric histories
end_time = time.time()
execution_time = end_time - start_time
print(f"Execution time: {execution_time:.4f} seconds")
print_results(
    list_acc, list_f1_score, list_auc, list_precision, list_recall,
    list_tp, list_tn, list_fp, list_fn,
)

## test Fraction fit

### Fraction fit = 0.1 & local epochs = 1 & number of clients = 8

In [None]:
NUM_CLIENTS = 8
# Empty the metric histories first: they are module-level lists that are only
# appended to, so without this the report would also contain the rounds of
# every earlier experiment.
for _history in (list_acc, list_f1_score, list_auc, list_precision, list_recall,
                 list_tp, list_tn, list_fp, list_fn):
    _history.clear()
start_time = time.time()
FL_process(0.1, NUM_CLIENTS, df_train_X, df_train_y, df_test_X, df_test_y)

In [None]:
# Execution time: wall-clock seconds since start_time was recorded, then the collected metric histories
end_time = time.time()
execution_time = end_time - start_time
print(f"Execution time: {execution_time:.4f} seconds")
print_results(
    list_acc, list_f1_score, list_auc, list_precision, list_recall,
    list_tp, list_tn, list_fp, list_fn,
)

### Fraction fit = 0.5 & local epochs = 1 & number of clients = 8




In [None]:
# Set explicitly (the section is the 8-client experiment) instead of relying on
# whatever value an earlier cell left in NUM_CLIENTS.
NUM_CLIENTS = 8
# Empty the metric histories first: they are module-level lists that are only
# appended to, so without this the report would also contain the rounds of
# every earlier experiment.
for _history in (list_acc, list_f1_score, list_auc, list_precision, list_recall,
                 list_tp, list_tn, list_fp, list_fn):
    _history.clear()
start_time = time.time()
FL_process(0.5, NUM_CLIENTS, df_train_X, df_train_y, df_test_X, df_test_y)

In [None]:
# Execution time: wall-clock seconds since start_time was recorded, then the collected metric histories
end_time = time.time()
execution_time = end_time - start_time
print(f"Execution time: {execution_time:.4f} seconds")
print_results(
    list_acc, list_f1_score, list_auc, list_precision, list_recall,
    list_tp, list_tn, list_fp, list_fn,
)

### Fraction fit = 1 & local epochs = 1 & number of clients = 8
- already tested

## test local epochs

### test local epochs = 1 & Fraction fit = 1 & number of clients = 8
- already tested

### test with 2 epochs & Fraction fit = 1 & number of clients = 8

In [None]:
class FlowerClient(fl.client.NumPyClient):
    """Flower NumPy client wrapping a Keras model and one client's data.

    Args:
        model: compiled Keras model trained and evaluated by this client.
        x_train, y_train: data passed to ``model.fit``.
        x_val, y_val: data passed to ``model.evaluate``.
        local_epochs: epochs run per ``fit`` call (default 2, the value that
            was previously hard-coded for this experiment).
    """

    def __init__(self, model, x_train, y_train, x_val, y_val, local_epochs: int = 2) -> None:
        warnings.filterwarnings('ignore')
        self.model = model
        self.x_train, self.y_train = x_train, y_train
        self.x_val, self.y_val = x_val, y_val
        self.local_epochs = local_epochs

    def get_parameters(self, config=None):
        """Return the current model weights.

        ``config`` is accepted because Flower 1.x calls
        ``get_parameters(config)``; it defaults to None so a call without
        arguments keeps working.
        """
        warnings.filterwarnings('ignore')
        return self.model.get_weights()

    def fit(self, parameters, config):
        """Load ``parameters``, train locally and return (weights, sample count, {})."""
        warnings.filterwarnings('ignore')
        self.model.set_weights(parameters)
        self.model.fit(self.x_train, self.y_train, epochs=self.local_epochs, verbose=0)

        taille_totale = sys.getsizeof(self.x_train) + sys.getsizeof(self.y_train)
        print(f"La taille du training data est : {taille_totale} octets")

        # sys.getsizeof on the list of weights only measures the list object,
        # so add up the bytes of the arrays themselves.
        weights = self.model.get_weights()
        taille_variable3 = sum(w.nbytes for w in weights)
        print(f"La taille des weights du model est : {taille_variable3} octets")

        return weights, len(self.x_train), {}

    def evaluate(self, parameters, config):
        """Load ``parameters`` and return (loss, sample count, {"accuracy": acc})."""
        warnings.filterwarnings('ignore')
        self.model.set_weights(parameters)
        # only loss and accuracy are reported; the remaining compiled metrics are ignored
        loss, acc, *_ = self.model.evaluate(self.x_val, self.y_val, verbose=2)
        return loss, len(self.x_val), {"accuracy": acc}


# Reset all nine metric histories to fresh, independent empty lists before
# the next experiment.
(list_f1_score, list_acc, list_auc, list_precision, list_recall,
 list_tp, list_tn, list_fp, list_fn) = ([] for _ in range(9))

def FL_process (x_fraction, NUM_CLIENTS, df_train_X, df_train_y, df_test_X, df_test_y):
    """Run a 50-round FedAvg simulation of the intrusion-detection classifier.

    Args:
        x_fraction: value passed to FedAvg as ``fraction_fit``.
        NUM_CLIENTS: number of simulated clients.
        df_train_X, df_train_y: training data handed to the client factory.
        df_test_X, df_test_y: data used by the server-side evaluation callback.

    Side effects:
        Stores the server model in the module-level ``model_xai``.
    """
    # Without this declaration the assignment below was a dead local and the
    # model was never visible outside the function.
    global model_xai

    model_cent = Sequential()
    # input width follows the data instead of a hard-coded 122
    model_cent.add(Dense(80, input_dim=df_train_X.shape[1], activation='relu', kernel_initializer='he_normal'))
    model_cent.add(Dense(40, activation='relu'))
    model_cent.add(Dense(30, activation='relu'))
    model_cent.add(Dense(20, activation='relu'))
    model_cent.add(Dense(10, activation='relu'))
    # sigmoid output: binary_crossentropy and the 0.5 threshold expect a probability
    model_cent.add(Dense(1, activation='sigmoid'))
    model_cent.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=[
            "accuracy",
            tf.keras.metrics.AUC(),  # output is a probability, not a logit
            tf.keras.metrics.Precision(),
            tf.keras.metrics.Recall(),
            tf.keras.metrics.TruePositives(),
            tf.keras.metrics.TrueNegatives(),
            tf.keras.metrics.FalsePositives(),
            tf.keras.metrics.FalseNegatives(),
        ],
    )

    model_xai = model_cent

    # Wait for 75 % of the clients, but never fewer than min_fit_clients (2)
    # and never more than exist; int(2 * 0.75) == 1 fell below min_fit_clients.
    min_available = min(NUM_CLIENTS, max(2, int(NUM_CLIENTS * 0.75)))

    # Create FedAvg strategy
    strategy = fl.server.strategy.FedAvg(
        fraction_fit=x_fraction,
        min_fit_clients=2,
        min_available_clients=min_available,
        evaluate_fn=get_eval_fn(model_cent, df_test_X, df_test_y),
        initial_parameters=fl.common.ndarrays_to_parameters(model_cent.get_weights()),
    )

    # Start simulation
    fl.simulation.start_simulation(
        client_fn=func_client_fn(model_cent, df_train_X, df_train_y),
        num_clients=NUM_CLIENTS,
        config=fl.server.ServerConfig(num_rounds=50),
        strategy=strategy,
    )

In [None]:
# Launch the experiment with all clients participating in every round
NUM_CLIENTS = 8
start_time = time.time()
FL_process(
    x_fraction=1,
    NUM_CLIENTS=NUM_CLIENTS,
    df_train_X=df_train_X,
    df_train_y=df_train_y,
    df_test_X=df_test_X,
    df_test_y=df_test_y,
)

In [None]:
# Wall-clock time elapsed since start_time was recorded, followed by the collected metric histories
end_time = time.time()
execution_time = end_time - start_time
print(f"Execution time: {execution_time:.4f} seconds")
print_results(
    list_acc, list_f1_score, list_auc, list_precision, list_recall,
    list_tp, list_tn, list_fp, list_fn,
)

### local epochs = 5 & Fraction fit = 1 & number of clients = 8

In [None]:
class FlowerClient(fl.client.NumPyClient):
    """Flower NumPy client wrapping a Keras model and one client's data.

    Args:
        model: compiled Keras model trained and evaluated by this client.
        x_train, y_train: data passed to ``model.fit``.
        x_val, y_val: data passed to ``model.evaluate``.
        local_epochs: epochs run per ``fit`` call (default 5, the value that
            was previously hard-coded for this experiment).
    """

    def __init__(self, model, x_train, y_train, x_val, y_val, local_epochs: int = 5) -> None:
        warnings.filterwarnings('ignore')
        self.model = model
        self.x_train, self.y_train = x_train, y_train
        self.x_val, self.y_val = x_val, y_val
        self.local_epochs = local_epochs

    def get_parameters(self, config=None):
        """Return the current model weights.

        ``config`` is accepted because Flower 1.x calls
        ``get_parameters(config)``; it defaults to None so a call without
        arguments keeps working.
        """
        warnings.filterwarnings('ignore')
        return self.model.get_weights()

    def fit(self, parameters, config):
        """Load ``parameters``, train locally and return (weights, sample count, {})."""
        warnings.filterwarnings('ignore')
        self.model.set_weights(parameters)
        self.model.fit(self.x_train, self.y_train, epochs=self.local_epochs, verbose=0)

        taille_totale = sys.getsizeof(self.x_train) + sys.getsizeof(self.y_train)
        print(f"La taille du training data est : {taille_totale} octets")

        # sys.getsizeof on the list of weights only measures the list object,
        # so add up the bytes of the arrays themselves.
        weights = self.model.get_weights()
        taille_variable3 = sum(w.nbytes for w in weights)
        print(f"La taille des weights du model est : {taille_variable3} octets")

        return weights, len(self.x_train), {}

    def evaluate(self, parameters, config):
        """Load ``parameters`` and return (loss, sample count, {"accuracy": acc})."""
        warnings.filterwarnings('ignore')
        self.model.set_weights(parameters)
        # only loss and accuracy are reported; the remaining compiled metrics are ignored
        loss, acc, *_ = self.model.evaluate(self.x_val, self.y_val, verbose=2)
        return loss, len(self.x_val), {"accuracy": acc}


# Reset all nine metric histories to fresh, independent empty lists before
# the next experiment.
(list_f1_score, list_acc, list_auc, list_precision, list_recall,
 list_tp, list_tn, list_fp, list_fn) = ([] for _ in range(9))

def FL_process (x_fraction, NUM_CLIENTS, df_train_X, df_train_y, df_test_X, df_test_y):
    """Run a 50-round FedAvg simulation of the intrusion-detection classifier.

    Args:
        x_fraction: value passed to FedAvg as ``fraction_fit``.
        NUM_CLIENTS: number of simulated clients.
        df_train_X, df_train_y: training data handed to the client factory.
        df_test_X, df_test_y: data used by the server-side evaluation callback.

    Side effects:
        Stores the server model in the module-level ``model_xai``.
    """
    # Without this declaration the assignment below was a dead local and the
    # model was never visible outside the function.
    global model_xai

    model_cent = Sequential()
    # input width follows the data instead of a hard-coded 122
    model_cent.add(Dense(80, input_dim=df_train_X.shape[1], activation='relu', kernel_initializer='he_normal'))
    model_cent.add(Dense(40, activation='relu'))
    model_cent.add(Dense(30, activation='relu'))
    model_cent.add(Dense(20, activation='relu'))
    model_cent.add(Dense(10, activation='relu'))
    # sigmoid output: binary_crossentropy and the 0.5 threshold expect a probability
    model_cent.add(Dense(1, activation='sigmoid'))
    model_cent.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=[
            "accuracy",
            tf.keras.metrics.AUC(),  # output is a probability, not a logit
            tf.keras.metrics.Precision(),
            tf.keras.metrics.Recall(),
            tf.keras.metrics.TruePositives(),
            tf.keras.metrics.TrueNegatives(),
            tf.keras.metrics.FalsePositives(),
            tf.keras.metrics.FalseNegatives(),
        ],
    )

    model_xai = model_cent

    # Wait for 75 % of the clients, but never fewer than min_fit_clients (2)
    # and never more than exist; int(2 * 0.75) == 1 fell below min_fit_clients.
    min_available = min(NUM_CLIENTS, max(2, int(NUM_CLIENTS * 0.75)))

    # Create FedAvg strategy
    strategy = fl.server.strategy.FedAvg(
        fraction_fit=x_fraction,
        min_fit_clients=2,
        min_available_clients=min_available,
        evaluate_fn=get_eval_fn(model_cent, df_test_X, df_test_y),
        initial_parameters=fl.common.ndarrays_to_parameters(model_cent.get_weights()),
    )

    # Start simulation
    fl.simulation.start_simulation(
        client_fn=func_client_fn(model_cent, df_train_X, df_train_y),
        num_clients=NUM_CLIENTS,
        config=fl.server.ServerConfig(num_rounds=50),
        strategy=strategy,
    )

In [None]:
# Launch the experiment with all clients participating in every round
NUM_CLIENTS = 8
start_time = time.time()
FL_process(
    x_fraction=1,
    NUM_CLIENTS=NUM_CLIENTS,
    df_train_X=df_train_X,
    df_train_y=df_train_y,
    df_test_X=df_test_X,
    df_test_y=df_test_y,
)

In [None]:
# Wall-clock time elapsed since start_time was recorded, followed by the collected metric histories
end_time = time.time()
execution_time = end_time - start_time
print(f"Execution time: {execution_time:.4f} seconds")
print_results(
    list_acc, list_f1_score, list_auc, list_precision, list_recall,
    list_tp, list_tn, list_fp, list_fn,
)

### local epochs = 8 & Fraction fit = 1 & number of clients = 8

In [None]:
class FlowerClient(fl.client.NumPyClient):
    """Flower NumPy client wrapping a Keras model and one client's data.

    Args:
        model: compiled Keras model trained and evaluated by this client.
        x_train, y_train: data passed to ``model.fit``.
        x_val, y_val: data passed to ``model.evaluate``.
        local_epochs: epochs run per ``fit`` call (default 8, the value that
            was previously hard-coded for this experiment).
    """

    def __init__(self, model, x_train, y_train, x_val, y_val, local_epochs: int = 8) -> None:
        warnings.filterwarnings('ignore')
        self.model = model
        self.x_train, self.y_train = x_train, y_train
        self.x_val, self.y_val = x_val, y_val
        self.local_epochs = local_epochs

    def get_parameters(self, config=None):
        """Return the current model weights.

        ``config`` is accepted because Flower 1.x calls
        ``get_parameters(config)``; it defaults to None so a call without
        arguments keeps working.
        """
        warnings.filterwarnings('ignore')
        return self.model.get_weights()

    def fit(self, parameters, config):
        """Load ``parameters``, train locally and return (weights, sample count, {})."""
        warnings.filterwarnings('ignore')
        self.model.set_weights(parameters)
        self.model.fit(self.x_train, self.y_train, epochs=self.local_epochs, verbose=0)

        taille_totale = sys.getsizeof(self.x_train) + sys.getsizeof(self.y_train)
        print(f"La taille du training data est : {taille_totale} octets")

        # sys.getsizeof on the list of weights only measures the list object,
        # so add up the bytes of the arrays themselves.
        weights = self.model.get_weights()
        taille_variable3 = sum(w.nbytes for w in weights)
        print(f"La taille des weights du model est : {taille_variable3} octets")

        return weights, len(self.x_train), {}

    def evaluate(self, parameters, config):
        """Load ``parameters`` and return (loss, sample count, {"accuracy": acc})."""
        warnings.filterwarnings('ignore')
        self.model.set_weights(parameters)
        # only loss and accuracy are reported; the remaining compiled metrics are ignored
        loss, acc, *_ = self.model.evaluate(self.x_val, self.y_val, verbose=2)
        return loss, len(self.x_val), {"accuracy": acc}


# Reset all nine metric histories to fresh, independent empty lists before
# the next experiment.
(list_f1_score, list_acc, list_auc, list_precision, list_recall,
 list_tp, list_tn, list_fp, list_fn) = ([] for _ in range(9))

def FL_process (x_fraction, NUM_CLIENTS, df_train_X, df_train_y, df_test_X, df_test_y):
    """Run a 50-round FedAvg simulation of the intrusion-detection classifier.

    Args:
        x_fraction: value passed to FedAvg as ``fraction_fit``.
        NUM_CLIENTS: number of simulated clients.
        df_train_X, df_train_y: training data handed to the client factory.
        df_test_X, df_test_y: data used by the server-side evaluation callback.

    Side effects:
        Stores the server model in the module-level ``model_xai``.
    """
    # Without this declaration the assignment below was a dead local and the
    # model was never visible outside the function.
    global model_xai

    model_cent = Sequential()
    # input width follows the data instead of a hard-coded 122
    model_cent.add(Dense(80, input_dim=df_train_X.shape[1], activation='relu', kernel_initializer='he_normal'))
    model_cent.add(Dense(40, activation='relu'))
    model_cent.add(Dense(30, activation='relu'))
    model_cent.add(Dense(20, activation='relu'))
    model_cent.add(Dense(10, activation='relu'))
    # sigmoid output: binary_crossentropy and the 0.5 threshold expect a probability
    model_cent.add(Dense(1, activation='sigmoid'))
    model_cent.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=[
            "accuracy",
            tf.keras.metrics.AUC(),  # output is a probability, not a logit
            tf.keras.metrics.Precision(),
            tf.keras.metrics.Recall(),
            tf.keras.metrics.TruePositives(),
            tf.keras.metrics.TrueNegatives(),
            tf.keras.metrics.FalsePositives(),
            tf.keras.metrics.FalseNegatives(),
        ],
    )

    model_xai = model_cent

    # Wait for 75 % of the clients, but never fewer than min_fit_clients (2)
    # and never more than exist; int(2 * 0.75) == 1 fell below min_fit_clients.
    min_available = min(NUM_CLIENTS, max(2, int(NUM_CLIENTS * 0.75)))

    # Create FedAvg strategy
    strategy = fl.server.strategy.FedAvg(
        fraction_fit=x_fraction,
        min_fit_clients=2,
        min_available_clients=min_available,
        evaluate_fn=get_eval_fn(model_cent, df_test_X, df_test_y),
        initial_parameters=fl.common.ndarrays_to_parameters(model_cent.get_weights()),
    )

    # Start simulation
    fl.simulation.start_simulation(
        client_fn=func_client_fn(model_cent, df_train_X, df_train_y),
        num_clients=NUM_CLIENTS,
        config=fl.server.ServerConfig(num_rounds=50),
        strategy=strategy,
    )

In [None]:
# Launch the experiment with all clients participating in every round
NUM_CLIENTS = 8
start_time = time.time()
FL_process(
    x_fraction=1,
    NUM_CLIENTS=NUM_CLIENTS,
    df_train_X=df_train_X,
    df_train_y=df_train_y,
    df_test_X=df_test_X,
    df_test_y=df_test_y,
)

In [None]:
# Wall-clock time elapsed since start_time was recorded, followed by the collected metric histories
end_time = time.time()
execution_time = end_time - start_time
print(f"Execution time: {execution_time:.4f} seconds")
print_results(
    list_acc, list_f1_score, list_auc, list_precision, list_recall,
    list_tp, list_tn, list_fp, list_fn,
)

## XAI

### local epochs = 8 & Fraction fit = 1 & number of clients = 8

In [None]:
class FlowerClient(fl.client.NumPyClient):
    """Flower NumPy client wrapping a Keras model and one client's data.

    Args:
        model: compiled Keras model trained and evaluated by this client.
        x_train, y_train: data passed to ``model.fit``.
        x_val, y_val: data passed to ``model.evaluate``.
        local_epochs: epochs run per ``fit`` call (default 8, the value that
            was previously hard-coded for this experiment).
    """

    def __init__(self, model, x_train, y_train, x_val, y_val, local_epochs: int = 8) -> None:
        warnings.filterwarnings('ignore')
        self.model = model
        self.x_train, self.y_train = x_train, y_train
        self.x_val, self.y_val = x_val, y_val
        self.local_epochs = local_epochs

    def get_parameters(self, config=None):
        """Return the current model weights.

        ``config`` is accepted because Flower 1.x calls
        ``get_parameters(config)``; it defaults to None so a call without
        arguments keeps working.
        """
        warnings.filterwarnings('ignore')
        return self.model.get_weights()

    def fit(self, parameters, config):
        """Load ``parameters``, train locally and return (weights, sample count, {})."""
        warnings.filterwarnings('ignore')
        self.model.set_weights(parameters)
        self.model.fit(self.x_train, self.y_train, epochs=self.local_epochs, verbose=0)

        taille_totale = sys.getsizeof(self.x_train) + sys.getsizeof(self.y_train)
        print(f"La taille du training data est : {taille_totale} octets")

        # sys.getsizeof on the list of weights only measures the list object,
        # so add up the bytes of the arrays themselves.
        weights = self.model.get_weights()
        taille_variable3 = sum(w.nbytes for w in weights)
        print(f"La taille des weights du model est : {taille_variable3} octets")

        return weights, len(self.x_train), {}

    def evaluate(self, parameters, config):
        """Load ``parameters`` and return (loss, sample count, {"accuracy": acc})."""
        warnings.filterwarnings('ignore')
        self.model.set_weights(parameters)
        # only loss and accuracy are reported; the remaining compiled metrics are ignored
        loss, acc, *_ = self.model.evaluate(self.x_val, self.y_val, verbose=2)
        return loss, len(self.x_val), {"accuracy": acc}


# Reset all nine metric histories to fresh, independent empty lists before
# the next experiment.
(list_f1_score, list_acc, list_auc, list_precision, list_recall,
 list_tp, list_tn, list_fp, list_fn) = ([] for _ in range(9))

def FL_process (df_train_X, df_train_y, df_test_X, df_test_y):
    """Run a 50-round FedAvg simulation with every client fitting each round.

    The client count is read from the module-level ``NUM_CLIENTS`` for the
    simulation, the availability threshold and (via the client factory) the
    data partitioning, so the three always agree.

    Args:
        df_train_X, df_train_y: training data handed to the client factory.
        df_test_X, df_test_y: data used by the server-side evaluation callback.

    Side effects:
        Stores the server model in the module-level ``model_xai`` so it can be
        explained after the run.
    """
    # Without this declaration the assignment below was a dead local and the
    # model was never visible outside the function.
    global model_xai

    model_cent = Sequential()
    # input width follows the data instead of a hard-coded 122
    model_cent.add(Dense(80, input_dim=df_train_X.shape[1], activation='relu', kernel_initializer='he_normal'))
    model_cent.add(Dense(40, activation='relu'))
    model_cent.add(Dense(30, activation='relu'))
    model_cent.add(Dense(20, activation='relu'))
    model_cent.add(Dense(10, activation='relu'))
    # sigmoid output: binary_crossentropy and the 0.5 threshold expect a probability
    model_cent.add(Dense(1, activation='sigmoid'))
    model_cent.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=[
            "accuracy",
            tf.keras.metrics.AUC(),  # output is a probability, not a logit
            tf.keras.metrics.Precision(),
            tf.keras.metrics.Recall(),
            tf.keras.metrics.TruePositives(),
            tf.keras.metrics.TrueNegatives(),
            tf.keras.metrics.FalsePositives(),
            tf.keras.metrics.FalseNegatives(),
        ],
    )

    model_xai = model_cent

    # Wait for 75 % of the clients, but never fewer than min_fit_clients (2)
    # and never more than exist.
    min_available = min(NUM_CLIENTS, max(2, int(NUM_CLIENTS * 0.75)))

    # Create FedAvg strategy
    strategy = fl.server.strategy.FedAvg(
        fraction_fit=1,
        min_fit_clients=2,
        min_available_clients=min_available,
        evaluate_fn=get_eval_fn(model_cent, df_test_X, df_test_y),
        initial_parameters=fl.common.ndarrays_to_parameters(model_cent.get_weights()),
    )

    # Start simulation. num_clients was hard-coded to 8 while partitioning and
    # the availability threshold used NUM_CLIENTS; use one source for all.
    fl.simulation.start_simulation(
        client_fn=func_client_fn(model_cent, df_train_X, df_train_y),
        num_clients=NUM_CLIENTS,
        config=fl.server.ServerConfig(num_rounds=50),
        strategy=strategy,
    )

In [None]:
# Launch the federated run whose model is explained afterwards
start_time = time.time()
FL_process(
    df_train_X=df_train_X,
    df_train_y=df_train_y,
    df_test_X=df_test_X,
    df_test_y=df_test_y,
)

In [None]:
# Execution time: wall-clock seconds since start_time was recorded, then the collected metric histories
end_time = time.time()
execution_time = end_time - start_time
print(f"Execution time: {execution_time:.4f} seconds")
print_results(
    list_acc, list_f1_score, list_auc, list_precision, list_recall,
    list_tp, list_tn, list_fp, list_fn,
)

In [None]:
import shap

# Build a SHAP explainer for model_xai, passing the raw training matrix as the
# second argument (presumably used as background/masker data - confirm against
# the SHAP docs).
# NOTE(review): model_xai must exist at module scope when this cell runs - verify
# that the federated run actually publishes it.
explainer   = shap.Explainer(model_xai, df_train_X.values)
# Compute SHAP values for the held-out test rows
shap_values = explainer(df_test_X)

# Two views of the same SHAP values: a beeswarm plot and a bar plot
shap.plots.beeswarm(shap_values)
shap.plots.bar(shap_values)