# Federated Learning with Tensor Flow

# Import Libraries 

In [74]:
#Importing Libraries
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras.models import Sequential
from keras.layers import Dense, LSTM, GRU
from keras.optimizers import Adam
import sklearn
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

# Import and split dataset

In [75]:
num_rounds = 20
# Load the dataset from the Excel file
data = pd.read_excel('../KDD_DDoS.xlsx')

# Extract input features and output labels
inputs = data.iloc[:, :41].values
labels = data.iloc[:, 41].values

# Split the dataset into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(inputs, labels, test_size=0.2)

# Function to Create Models

In [76]:
#Function creates only RNN Models
def create_model(model_type, num_input, num_layers, num_nodes, num_output):
    '''
    Function creates models based on parameters provided, then compiles model
    the retured model is a compiled model
    '''
    num_output = int(num_output)
    if model_type == 'RNN':
        model = keras.Sequential()
        model.add(layers.SimpleRNN(num_nodes[0], activation='relu', return_sequences=True, input_shape=(num_input, 1)))
        for i in range(1, num_layers - 1):
            model.add(layers.SimpleRNN(num_nodes[i], activation='relu', return_sequences=True))
        model.add(layers.SimpleRNN(num_nodes[-1], activation='relu'))
        model.add(layers.Dense(num_output, activation='sigmoid'))
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        return model

    elif model_type == 'RNN_LSTM':
        model = keras.Sequential()
        model.add(layers.LSTM(num_nodes[0], activation='relu', return_sequences=True, input_shape=(num_input, 1)))
        for i in range(1, num_layers - 1):
            model.add(layers.LSTM(num_nodes[i], activation='relu', return_sequences=True))
        model.add(layers.LSTM(num_nodes[-1], activation='relu'))
        model.add(layers.Dense(num_output, activation='sigmoid'))
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        return model

    elif model_type == 'RNN_GRU':
        model = Sequential()
        model.add(GRU(units=num_nodes[0], activation='relu', return_sequences=True, input_shape=(num_input, 1)))
        for i in range(1, num_layers - 1):
            model.add(GRU(units=num_nodes[i], activation='relu', return_sequences=True))
        model.add(GRU(units=num_nodes[-1], activation='relu'))
        model.add(Dense(units=num_output, activation='sigmoid'))
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        return model

    else:
        print("Invalid model selected / Invalid parameters, try again")


# Perform Federated Learning

In [77]:
num_clients = 20  # Number of clients/participants

# Split the training data into smaller client datasets
client_data = np.array_split(x_train, num_clients)
client_labels = np.array_split(y_train, num_clients)

# Reshape each array within client_data
x_train = x_train.reshape(x_train.shape[0], 2, 1)
x_test = x_test.reshape(x_test.shape[0], 2, 1)
#client_data = [arr.reshape((arr.shape[0], 1, arr.shape[1])) for arr in client_data]

def fed_learning(client_data, client_labels, server_model):    
    local_model = tf.keras.models.clone_model(server_model)
    local_model.set_weights(server_model.get_weights())
    local_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    
    # Reshape each array within client_data
    client_data = [arr[:, :, :2] for arr in client_data]

    # Perform local training on each client's data
    for i in range(len(client_data)):
        local_data = client_data[i]
        local_labels = client_labels[i]
        
        # Train the local model on the client's data
        local_model.fit(local_data, local_labels, epochs=10, batch_size=32, verbose=0)
    
    return local_model
    

#round iteration
def round_iteration(x_test, y_test, num_rounds, num_clients, server_model):
    # Round iteration
    for round_ in range(num_rounds):       
        # Iterate over each client and perform local training
        for client in range(num_clients):
            local_model = fed_learning(client_data[client], client_labels[client], server_model)
            
            # Update the global model with the client's weights
            global_weights = server_model.get_weights()
            local_weights = local_model.get_weights()
            averaged_weights = [(global_weights[i] + local_weights[i]) / 2 for i in range(len(global_weights))]
            server_model.set_weights(averaged_weights)
            
            #reshape x_test
            x_test2=np.array(x_test)
            x_test2=x_test2.reshape(x_test2.shape[0],1,x_test2.shape[1])

            loss, accuracy = server_model.evaluate(x_test2, y_test)
            print("\n*********** Round-",round_,":Client",client," ***********\n")
            print(f'Test loss: {loss:.4f}')
            print(f'Test accuracy: {accuracy:.4f}')

    # Evaluate the global model on the testing data
    print("\n=========== Round-",round_,":Client",client," ===========\n")
    loss, accuracy = model.evaluate(x_test, x_test)
    print(f'Test loss: {loss:.4f}')
    print(f'Test accuracy: {accuracy:.4f}')
    return x_test2

ValueError: cannot reshape array of size 956940 into shape (23340,2,1)

# Function to Calculate metrics and prediction

In [None]:
def get_metrics(x_test2, y_test):
    y_pred = server_model.predict(x_test2)
    ypred = (y_pred > 0.5).astype(int)
    
    #calculate metrics
    cm = confusion_matrix(y_test, y_pred)
    accuracy = accuracy_score(y_test, y_pred)
    
    tn, fp, fn, tp = cm.ravel()
    
    tpr = tp / (tp + fn)
    
    if (tn + fp) != 0:
        fpr = fp / (tn + fp)
    else:
        fpr = 0.0
    print(f'{accuracy}')
    print(f'{tpr}')
    print(f'{fpr}')
    return accuracy, tpr, fpr

# RNN

In [None]:
#Specify Model parameters
model_type = 'RNN'
num_input = 2
num_layers = 5
num_nodes = [64] * num_layers
num_output = 1

#Create and Compile Model
server_model= create_model(x_test, y_test,model_type, num_input, num_layers, num_nodes, num_output)

#implement fed_learning
fed_learning(client_data, client_labels, server_model)

#round iteration
round_iteration(num_rounds, num_clients, server_model)

#metrics and predictions
get_metrics(x_test2, y_test)