In [1]:
'''
Federated learning algorithm

'''

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD

# Load segregated device data paths
device_paths = [f"D:/jenny/Documents/FAUS_Study/Thesis/My_dataset/Train_data/Nor80_back20/Device_data/device_{i}_data.csv" for i in range(1, 22)]  # Example paths for 41 devices

# Helper function to load and preprocess data
def load_and_preprocess_data(file_path):
    data = pd.read_csv(file_path)
    
    # Define required features
    features = [
        "dur", "sbytes", "dbytes", "Sload", "Dload", "Spkts", "Dpkts", 
        "smeansz", "dmeansz", "sloss", "dloss", "Sintpkt", "Dintpkt", 
        "swin", "dwin", "tcprtt", "synack", "ackdat", 
        "ct_srv_src", "ct_srv_dst", "ct_dst_ltm", "ct_src_ ltm","Label"
    ]
    data.columns = data.columns.str.strip()  # Remove spaces from column names
    # Ensure the dataset contains all required features
    missing_features = [f for f in features if f not in data.columns]
    if missing_features:
        print(f"Warning: Missing features {missing_features} in {file_path}. Skipping this device.")
        return None, None, None, None
    
    X = data[features]
    y = data.get("Label", None)  # Ensure "Label" column exists
    if y is None:
        print(f"Warning: 'Label' column is missing in {file_path}. Skipping this device.")
        return None, None, None, None
    
    # Handle small datasets by enforcing a minimum number of samples
    if len(X) < 2:  # Arbitrary threshold for minimum samples
        print(f"Warning: Insufficient data in {file_path}. Skipping this device.")
        return None, None, None, None
    
    # Scale features
    scaler = MinMaxScaler()
    X_scaled = scaler.fit_transform(X)
    
    # Split into training and testing datasets
    try:
        X_train, X_test, y_train, y_test = train_test_split(
            X_scaled, y, test_size=0.2, random_state=42, stratify=y
        )
    except ValueError as e:
        print(f"Error during train-test split for {file_path}: {e}")
        return None, None, None, None
    
    return X_train, X_test, y_train, y_test

# Define the neural network model
def create_model(input_dim):
    model = Sequential([
        Dense(64, activation='relu', input_dim=input_dim),
        Dense(32, activation='relu'),
        Dense(1, activation='sigmoid')  # Binary classification
    ])
    model.compile(optimizer=SGD(learning_rate=0.01), loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Federated Learning Simulation
global_weights = None
n_rounds = 1  # One round for this step
device_models = []
aggregated_weights = None

for round_num in range(n_rounds):
    print(f"--- Round {round_num + 1} ---")
    client_updates = []
    
    for device_idx, device_path in enumerate(device_paths):
        print(f"Training on device {device_idx + 1}...")
        
        # Load and preprocess device data
        X_train, X_test, y_train, y_test = load_and_preprocess_data(device_path)
        if X_train is None:  # Skip if data loading failed
            continue
        
        # Create and train the model
        input_dim = X_train.shape[1]
        model = create_model(input_dim)
        
        if global_weights is not None:
            model.set_weights(global_weights)  # Load global weights
        
        model.fit(X_train, y_train, epochs=5, batch_size=32, verbose=0)
        client_updates.append(model.get_weights())
        device_models.append(model)
    
    # Skip aggregation if no clients contributed
    if not client_updates:
        print("No clients contributed in this round. Exiting.")
        break
    
    # Federated Averaging: Aggregate weights
    print("Aggregating updates...")
    aggregated_weights = [np.mean([client_weights[layer] for client_weights in client_updates], axis=0)
                          for layer in range(len(client_updates[0]))]
    global_weights = aggregated_weights  # Set new global weights
    
    # Update device models with the new global weights
    for model in device_models:
        model.set_weights(global_weights)

print("Federated Learning Round Complete!")


--- Round 1 ---
Training on device 1...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 2...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 3...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 4...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 5...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 6...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 7...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 8...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 9...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 10...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 11...
Error during train-test split for D:/jenny/Documents/FAUS_Study/Thesis/My_dataset/Train_data/Nor80_back20/Device_data/device_11_data.csv: The least populated class in y has only 1 member, which is too few. The minimum number of groups for any class cannot be less than 2.
Training on device 12...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 13...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 14...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 15...
Error during train-test split for D:/jenny/Documents/FAUS_Study/Thesis/My_dataset/Train_data/Nor80_back20/Device_data/device_15_data.csv: The test_size = 1 should be greater or equal to the number of classes = 2
Training on device 16...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 17...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 18...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 19...
Training on device 20...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 21...
Aggregating updates...
Federated Learning Round Complete!


In [3]:
""""************************************************************************************************************
Evalation Script for each device in the network.

***************************************************************************************************************
"""

from sklearn.metrics import accuracy_score, confusion_matrix, classification_report,f1_score,precision_score,recall_score

# Helper function for model evaluation
def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    y_pred = (y_pred > 0.5).astype("int32")  # Threshold for binary classification
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average="binary", zero_division=0)
    recall = recall_score(y_test, y_pred, average="binary", zero_division=0)
    f1 = f1_score(y_test, y_pred, average="binary", zero_division=0)
    print(f"Accuracy: {accuracy}")
    print(f"Precision: {precision}")
    print(f"recall: {recall}")
    print(f"F1: {f1}")
    print("Confusion Matrix:")
    print(confusion_matrix(y_test, y_pred))
    print("Classification Report:")
    print(classification_report(y_test, y_pred))

# Ensure that device_paths and device_models are available
device_paths = [f"D:/jenny/Documents/FAUS_Study/Thesis/My_dataset/Test_data/Nor80_back20/Device_data/device_{i}_data.csv" for i in range(1, 42)]  # Example paths for 41 devices

# Check if device models are available from the federated learning process
if 'device_models' not in globals():
    print("Device models not found. Please run the federated learning first.")
else:
    # Evaluate device models
    for device_idx, device_path in enumerate(device_paths):
        print(f"\nEvaluating model for device {device_idx + 1}...")
        
        # Load and preprocess the data for this device (same as federated learning)
        try:
            X_train, X_test, y_train, y_test = load_and_preprocess_data(device_path)
            
            # Check if the test data is valid (not None and not empty)
            if X_test is not None and len(X_test) > 0:
                device_model = device_models[device_idx]
                print(f"Device {device_idx + 1} model performance:")
                evaluate_model(device_model, X_test, y_test)  # Evaluate using test data
            else:
                print(f"Skipping device {device_idx + 1} due to insufficient or invalid data.")
        except Exception as e:
            print(f"Error loading data for device {device_idx + 1}: {str(e)}")

    # Evaluate global model (if applicable)
    if global_weights is not None:
        print("\nEvaluating global model on aggregated test data...")
        
        # Use test data from the first device (or you can aggregate test data from all devices)
        try:
            X_train, X_test, y_train, y_test = load_and_preprocess_data(device_paths[0])  # Use the first device's test data
            
            # Check if the global test data is valid (not None and not empty)
            if X_test is not None and len(X_test) > 0:
                global_model = create_model(X_test.shape[1])
                global_model.set_weights(global_weights)
                print("Global model performance:")
                evaluate_model(global_model, X_test, y_test)  # Evaluate using aggregated global model
            else:
                print("Skipping global model evaluation due to insufficient or invalid test data.")
        except Exception as e:
            print(f"Error loading data for global model evaluation: {str(e)}")

print("\nEvaluation completed!")


Evaluating model for device 1...
Device 1 model performance:
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
Accuracy: 1.0
Precision: 0.0
recall: 0.0
F1: 0.0
Confusion Matrix:
[[7]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         7

    accuracy                           1.00         7
   macro avg       1.00      1.00      1.00         7
weighted avg       1.00      1.00      1.00         7


Evaluating model for device 2...
Device 2 model performance:
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step
Accuracy: 1.0
Precision: 0.0
recall: 0.0
F1: 0.0
Confusion Matrix:
[[10]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10

    accuracy                           1.00        10
   macro avg       1.00      1.00      1.00        10
weighted avg       1.00      1.00      1.00     



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
Accuracy: 1.0
Precision: 0.0
recall: 0.0
F1: 0.0
Confusion Matrix:
[[6]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         6

    accuracy                           1.00         6
   macro avg       1.00      1.00      1.00         6
weighted avg       1.00      1.00      1.00         6


Evaluating model for device 4...
Device 4 model performance:
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
Accuracy: 1.0
Precision: 0.0
recall: 0.0
F1: 0.0
Confusion Matrix:
[[12]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        12

    accuracy                           1.00        12
   macro avg       1.00      1.00      1.00        12
weighted avg       1.00      1.00      1.00        12


Evaluating model for device 5...
Device 5 model perfor



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 93ms/step
Accuracy: 1.0
Precision: 0.0
recall: 0.0
F1: 0.0
Confusion Matrix:
[[15]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        15

    accuracy                           1.00        15
   macro avg       1.00      1.00      1.00        15
weighted avg       1.00      1.00      1.00        15


Evaluating model for device 6...
Device 6 model performance:
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 84ms/step
Accuracy: 1.0
Precision: 0.0
recall: 0.0
F1: 0.0
Confusion Matrix:
[[13]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        13

    accuracy                           1.00        13
   macro avg       1.00      1.00      1.00        13
weighted avg       1.00      1.00      1.00        13


Evaluating model for device 7...
Device 7 model performance:
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step




Accuracy: 1.0
Precision: 0.0
recall: 0.0
F1: 0.0
Confusion Matrix:
[[12]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        12

    accuracy                           1.00        12
   macro avg       1.00      1.00      1.00        12
weighted avg       1.00      1.00      1.00        12


Evaluating model for device 8...
Device 8 model performance:
[1m1/2[0m [32m━━━━━━━━━━[0m[37m━━━━━━━━━━[0m [1m0s[0m 46ms/step



[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
Accuracy: 0.02702702702702703
Precision: 0.0
recall: 0.0
F1: 0.0
Confusion Matrix:
[[ 1  0]
 [36  0]]
Classification Report:
              precision    recall  f1-score   support

           0       0.03      1.00      0.05         1
           1       0.00      0.00      0.00        36

    accuracy                           0.03        37
   macro avg       0.01      0.50      0.03        37
weighted avg       0.00      0.03      0.00        37


Evaluating model for device 9...
Device 9 model performance:
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 88ms/step


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy: 1.0
Precision: 0.0
recall: 0.0
F1: 0.0
Confusion Matrix:
[[13]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        13

    accuracy                           1.00        13
   macro avg       1.00      1.00      1.00        13
weighted avg       1.00      1.00      1.00        13


Evaluating model for device 10...
Device 10 model performance:
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step




Accuracy: 0.25
Precision: 0.0
recall: 0.0
F1: 0.0
Confusion Matrix:
[[1 0]
 [3 0]]
Classification Report:
              precision    recall  f1-score   support

           0       0.25      1.00      0.40         1
           1       0.00      0.00      0.00         3

    accuracy                           0.25         4
   macro avg       0.12      0.50      0.20         4
weighted avg       0.06      0.25      0.10         4


Evaluating model for device 11...
Device 11 model performance:


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 129ms/step
Accuracy: 1.0
Precision: 0.0
recall: 0.0
F1: 0.0
Confusion Matrix:
[[12]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        12

    accuracy                           1.00        12
   macro avg       1.00      1.00      1.00        12
weighted avg       1.00      1.00      1.00        12


Evaluating model for device 12...
Device 12 model performance:
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
Accuracy: 1.0
Precision: 0.0
recall: 0.0
F1: 0.0
Confusion Matrix:
[[11]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        11

    accuracy                           1.00        11
   macro avg       1.00      1.00      1.00        11
weighted avg       1.00      1.00      1.00        11


Evaluating model for device 13...
Device 13 model 



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step
Accuracy: 0.5
Precision: 0.0
recall: 0.0
F1: 0.0
Confusion Matrix:
[[2 0]
 [2 0]]
Classification Report:
              precision    recall  f1-score   support

           0       0.50      1.00      0.67         2
           1       0.00      0.00      0.00         2

    accuracy                           0.50         4
   macro avg       0.25      0.50      0.33         4
weighted avg       0.25      0.50      0.33         4


Evaluating model for device 14...
Device 14 model performance:
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
Accuracy: 1.0
Precision: 0.0
recall: 0.0
F1: 0.0
Confusion Matrix:
[[13]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        13

    accuracy                           1.00        13
   macro avg       1.00      1.00      1.00        13
weighted avg       1.00      1.00      1.00  

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step
Accuracy: 1.0
Precision: 0.0
recall: 0.0
F1: 0.0
Confusion Matrix:
[[8]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         8

    accuracy                           1.00         8
   macro avg       1.00      1.00      1.00         8
weighted avg       1.00      1.00      1.00         8


Evaluating model for device 16...
Device 16 model performance:
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step
Accuracy: 1.0
Precision: 0.0
recall: 0.0
F1: 0.0
Confusion Matrix:
[[12]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        12

    accuracy                           1.00        12
   macro avg       1.00      1.00      1.00        12
weighted avg       1.00      1.00      1.00        12


Evaluating model for device 17...
Device 17 model pe



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step
Accuracy: 1.0
Precision: 0.0
recall: 0.0
F1: 0.0
Confusion Matrix:
[[14]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        14

    accuracy                           1.00        14
   macro avg       1.00      1.00      1.00        14
weighted avg       1.00      1.00      1.00        14


Evaluating model for device 18...
Error loading data for device 18: list index out of range

Evaluating model for device 19...
Error loading data for device 19: list index out of range

Evaluating model for device 20...
Skipping device 20 due to insufficient or invalid data.

Evaluating model for device 21...
Skipping device 21 due to insufficient or invalid data.

Evaluating model for device 22...
Skipping device 22 due to insufficient or invalid data.

Evaluating model for device 23...
Skipping device 23 due to insufficient or invalid data.

Evaluat

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 108ms/step
Accuracy: 1.0
Precision: 0.0
recall: 0.0
F1: 0.0
Confusion Matrix:
[[7]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         7

    accuracy                           1.00         7
   macro avg       1.00      1.00      1.00         7
weighted avg       1.00      1.00      1.00         7


Evaluation completed!


