In [4]:
import pandas as pd
import os

# Define input and output directories
input_dir = r"D:/jenny/Documents/FAUS_Study/Thesis/My_dataset/Raw_data/"
output_dir = r"D:/jenny/Documents/FAUS_Study/Thesis/My_dataset/Raw_data/"
features_file = r"D:/jenny/Documents/FAUS_Study/Thesis/UNSW-NB15/CSV Files/NUSW-NB15_features.csv"

# Ensure the output directory exists
os.makedirs(output_dir, exist_ok=True)

# Load the features file
features = pd.read_csv(features_file, encoding="latin1")

# Extract the feature names
column_names = features["Name"].tolist()

# Specify the files to process
files_to_process = ["UNSW-NB15_1.csv", "UNSW-NB15_2.csv", "UNSW-NB15_3.csv", "UNSW-NB15_4.csv"]

# Process each specified file
for file_name in files_to_process:
    input_file = os.path.join(input_dir, file_name)
    output_file = os.path.join(output_dir, file_name)
    
    # Check if the input file exists
    if os.path.exists(input_file):
        # Load the data file
        data = pd.read_csv(input_file, header=None)
        
        # Add headers
        data.columns = column_names

        # Save the updated file
        data.to_csv(output_file, index=False)
        print(f"Processed and saved: {output_file}")
    else:
        print(f"File not found: {input_file}")

print("Processing complete for specified files.")


  data = pd.read_csv(input_file, header=None)


Processed and saved: D:/jenny/Documents/FAUS_Study/Thesis/My_dataset/Raw_data/UNSW-NB15_1.csv


  data = pd.read_csv(input_file, header=None)


Processed and saved: D:/jenny/Documents/FAUS_Study/Thesis/My_dataset/Raw_data/UNSW-NB15_2.csv
Processed and saved: D:/jenny/Documents/FAUS_Study/Thesis/My_dataset/Raw_data/UNSW-NB15_3.csv
Processed and saved: D:/jenny/Documents/FAUS_Study/Thesis/My_dataset/Raw_data/UNSW-NB15_4.csv
Processing complete for specified files.


In [6]:
'''
******************************************************************************************************

Distinguish between normal and attacked data

******************************************************************************************************
'''

import pandas as pd
import os

# Define input directory (where modified files are stored) and output file paths
input_dir = r"D:/jenny/Documents/FAUS_Study/Thesis/My_dataset/Raw_data/"
output_normal_file = r"D:/jenny/Documents/FAUS_Study/Thesis/My_dataset/Normal_data/UNSW-NB15_normal.csv"
output_attack_file = r"D:/jenny/Documents/FAUS_Study/Thesis/My_dataset/Attacked_data/UNSW-NB15_attack.csv"

# Initialize empty dataframes for normal and attack datasets
normal_df = pd.DataFrame()
attack_df = pd.DataFrame()

# List all CSV files in the input directory
data_files = [file for file in os.listdir(input_dir) if file.endswith(".csv")]

# Process each file
for file_name in data_files:
    file_path = os.path.join(input_dir, file_name)
    print(f"Processing file: {file_path}")
    
    # Load the file
    data = pd.read_csv(file_path)
    
    # Check if the required 'label' column exists
    if "Label" in data.columns:
        # Split into normal and attack data
        normal_data = data[data["Label"] == 0]
        attack_data = data[data["Label"] == 1]
        
        # Append to the combined normal and attack datasets
        normal_df = pd.concat([normal_df, normal_data], ignore_index=True)
        print(f"normal_df = {normal_df}")
        attack_df = pd.concat([attack_df, attack_data], ignore_index=True)
        print(f"attack_df = {attack_df}")
    else:
        print(f"Warning: 'label' column not found in file {file_name}. Skipping.")

# Save the final datasets
normal_df.to_csv(output_normal_file, index=False)
attack_df.to_csv(output_attack_file, index=False)

print(f"Normal data saved to: {output_normal_file}")
print(f"Attack data saved to: {output_attack_file}")


Processing file: D:/jenny/Documents/FAUS_Study/Thesis/My_dataset/Raw_data/UNSW-NB15_1.csv


  data = pd.read_csv(file_path)


normal_df =              srcip  sport          dstip dsport proto state       dur  sbytes  \
0       59.166.0.0   1390  149.171.126.6     53   udp   CON  0.001055     132   
1       59.166.0.0  33661  149.171.126.9   1024   udp   CON  0.036133     528   
2       59.166.0.6   1464  149.171.126.7     53   udp   CON  0.001119     146   
3       59.166.0.5   3593  149.171.126.5     53   udp   CON  0.001209     132   
4       59.166.0.3  49664  149.171.126.0     53   udp   CON  0.001169     146   
...            ...    ...            ...    ...   ...   ...       ...     ...   
677781  59.166.0.8  12520  149.171.126.6  31010   tcp   FIN  0.020383     320   
677782  59.166.0.0  18895  149.171.126.9     80   tcp   FIN  1.402957   19410   
677783  59.166.0.0  30103  149.171.126.5   5190   tcp   FIN  0.007108    2158   
677784  59.166.0.6  30388  149.171.126.5    111   udp   CON  0.004435     568   
677785  59.166.0.0   6055  149.171.126.5  54145   tcp   FIN  0.072974    4238   

         dbytes

  data = pd.read_csv(file_path)


normal_df =                   srcip  sport          dstip dsport proto state       dur  \
0            59.166.0.0   1390  149.171.126.6     53   udp   CON  0.001055   
1            59.166.0.0  33661  149.171.126.9   1024   udp   CON  0.036133   
2            59.166.0.6   1464  149.171.126.7     53   udp   CON  0.001119   
3            59.166.0.5   3593  149.171.126.5     53   udp   CON  0.001209   
4            59.166.0.3  49664  149.171.126.0     53   udp   CON  0.001169   
...                 ...    ...            ...    ...   ...   ...       ...   
1325033  149.171.126.18   1043   175.45.176.3     53   udp   INT  0.000005   
1325034  149.171.126.18   1043   175.45.176.3     53   udp   INT  0.000005   
1325035  149.171.126.18   1043   175.45.176.3     53   udp   INT  0.000005   
1325036  149.171.126.18   1043   175.45.176.3     53   udp   INT  0.000005   
1325037      59.166.0.1  18247  149.171.126.4   7662   tcp   FIN  0.119596   

         sbytes  dbytes  sttl  ...  ct_ftp_cmd  ct_

In [8]:
'''
******************************************************************************************************

Distinguish between normal and attacked data

******************************************************************************************************
'''

import pandas as pd
import os

# Define input directory (where modified files are stored) and output file paths
input_dir = r"D:/jenny/Documents/FAUS_Study/Thesis/My_dataset/Test_data/"
output_normal_file = r"D:/jenny/Documents/FAUS_Study/Thesis/My_dataset/Test_data/Normal_data/UNSW-NB15_normal.csv"
output_attack_file = r"D:/jenny/Documents/FAUS_Study/Thesis/My_dataset/Attacked_data/UNSW-NB15_attack.csv"
backdoor_file = r"D:/jenny/Documents/FAUS_Study/Thesis/My_dataset/Test_data/Attacked_data/UNSW-NB15_backdoor_only_test.csv"
rec_file = r"D:/jenny/Documents/FAUS_Study/Thesis/My_dataset/Test_data/Attacked_data/UNSW-NB15_reconnaissance_only_test.csv"

# Load the data
data = pd.read_csv(output_attack_file)

# Debugging: Display column names and unique values in attack_cat
print(f"Columns in data: {data.columns}")
if "attack_cat" in data.columns:
    print(f"Unique values in 'attack_cat': {data['attack_cat'].unique()}")
else:
    print("Column 'attack_cat' not found!")

# Clean column names and values if necessary
data.columns = data.columns.str.strip()  # Remove spaces from column names
data["attack_cat"] = data["attack_cat"].astype(str).str.strip()  # Clean the 'attack_cat' values

# Filter data
backdoor_attack_data = data[data["attack_cat"] == "Backdoors"]
print(f"Backdoor data count: {len(backdoor_attack_data)}")
backdoor_attack_data.to_csv(backdoor_file, index=False)
print(backdoor_attack_data)  # Display filtered rows

reconnaissance_attack_data = data[data["attack_cat"] == "Reconnaissance"]
print(f"Reconnaissance data count: {len(reconnaissance_attack_data)}")
reconnaissance_attack_data.to_csv(rec_file, index=False)
print(reconnaissance_attack_data)

  data = pd.read_csv(output_attack_file)


Columns in data: Index(['srcip', 'sport', 'dstip', 'dsport', 'proto', 'state', 'dur', 'sbytes',
       'dbytes', 'sttl', 'dttl', 'sloss', 'dloss', 'service', 'Sload', 'Dload',
       'Spkts', 'Dpkts', 'swin', 'dwin', 'stcpb', 'dtcpb', 'smeansz',
       'dmeansz', 'trans_depth', 'res_bdy_len', 'Sjit', 'Djit', 'Stime',
       'Ltime', 'Sintpkt', 'Dintpkt', 'tcprtt', 'synack', 'ackdat',
       'is_sm_ips_ports', 'ct_state_ttl', 'ct_flw_http_mthd', 'is_ftp_login',
       'ct_ftp_cmd', 'ct_srv_src', 'ct_srv_dst', 'ct_dst_ltm', 'ct_src_ ltm',
       'ct_src_dport_ltm', 'ct_dst_sport_ltm', 'ct_dst_src_ltm', 'attack_cat',
       'Label'],
      dtype='object')
Unique values in 'attack_cat': ['Exploits' 'Reconnaissance' 'DoS' 'Generic' 'Shellcode' ' Fuzzers'
 'Worms' 'Backdoors' 'Analysis' ' Reconnaissance ' 'Backdoor' ' Fuzzers '
 ' Shellcode ']
Backdoor data count: 534
              srcip  sport           dstip  dsport proto state       dur  \
1405   175.45.176.1  29335  149.171.126.18     51

In [21]:
import pandas as pd
import os

def create_test_data(normal_file, backdoor_file, total_samples, ratio_normal, ratio_backdoor, output_dir="."):
    """
    Create a test data file by concatenating normal and backdoor data with specified ratios.
    
    Args:
        normal_file (str): Path to the normal data CSV file.
        backdoor_file (str): Path to the backdoor data CSV file.
        total_samples (int): Total number of samples in the test data.
        ratio_normal (float): Ratio of normal data in the test data (e.g., 0.5 for 50%).
        ratio_backdoor (float): Ratio of backdoor data in the test data (e.g., 0.5 for 50%).
        output_dir (str): Directory where the test data file will be saved.
        
    Returns:
        str: Path to the saved test data file.
    """
    # Load the data
    normal_data = pd.read_csv(normal_file)
    backdoor_data = pd.read_csv(backdoor_file)
    print(f"total sample = {total_samples}")
    
    # Calculate the number of samples for each dataset
    num_normal = int(total_samples * ratio_normal)
    num_backdoor = int(total_samples * ratio_backdoor)
    print(num_backdoor)
    print(num_normal)
    
    # Check if there are enough samples in the input files
    if num_normal > len(normal_data):
        raise ValueError(f"Not enough normal data. Requested: {num_normal}, Available: {len(normal_data)}")
    if num_backdoor > len(backdoor_data):
        raise ValueError(f"Not enough backdoor data. Requested: {num_backdoor}, Available: {len(backdoor_data)}")
    
    # Sample the data
    sampled_normal = normal_data.sample(n=num_normal, random_state=42)
    sampled_backdoor = backdoor_data.sample(n=num_backdoor, random_state=42)
    
    # Concatenate the data
    test_data = pd.concat([sampled_normal, sampled_backdoor], ignore_index=True)
    
    # Shuffle the data
    test_data = test_data.sample(frac=1, random_state=42).reset_index(drop=True)
    
    # Construct the output filename
    output_filename = f"test_data_n{int(ratio_normal * 100)}_backdoor{int(ratio_backdoor * 100)}.csv"
    output_path = os.path.join(output_dir, output_filename)
    
    # Save the test data
    test_data.to_csv(output_path, index=False)
    print(f"Test data saved to {output_path}")
    
    return output_path


# Example usage
if __name__ == "__main__":
    # Define input files and parameters
    normal_file = "D:/jenny/Documents/FAUS_Study/Thesis/My_dataset/Test_data/Normal_data/UNSW-NB15_normal_test.csv"  # Replace with the path to your normal.csv file
    backdoor_file = "D:/jenny/Documents/FAUS_Study/Thesis/My_dataset/Test_data/Attacked_data/UNSW-NB15_backdoor_only_test.csv"  # Replace with the path to your backdoor.csv file
    total_samples = 650 #total number of test samples
    ratio_normal = 0.2  # Ratio of normal data
    ratio_backdoor = 0.8  # Ratio of backdoor data
    output_dir = "D:/jenny/Documents/FAUS_Study/Thesis/My_dataset/Test_data/Nor20_back80/"  # Directory to save the output file
    
    # Create the test data
    create_test_data(normal_file, backdoor_file, total_samples, ratio_normal, ratio_backdoor, output_dir)


total sample = 650
520
130
Test data saved to D:/jenny/Documents/FAUS_Study/Thesis/My_dataset/Test_data/Nor20_back80/test_data_n20_backdoor80.csv


In [1]:
import pandas as pd

# File paths for the datasets
file_1 = "D:/jenny/Documents/FAUS_Study/Thesis/My_dataset/Raw_data/UNSW-NB15_1.csv"  
file_2 = "D:/jenny/Documents/FAUS_Study/Thesis/My_dataset/Raw_data/UNSW-NB15_2.csv"  
file_3 = "D:/jenny/Documents/FAUS_Study/Thesis/My_dataset/Raw_data/UNSW-NB15_3.csv"  
file_4 = "D:/jenny/Documents/FAUS_Study/Thesis/My_dataset/Raw_data/UNSW-NB15_4.csv"  

# Output file paths
train_output_file = "D:/jenny/Documents/FAUS_Study/Thesis/My_dataset/Train_data/train_data.csv"
test_output_file = "D:/jenny/Documents/FAUS_Study/Thesis/My_dataset/Test_data/test_data.csv"

# Load the data
data_1 = pd.read_csv(file_1)
data_2 = pd.read_csv(file_2)
data_3 = pd.read_csv(file_3)
data_4 = pd.read_csv(file_4)

# Concatenate _1 and _2 for training
train_data = pd.concat([data_1, data_2], ignore_index=True)

# Concatenate _3 and _4 for testing
test_data = pd.concat([data_3, data_4], ignore_index=True)

# Save the concatenated data
train_data.to_csv(train_output_file, index=False)
print(f"Train data saved to {train_output_file}")

test_data.to_csv(test_output_file, index=False)
print(f"Test data saved to {test_output_file}")


  data_1 = pd.read_csv(file_1)
  data_2 = pd.read_csv(file_2)


Train data saved to D:/jenny/Documents/FAUS_Study/Thesis/My_dataset/Train_data/train_data.csv
Test data saved to D:/jenny/Documents/FAUS_Study/Thesis/My_dataset/Test_data/test_data.csv


In [2]:
# Data segration depending on Srcip for creating local devices.
import pandas as pd
import os

# Input file containing normal data
input_file = "D:/jenny/Documents/FAUS_Study/Thesis/My_dataset/Test_data/Normal_data/UNSW-NB15_normal_test.csv"  # Replace with the path to your normal data file
output_dir = "D:/jenny/Documents/FAUS_Study/Thesis/My_dataset/Test_data/Normal_data/Device_data/"  # Directory to store the segregated files

# Create the output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# Load the dataset
data = pd.read_csv(input_file)

# Ensure the 'srcip' column exists
if "srcip" not in data.columns:
    raise ValueError("The dataset does not contain the 'srcip' column.")

# Get the unique source IPs
unique_srcips = data["srcip"].unique()

# Loop through each unique source IP, filter data, and save to a CSV
for idx, srcip in enumerate(unique_srcips, start=1):
    # Filter data for the current source IP
    device_data = data[data["srcip"] == srcip]
    
    # Define the output file name
    output_file = os.path.join(output_dir, f"device_{idx}_data.csv")
    
    # Save the data to the CSV
    device_data.to_csv(output_file, index=False)
    print(f"Data for srcip '{srcip}' saved to {output_file}")

print(f"Data segregation completed. Files are saved in the '{output_dir}' directory.")


Data for srcip '59.166.0.1' saved to D:/jenny/Documents/FAUS_Study/Thesis/My_dataset/Test_data/Normal_data/Device_data/device_1_data.csv
Data for srcip '59.166.0.3' saved to D:/jenny/Documents/FAUS_Study/Thesis/My_dataset/Test_data/Normal_data/Device_data/device_2_data.csv
Data for srcip '59.166.0.8' saved to D:/jenny/Documents/FAUS_Study/Thesis/My_dataset/Test_data/Normal_data/Device_data/device_3_data.csv
Data for srcip '149.171.126.18' saved to D:/jenny/Documents/FAUS_Study/Thesis/My_dataset/Test_data/Normal_data/Device_data/device_4_data.csv
Data for srcip '175.45.176.0' saved to D:/jenny/Documents/FAUS_Study/Thesis/My_dataset/Test_data/Normal_data/Device_data/device_5_data.csv
Data for srcip '59.166.0.7' saved to D:/jenny/Documents/FAUS_Study/Thesis/My_dataset/Test_data/Normal_data/Device_data/device_6_data.csv
Data for srcip '59.166.0.6' saved to D:/jenny/Documents/FAUS_Study/Thesis/My_dataset/Test_data/Normal_data/Device_data/device_7_data.csv
Data for srcip '175.45.176.1' saved

In [1]:
'''
Federated learning algorithm

'''

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD

# Load segregated device data paths
device_paths = [f"D:/jenny/Documents/FAUS_Study/Thesis/My_dataset/Train_data/Normal_data/Device_data/device_{i}_data.csv" for i in range(1, 42)]  # Example paths for 41 devices

# Helper function to load and preprocess data
def load_and_preprocess_data(file_path):
    data = pd.read_csv(file_path)
    
    # Define required features
    features = [
        "dur", "sbytes", "dbytes", "Sload", "Dload", "Spkts", "Dpkts", 
        "smeansz", "dmeansz", "sloss", "dloss", "Sintpkt", "Dintpkt", 
        "swin", "dwin", "tcprtt", "synack", "ackdat", 
        "ct_srv_src", "ct_srv_dst", "ct_dst_ltm", "ct_src_ ltm","Label"
    ]
    data.columns = data.columns.str.strip()  # Remove spaces from column names
    # Ensure the dataset contains all required features
    missing_features = [f for f in features if f not in data.columns]
    if missing_features:
        print(f"Warning: Missing features {missing_features} in {file_path}. Skipping this device.")
        return None, None, None, None
    
    X = data[features]
    y = data.get("Label", None)  # Ensure "Label" column exists
    if y is None:
        print(f"Warning: 'Label' column is missing in {file_path}. Skipping this device.")
        return None, None, None, None
    
    # Handle small datasets by enforcing a minimum number of samples
    if len(X) < 2:  # Arbitrary threshold for minimum samples
        print(f"Warning: Insufficient data in {file_path}. Skipping this device.")
        return None, None, None, None
    
    # Scale features
    scaler = MinMaxScaler()
    X_scaled = scaler.fit_transform(X)
    
    # Split into training and testing datasets
    try:
        X_train, X_test, y_train, y_test = train_test_split(
            X_scaled, y, test_size=0.2, random_state=42, stratify=y
        )
    except ValueError as e:
        print(f"Error during train-test split for {file_path}: {e}")
        return None, None, None, None
    
    return X_train, X_test, y_train, y_test

# Define the neural network model
def create_model(input_dim):
    model = Sequential([
        Dense(64, activation='relu', input_dim=input_dim),
        Dense(32, activation='relu'),
        Dense(1, activation='sigmoid')  # Binary classification
    ])
    model.compile(optimizer=SGD(learning_rate=0.01), loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Federated Learning Simulation
global_weights = None
n_rounds = 1  # One round for this step
device_models = []
aggregated_weights = None

for round_num in range(n_rounds):
    print(f"--- Round {round_num + 1} ---")
    client_updates = []
    
    for device_idx, device_path in enumerate(device_paths):
        print(f"Training on device {device_idx + 1}...")
        
        # Load and preprocess device data
        X_train, X_test, y_train, y_test = load_and_preprocess_data(device_path)
        if X_train is None:  # Skip if data loading failed
            continue
        
        # Create and train the model
        input_dim = X_train.shape[1]
        model = create_model(input_dim)
        
        if global_weights is not None:
            model.set_weights(global_weights)  # Load global weights
        
        model.fit(X_train, y_train, epochs=5, batch_size=32, verbose=0)
        client_updates.append(model.get_weights())
        device_models.append(model)
    
    # Skip aggregation if no clients contributed
    if not client_updates:
        print("No clients contributed in this round. Exiting.")
        break
    
    # Federated Averaging: Aggregate weights
    print("Aggregating updates...")
    aggregated_weights = [np.mean([client_weights[layer] for client_weights in client_updates], axis=0)
                          for layer in range(len(client_updates[0]))]
    global_weights = aggregated_weights  # Set new global weights
    
    # Update device models with the new global weights
    for model in device_models:
        model.set_weights(global_weights)

print("Federated Learning Round Complete!")


--- Round 1 ---
Training on device 1...


  data = pd.read_csv(file_path)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 2...


  data = pd.read_csv(file_path)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 3...


  data = pd.read_csv(file_path)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 4...


  data = pd.read_csv(file_path)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 5...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 6...


  data = pd.read_csv(file_path)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 7...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 8...


  data = pd.read_csv(file_path)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 9...


  data = pd.read_csv(file_path)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 10...


  data = pd.read_csv(file_path)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 11...


  data = pd.read_csv(file_path)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 12...


  data = pd.read_csv(file_path)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 13...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 14...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 15...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 16...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 17...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 18...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 19...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 20...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 21...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 22...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 23...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 24...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 25...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 26...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 27...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 28...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 29...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 30...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 31...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 32...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 33...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 34...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 35...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 36...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 37...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 38...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 39...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training on device 40...
Training on device 41...
Aggregating updates...
Federated Learning Round Complete!


In [2]:
""""************************************************************************************************************
Evalation Script for each device in the network.

***************************************************************************************************************
"""

from sklearn.metrics import accuracy_score, confusion_matrix, classification_report,f1_score,precision_score,recall_score

# Helper function for model evaluation
def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    y_pred = (y_pred > 0.5).astype("int32")  # Threshold for binary classification
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {accuracy}")
    print("Confusion Matrix:")
    print(confusion_matrix(y_test, y_pred))
    print("Classification Report:")
    print(classification_report(y_test, y_pred))

# Ensure that device_paths and device_models are available
device_paths = [f"D:/jenny/Documents/FAUS_Study/Thesis/My_dataset/Test_data/Normal_data/Device_data/device_{i}_data.csv" for i in range(1, 42)]  # Example paths for 41 devices

# Check if device models are available from the federated learning process
if 'device_models' not in globals():
    print("Device models not found. Please run the federated learning first.")
else:
    # Evaluate device models
    for device_idx, device_path in enumerate(device_paths):
        print(f"\nEvaluating model for device {device_idx + 1}...")
        
        # Load and preprocess the data for this device (same as federated learning)
        try:
            X_train, X_test, y_train, y_test = load_and_preprocess_data(device_path)
            
            # Check if the test data is valid (not None and not empty)
            if X_test is not None and len(X_test) > 0:
                device_model = device_models[device_idx]
                print(f"Device {device_idx + 1} model performance:")
                evaluate_model(device_model, X_test, y_test)  # Evaluate using test data
            else:
                print(f"Skipping device {device_idx + 1} due to insufficient or invalid data.")
        except Exception as e:
            print(f"Error loading data for device {device_idx + 1}: {str(e)}")

    # Evaluate global model (if applicable)
    if global_weights is not None:
        print("\nEvaluating global model on aggregated test data...")
        
        # Use test data from the first device (or you can aggregate test data from all devices)
        try:
            X_train, X_test, y_train, y_test = load_and_preprocess_data(device_paths[0])  # Use the first device's test data
            
            # Check if the global test data is valid (not None and not empty)
            if X_test is not None and len(X_test) > 0:
                global_model = create_model(X_test.shape[1])
                global_model.set_weights(global_weights)
                print("Global model performance:")
                evaluate_model(global_model, X_test, y_test)  # Evaluate using aggregated global model
            else:
                print("Skipping global model evaluation due to insufficient or invalid test data.")
        except Exception as e:
            print(f"Error loading data for global model evaluation: {str(e)}")

print("\nEvaluation completed!")


Evaluating model for device 1...
Device 1 model performance:
[1m436/436[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step
Accuracy: 1.0
Confusion Matrix:
[[13931]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     13931

    accuracy                           1.00     13931
   macro avg       1.00      1.00      1.00     13931
weighted avg       1.00      1.00      1.00     13931


Evaluating model for device 2...




Device 2 model performance:
[1m430/430[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
Accuracy: 1.0
Confusion Matrix:
[[13740]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     13740

    accuracy                           1.00     13740
   macro avg       1.00      1.00      1.00     13740
weighted avg       1.00      1.00      1.00     13740


Evaluating model for device 3...




Device 3 model performance:
[1m410/410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
Accuracy: 1.0
Confusion Matrix:
[[13093]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     13093

    accuracy                           1.00     13093
   macro avg       1.00      1.00      1.00     13093
weighted avg       1.00      1.00      1.00     13093


Evaluating model for device 4...




Device 4 model performance:
[1m405/405[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
Accuracy: 1.0
Confusion Matrix:
[[12952]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     12952

    accuracy                           1.00     12952
   macro avg       1.00      1.00      1.00     12952
weighted avg       1.00      1.00      1.00     12952


Evaluating model for device 5...
Device 5 model performance:




[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step
Accuracy: 1.0
Confusion Matrix:
[[1129]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      1129

    accuracy                           1.00      1129
   macro avg       1.00      1.00      1.00      1129
weighted avg       1.00      1.00      1.00      1129


Evaluating model for device 6...




Device 6 model performance:
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
Accuracy: 1.0
Confusion Matrix:
[[13206]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     13206

    accuracy                           1.00     13206
   macro avg       1.00      1.00      1.00     13206
weighted avg       1.00      1.00      1.00     13206


Evaluating model for device 7...




Device 7 model performance:
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
Accuracy: 1.0
Confusion Matrix:
[[13195]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     13195

    accuracy                           1.00     13195
   macro avg       1.00      1.00      1.00     13195
weighted avg       1.00      1.00      1.00     13195


Evaluating model for device 8...
Device 8 model performance:




[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
Accuracy: 1.0
Confusion Matrix:
[[1273]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      1273

    accuracy                           1.00      1273
   macro avg       1.00      1.00      1.00      1273
weighted avg       1.00      1.00      1.00      1273


Evaluating model for device 9...




Device 9 model performance:
[1m436/436[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
Accuracy: 1.0
Confusion Matrix:
[[13934]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     13934

    accuracy                           1.00     13934
   macro avg       1.00      1.00      1.00     13934
weighted avg       1.00      1.00      1.00     13934


Evaluating model for device 10...




Device 10 model performance:
[1m430/430[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
Accuracy: 1.0
Confusion Matrix:
[[13731]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     13731

    accuracy                           1.00     13731
   macro avg       1.00      1.00      1.00     13731
weighted avg       1.00      1.00      1.00     13731


Evaluating model for device 11...




Device 11 model performance:
[1m433/433[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
Accuracy: 1.0
Confusion Matrix:
[[13832]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     13832

    accuracy                           1.00     13832
   macro avg       1.00      1.00      1.00     13832
weighted avg       1.00      1.00      1.00     13832


Evaluating model for device 12...




Device 12 model performance:
[1m419/419[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
Accuracy: 1.0
Confusion Matrix:
[[13389]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     13389

    accuracy                           1.00     13389
   macro avg       1.00      1.00      1.00     13389
weighted avg       1.00      1.00      1.00     13389


Evaluating model for device 13...




Device 13 model performance:
[1m429/429[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
Accuracy: 1.0
Confusion Matrix:
[[13718]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     13718

    accuracy                           1.00     13718
   macro avg       1.00      1.00      1.00     13718
weighted avg       1.00      1.00      1.00     13718


Evaluating model for device 14...
Device 14 model performance:
[1m1/4[0m [32m━━━━━[0m[37m━━━━━━━━━━━━━━━[0m [1m0s[0m 52ms/step



[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
Accuracy: 1.0
Confusion Matrix:
[[114]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       114

    accuracy                           1.00       114
   macro avg       1.00      1.00      1.00       114
weighted avg       1.00      1.00      1.00       114


Evaluating model for device 15...




Device 15 model performance:
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
Accuracy: 1.0
Confusion Matrix:
[[106]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       106

    accuracy                           1.00       106
   macro avg       1.00      1.00      1.00       106
weighted avg       1.00      1.00      1.00       106


Evaluating model for device 16...
Device 16 model performance:




[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
Accuracy: 1.0
Confusion Matrix:
[[201]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       201

    accuracy                           1.00       201
   macro avg       1.00      1.00      1.00       201
weighted avg       1.00      1.00      1.00       201


Evaluating model for device 17...
Device 17 model performance:




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 151ms/step
Accuracy: 1.0
Confusion Matrix:
[[16]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        16

    accuracy                           1.00        16
   macro avg       1.00      1.00      1.00        16
weighted avg       1.00      1.00      1.00        16


Evaluating model for device 18...
Device 18 model performance:




[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
Accuracy: 1.0
Confusion Matrix:
[[106]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       106

    accuracy                           1.00       106
   macro avg       1.00      1.00      1.00       106
weighted avg       1.00      1.00      1.00       106


Evaluating model for device 19...
Device 19 model performance:




[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
Accuracy: 1.0
Confusion Matrix:
[[104]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       104

    accuracy                           1.00       104
   macro avg       1.00      1.00      1.00       104
weighted avg       1.00      1.00      1.00       104


Evaluating model for device 20...
Device 20 model performance:




[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
Accuracy: 1.0
Confusion Matrix:
[[1406]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      1406

    accuracy                           1.00      1406
   macro avg       1.00      1.00      1.00      1406
weighted avg       1.00      1.00      1.00      1406


Evaluating model for device 21...
Device 21 model performance:
[1m1/7[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 58ms/step



[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
Accuracy: 1.0
Confusion Matrix:
[[194]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       194

    accuracy                           1.00       194
   macro avg       1.00      1.00      1.00       194
weighted avg       1.00      1.00      1.00       194


Evaluating model for device 22...
Device 22 model performance:




[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step
Accuracy: 1.0
Confusion Matrix:
[[518]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       518

    accuracy                           1.00       518
   macro avg       1.00      1.00      1.00       518
weighted avg       1.00      1.00      1.00       518


Evaluating model for device 23...




Device 23 model performance:
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
Accuracy: 1.0
Confusion Matrix:
[[1550]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      1550

    accuracy                           1.00      1550
   macro avg       1.00      1.00      1.00      1550
weighted avg       1.00      1.00      1.00      1550


Evaluating model for device 24...
Device 24 model performance:




[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 104ms/step
Accuracy: 1.0
Confusion Matrix:
[[43]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        43

    accuracy                           1.00        43
   macro avg       1.00      1.00      1.00        43
weighted avg       1.00      1.00      1.00        43


Evaluating model for device 25...
Device 25 model performance:




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 129ms/step
Accuracy: 1.0
Confusion Matrix:
[[2]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         2

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2


Evaluating model for device 26...
Device 26 model performance:




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 180ms/step
Accuracy: 1.0
Confusion Matrix:
[[2]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         2

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2


Evaluating model for device 27...




Device 27 model performance:
[1m281/281[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
Accuracy: 1.0
Confusion Matrix:
[[8975]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      8975

    accuracy                           1.00      8975
   macro avg       1.00      1.00      1.00      8975
weighted avg       1.00      1.00      1.00      8975


Evaluating model for device 28...
Device 28 model performance:




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 223ms/step
Accuracy: 1.0
Confusion Matrix:
[[3]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3

    accuracy                           1.00         3
   macro avg       1.00      1.00      1.00         3
weighted avg       1.00      1.00      1.00         3


Evaluating model for device 29...
Device 29 model performance:




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 123ms/step
Accuracy: 1.0
Confusion Matrix:
[[3]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3

    accuracy                           1.00         3
   macro avg       1.00      1.00      1.00         3
weighted avg       1.00      1.00      1.00         3


Evaluating model for device 30...
Device 30 model performance:




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 160ms/step
Accuracy: 1.0
Confusion Matrix:
[[1]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         1

    accuracy                           1.00         1
   macro avg       1.00      1.00      1.00         1
weighted avg       1.00      1.00      1.00         1


Evaluating model for device 31...




Device 31 model performance:
[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
Accuracy: 1.0
Confusion Matrix:
[[6073]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      6073

    accuracy                           1.00      6073
   macro avg       1.00      1.00      1.00      6073
weighted avg       1.00      1.00      1.00      6073


Evaluating model for device 32...
Device 32 model performance:




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 135ms/step
Accuracy: 1.0
Confusion Matrix:
[[1]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         1

    accuracy                           1.00         1
   macro avg       1.00      1.00      1.00         1
weighted avg       1.00      1.00      1.00         1


Evaluating model for device 33...
Device 33 model performance:




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 134ms/step
Accuracy: 1.0
Confusion Matrix:
[[1]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         1

    accuracy                           1.00         1
   macro avg       1.00      1.00      1.00         1
weighted avg       1.00      1.00      1.00         1


Evaluating model for device 34...
Device 34 model performance:
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 161ms/step
Accuracy: 1.0
Confusion Matrix:
[[1]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         1

    accuracy                           1.00         1
   macro avg       1.00      1.00      1.00         1
weighted avg       1.00      1.00      1.00         1


Evaluating model for device 35...
Device 35 model performance:




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 96ms/step
Accuracy: 1.0
Confusion Matrix:
[[3]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3

    accuracy                           1.00         3
   macro avg       1.00      1.00      1.00         3
weighted avg       1.00      1.00      1.00         3


Evaluating model for device 36...




Device 36 model performance:
[1m257/257[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Accuracy: 1.0
Confusion Matrix:
[[8210]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      8210

    accuracy                           1.00      8210
   macro avg       1.00      1.00      1.00      8210
weighted avg       1.00      1.00      1.00      8210


Evaluating model for device 37...
Skipping device 37 due to insufficient or invalid data.

Evaluating model for device 38...
Device 38 model performance:
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 96ms/step
Accuracy: 1.0
Confusion Matrix:
[[1]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         1

    accuracy                           1.00         1
   macro avg       1.00      1.00      1.00         1
weighted avg       1.00      1.00      1.00         1


Evaluating model for device 39...
Device 39 model performance:




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 158ms/step
Accuracy: 1.0
Confusion Matrix:
[[1]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         1

    accuracy                           1.00         1
   macro avg       1.00      1.00      1.00         1
weighted avg       1.00      1.00      1.00         1


Evaluating model for device 40...
Error loading data for device 40: list index out of range

Evaluating model for device 41...
Error loading data for device 41: list index out of range

Evaluating global model on aggregated test data...




Global model performance:
[1m  1/436[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m23s[0m 55ms/step

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m436/436[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
Accuracy: 1.0
Confusion Matrix:
[[13931]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     13931

    accuracy                           1.00     13931
   macro avg       1.00      1.00      1.00     13931
weighted avg       1.00      1.00      1.00     13931


Evaluation completed!


