In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras import layers
from keras.metrics import Recall, Precision
import time
import psutil
import sys
import threading
import json
from web3 import Web3, HTTPProvider

2024-02-09 15:27:22.173030: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-02-09 15:27:22.237958: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-02-09 15:27:22.239269: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
blockchain_address = 'http://127.0.0.1:8545'
web3 = Web3(HTTPProvider(blockchain_address))
web3.is_connected()

True

In [4]:
web3.eth.defaultAccount = web3.eth.accounts[0]
print(web3.eth.accounts[0])

0x8348eC8A6293180f7c56baC764A190601c5cF5DD


In [None]:
compiled_contract_path_edge = '/home/administrator/docker-eth/build/contracts/ids.json'
compiled_contract_path_fog = '/home/administrator/docker-eth/build/contracts/ids.json'
compiled_contract_path_cloud = '/home/administrator/docker-eth/build/contracts/ids.json'

def compile_contract (compiled_contract_path):
    with open(compiled_contract_path) as file:
        contract_json = json.load(file) 
        contract_abi = contract_json['abi']
    
    return contract_abi
        
abi_edge = compile_contract(compiled_contract_path_edge)
abi_fog = compile_contract(compiled_contract_path_fog)
abi_cloud = compile_contract(compiled_contract_path_cloud)

In [None]:
deployed_contract_address_edge = '0x9B211180B1E9189305897AfD2f1f82bf9815FCf5'
deployed_contract_address_fog = '0x9B211180B1E9189305897AfD2f1f82bf9815FCf5'
deployed_contract_address_cloud = '0x9B211180B1E9189305897AfD2f1f82bf9815FCf5'

contract_edge = web3.eth.contract(address = deployed_contract_address_edge, abi = abi_edge)
contract_fog = web3.eth.contract(address = deployed_contract_address_fog, abi = abi_fog)
contract_cloud = web3.eth.contract(address = deployed_contract_address_cloud, abi = abi_cloud)

In [2]:
chunksize = 1000

In [3]:
list_of_dataframes = []

for dfp in pd.read_csv('/home/administrator/dataset/CIC-IoT-2023/test_CICIoT2023.csv', chunksize=chunksize):
    list_of_dataframes.append(dfp)

df = pd.concat(list_of_dataframes)

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14005974 entries, 0 to 14005973
Data columns (total 47 columns):
 #   Column           Dtype  
---  ------           -----  
 0   flow_duration    float64
 1   Header_Length    float64
 2   Protocol Type    float64
 3   Duration         float64
 4   Rate             float64
 5   Srate            float64
 6   Drate            float64
 7   fin_flag_number  float64
 8   syn_flag_number  float64
 9   rst_flag_number  float64
 10  psh_flag_number  float64
 11  ack_flag_number  float64
 12  ece_flag_number  float64
 13  cwr_flag_number  float64
 14  ack_count        float64
 15  syn_count        float64
 16  fin_count        float64
 17  urg_count        float64
 18  rst_count        float64
 19  HTTP             float64
 20  HTTPS            float64
 21  DNS              float64
 22  Telnet           float64
 23  SMTP             float64
 24  SSH              float64
 25  IRC              float64
 26  TCP              float64
 27  UDP       

In [5]:
df['label'].value_counts()

label
DDoS          10197039
DoS            2426635
Mirai           790305
Benign          328597
Spoofing        145999
Recon           105957
Web               7462
BruteForce        3980
Name: count, dtype: int64

In [6]:
trf_type = df.loc[:, "label"].map(lambda lbl: "0" if lbl == "Benign" else "1")
trf_type.name = "traffic type"
df.loc[:, trf_type.name] = trf_type

In [7]:
df['traffic type'].value_counts()

traffic type
1    13677377
0      328597
Name: count, dtype: int64

In [8]:
df['traffic type'] = df['traffic type'].astype(int)

In [9]:
X_df = df.drop(columns=['label', 'traffic type'])
y_df = df['traffic type']

In [10]:
# Split into training and validation sets
X_train, X_test, y_train, y_test = train_test_split(X_df, y_df, test_size=0.2, random_state=100)

In [11]:
scaler = StandardScaler()

In [12]:
X_train = scaler.fit_transform(X_train)

In [13]:
X_test = scaler.transform(X_test)

In [14]:
def load_CICIoT2023_data(client_id):
    
    # Create non-IID splits based on client_id
    np.random.seed(client_id)
    indices = np.arange(len(X_train))
    np.random.shuffle(indices)

    # Choose a fraction of the data for this client
    fraction = 0.04
    client_data_size = int(fraction * len(X_train))
    client_indices = indices[:client_data_size]

    X_client = X_train[client_indices]
    y_client = y_train.iloc[client_indices]

    return X_client, y_client, X_test, y_test

In [15]:
# Define a simple neural network for binary classification
def create_classification_model(input_shape):
    model = keras.Sequential([
        layers.Dense(21, activation='relu', input_shape=(input_shape,)),
        layers.Dense(13, activation='relu'),
        layers.Dense(7, activation='relu'),
        layers.Dense(5, activation='relu'),
        layers.Dense(1, activation='sigmoid')
    ])
    model.compile(loss='mean_squared_error', optimizer='sgd', metrics=['accuracy', Recall(), Precision()])
    return model

In [16]:
# Function to monitor resources
def monitor_resources(cpu_percent_list, memory_usage_list, stop_event=None):
    while not stop_event.is_set():
        cpu_percent = psutil.cpu_percent(interval=1)
        memory_info = psutil.virtual_memory()

        cpu_percent_list.append(cpu_percent)
        memory_usage_list.append(memory_info.used / (1024 ** 3))  # Convert to GB

In [None]:
# Function to save weight into ethereum blockchain
def save_transaction_edge(weight_edge):
    tx_hash_edge = contract_edge.functions.sendHash(weight_edge).transact({'from': web3.eth.accounts[0]})
    tx_receipt_edge = web3.eth.wait_for_transaction_receipt(tx_hash_edge)
    print('tx_hash_edge: {}'.format(tx_hash_edge.hex()))
    print(tx_receipt_edge)

def save_transaction_fog(weight_fog):
    tx_hash_fog = contract_fog.functions.sendHash(weight_fog).transact({'from': web3.eth.accounts[0]})
    tx_receipt_fog = web3.eth.wait_for_transaction_receipt(tx_hash_fog)
    print('tx_hash_fog: {}'.format(tx_hash_fog.hex()))
    print(tx_receipt_fog)
    
def save_transaction_cloud(weight_cloud):
    tx_hash_cloud = contract_cloud.functions.sendHash(weight_cloud).transact({'from': web3.eth.accounts[0]})
    tx_receipt_cloud = web3.eth.wait_for_transaction_receipt(tx_hash_cloud)
    print('tx_hash_cloud: {}'.format(tx_hash_cloud.hex()))
    print(tx_receipt_cloud)
    
#Function to recall weight
def recall_transaction_edge(num):
    current_block_edge = w3.eth.blockNumber
    edge_models = []
    for i in range(num):
        blocks_before_edge = current_block_edge - i
        contract_state_edge = contract_edge.atBlock(blocks_before_edge)
        output_edge = contract_state_edge.functions.getHash().call()
        edge_models.append(output_edge)
    return edge_models

def recall_transaction_fog(num):
    current_block_fog = w3.eth.blockNumber
    fog_models = []
    for i in range(num):
        blocks_before_fog = current_block_fog - i
        contract_state_fog = contract_fog.atBlock(blocks_before_fog)
        output_fog = contract_state_fog.functions.getHash().call()
        fog_models.append(output_fog)
    return fog_models

def recall_transaction_cloud(num):
    current_block_cloud = w3.eth.blockNumber
    cloud_models = []
    for i in range(num):
        blocks_before_cloud = current_block_cloud - i
        contract_state_cloud = contract_cloud.atBlock(blocks_before_cloud)
        output_cloud = contract_state_cloud.functions.getHash().call()
        cloud_models.append(output_cloud)
    return cloud_models

In [17]:
input_shape = X_train.shape[1]

# Initialize global classification model
cloud_model = create_classification_model(input_shape=input_shape)

In [20]:
# Number of federated learning rounds
num_rounds = 5
num_fog = 5
num_edge = 5

cloud_models = []

# Federated learning simulation
for round in range(num_rounds):

    # Fog layer with 5 devices
    for fog_device_id in range(num_fog):  # Assuming 5 devices
        
        # Create a copy of the cloud model for each fog device
        fog_model = keras.models.clone_model(cloud_model)
        fog_model.compile(loss='mean_squared_error', optimizer='sgd', metrics=['accuracy', Recall(), Precision()])
        
        # Assuming each fog device manages 5 edge devices
        for edge_device_id in range(num_edge):
            
             # Create a copy of the fog model for each edge device
            edge_model = keras.models.clone_model(fog_model)
            edge_model.compile(loss='mean_squared_error', optimizer='sgd', metrics=['accuracy', Recall(), Precision()])
            
            # Load non-IID data for each client
            X_local, y_local, X_test_local, y_test_local = load_CICIoT2023_data(client_id=edge_device_id)

            # Measure resource utilization
            # Create a threading.Event to signal the monitoring thread to stop
            stop_monitoring = threading.Event()

            # Create lists to store CPU and memory utilization values
            cpu_percent_list = []
            memory_usage_list = []

            # Start monitoring in a separate thread
            monitor_thread = threading.Thread(target=monitor_resources, args=(cpu_percent_list, memory_usage_list, stop_monitoring))
            monitor_thread.start()

            # Start time of training
            start_train = time.time()

            # Perform local training using the client's data
            edge_model.fit(X_local, y_local, epochs=10, verbose=0)

            # End time of training
            end_train = time.time()

            # Signal the monitoring thread to stop
            stop_monitoring.set()

            # Wait for the monitoring thread to finish
            monitor_thread.join()

            # Calculte training time
            train_time = end_train - start_train
            
            # Calculate the average CPU and memory utilization
            average_cpu_percent = sum(cpu_percent_list) / len(cpu_percent_list)
            average_memory_usage = sum(memory_usage_list) / len(memory_usage_list)

            # Measure edge model size in bytes
            current_model_size_edge_bytes = sys.getsizeof(edge_model)

            # Convert to megabytes
            current_model_size_edge_mb = current_model_size_edge_bytes / 1024  # 1 MB = 1024 KB

            # Evaluate edge model on the testing set
            loss_egde, accuracy_edge, recall_edge, precision_edge = edge_model.evaluate(X_test_local, y_test_local, verbose=0)

            start_save_time_edge = time.time()
            
            # Append the local model weights to edge_models
            save_transaction_edge(edge_model.get_weights())
            
            end_save_time_edge = time.time()
            
            latency_dlt_edge = end_save_time_edge - start_save_time_edge
            
            print('================================================================================================================')
            print(f"Round {round + 1}, Edge Device {edge_device_id + 1}: Edge Model - Loss: {loss_egde}, Accuracy: {accuracy_edge}, Recall: {recall_edge}, Precision: {precision_edge}")
            print(f"Training Time: {train_time} seconds, Model Size: {current_model_size_edge_mb} KB, DLT Latency - Write data to block: {latency_dlt_edge} seconds")
            print(f"Average CPU Percent: {average_cpu_percent:.2f}%, Average Memory Usage: {average_memory_usage:.2f} GB")
        
        start_recall_time_edge = time.time()
        
        edge_models = recall_transaction_edge(num_edge)
        
        end_recall_time_edge = time.time()
        
        latency_dlt_rec_edge = end_recall_time_edge - start_recall_time_edge
        
        # Coordinate-wise mean aggregation for edge layer in fog device
        global_edge_weights = []
        for layer_edge_weights in zip(*edge_models):
            mean_edge_weights = np.mean(layer_edge_weights, axis=0)
            global_edge_weights.append(mean_edge_weights)

        # Update fog model with aggregated weights for the edge layer
        fog_model.set_weights(global_edge_weights)
        
        # Measure fog model size in bytes
        current_model_size_fog_bytes = sys.getsizeof(fog_model)

        # Convert to megabytes
        current_model_size_fog_mb = current_model_size_fog_bytes / 1024  # 1 MB = 1024 KB
        
        # Evaluate edge model on the testing set
        loss_fog, accuracy_fog, recall_fog, precision_fog = fog_model.evaluate(X_test, y_test, verbose=0)
        
        start_save_time_fog = time.time()
        
        # Append the local model weights to fog_models
        save_transaction_fog(fog_model.get_weights())
        
        end_save_time_fog = time.time()
            
        latency_dlt_fog = end_save_time_fog - start_save_time_fog
                       
        print('================================================================================================================')
        print(f"Round {round + 1}, Fog Device {fog_device_id + 1}: Fog Model - Loss: {loss_fog}, Accuracy: {accuracy_fog}, Recall: {recall_fog}, Precision: {precision_fog}")
        print(f"Global Model Size - {current_model_size_fog_mb} KB")
        print(f"DLT Latency - Write data to block: {latency_dlt_fog} seconds, Read data from block: {latency_dlt_rec_edge} seconds")
    
    start_recall_time_fog = time.time()
    
    fog_models = recall_transaction_fog(num_fog)
    
    end_recall_time_fog = time.time()
        
    latency_dlt_rec_fog = end_recall_time_fog - start_recall_time_fog
    
    # Coordinate-wise mean aggregation for fog layer
    global_fog_weights = []
    for layer_fog_weights in zip(*fog_models):
        mean_fog_weights = np.mean(layer_fog_weights, axis=0)
        global_fog_weights.append(mean_fog_weights)

    # Update global model with aggregated weights for the fog layer
    cloud_model.set_weights(global_fog_weights)
    
    # Measure global model size in bytes
    current_model_size_global_bytes = sys.getsizeof(cloud_model)

    # Convert to megabytes
    current_model_size_global_mb = current_model_size_global_bytes / 1024  # 1 MB = 1024 KB

    # Evaluate global model on the validation set
    score_global = cloud_model.evaluate(X_test, y_test, verbose=0)
    
    start_save_time_cloud = time.time()
    
    # Append the local model weights to cloud_models
    save_transaction_cloud(cloud_model.get_weights())
    
    end_save_time_cloud = time.time()
            
    latency_dlt_cloud = end_save_time_cloud - start_save_time_cloud
    
    print('===================================================================================================================')
    print(f"Round {round + 1}: Global Model - Loss: {score_global[0]}, Accuracy: {score_global[1]}, Recall: {score_global[2]}, Precision: {score_global[3]}")
    print(f"Global Model Size - {current_model_size_global_mb} KB")
    print(f"DLT Latency - Write data to block: {latency_dlt_cloud} seconds, Read data from block: {latency_dlt_rec_fog} seconds")

start_recall_time_cloud = time.time()
    
cloud_models = recall_transaction_cloud(num_rounds)  

end_recall_time_cloud = time.time()
            
latency_dlt_rec_cloud = end_recall_time_cloud - start_recall_time_cloud

# Coordinate-wise mean aggregation for cloud layer
global_cloud_weights = []
for layer_cloud_weights in zip(*cloud_models):
    mean_cloud_weights = np.mean(layer_cloud_weights, axis=0)
    global_cloud_weights.append(mean_cloud_weights)
    
# Final global classification model
final_global_model = create_classification_model(input_shape=input_shape)

# Update final model with aggregated weights for the cloud layer
final_global_model.set_weights(global_cloud_weights)

# Measure final model size in bytes
current_model_size_final_bytes = sys.getsizeof(final_global_mode)

# Convert to megabytes
current_model_size_final_mb = current_model_size_final_bytes / 1024  # 1 MB = 1024 KB

# Evaluate final model on the validation set
score_final = final_global_model.evaluate(X_test, y_test, verbose=0)

print('===================================================================================================================')
print(f"Final Model - Loss: {score_final[0]}, Accuracy: {score_final[1]}, Recall: {score_final[2]}, Precision: {score_final[3]}")
print(f"Final Model Size - {current_model_size_final_mb} KB, DLT Latency - Read data from block: {latency_dlt_rec_fog} seconds")

Round 1, Edge Device 1: Edge Model - Loss: 0.0063963886350393295, Accuracy: 0.9908539056777954, Recall: 0.993236243724823, Precision: 0.9973869919776917
Training Time: 149.38945198059082 seconds, Model Size: 0.046875 KB
Average CPU Percent: 4.86%, Average Memory Usage: 26.79 GB
Round 1, Edge Device 2: Edge Model - Loss: 0.007170390337705612, Accuracy: 0.9903984069824219, Recall: 0.9927833080291748, Precision: 0.9973722696304321
Training Time: 159.3181664943695 seconds, Model Size: 0.046875 KB
Average CPU Percent: 4.65%, Average Memory Usage: 27.40 GB
Round 1, Edge Device 3: Edge Model - Loss: 0.006185179576277733, Accuracy: 0.9910959601402283, Recall: 0.99381023645401, Precision: 0.9970622062683105
Training Time: 159.62023758888245 seconds, Model Size: 0.046875 KB
Average CPU Percent: 4.68%, Average Memory Usage: 28.07 GB
Round 1, Edge Device 4: Edge Model - Loss: 0.015226216055452824, Accuracy: 0.9832992553710938, Recall: 0.9964474439620972, Precision: 0.9865847229957581
Training Time

In [22]:
centralize_learning = create_classification_model(input_shape=input_shape)

# Measure resource utilization 
# Create a threading.Event to signal the monitoring thread to stop
stop_monitoring2 = threading.Event()

# Create lists to store CPU and memory utilization values
cpu_percent_list2 = []
memory_usage_list2 = []

# Start monitoring in a separate thread
monitor_thread2 = threading.Thread(target=monitor_resources, args=(cpu_percent_list2, memory_usage_list2, stop_monitoring2))
monitor_thread2.start()

# Start time of training
start_train_cent = time.time()

# Perform training using the data
centralize_learning.fit(X_train, y_train, epochs=5, verbose=0)

# End time of training
end_train_cent = time.time()

# Signal the monitoring thread to stop
stop_monitoring2.set()

# Wait for the monitoring thread to finish
monitor_thread2.join()

# Calculte training time
train_time_cent = end_train_cent - start_train_cent

# Calculate the average CPU and memory utilization
average_cpu_percent2 = sum(cpu_percent_list2) / len(cpu_percent_list2)
average_memory_usage2 = sum(memory_usage_list2) / len(memory_usage_list2)

# Measure edge model size in bytes
current_model_size_cent_bytes = sys.getsizeof(centralize_learning)

# Convert to megabytes
current_model_size_cent_mb = current_model_size_cent_bytes / 1024  # 1 MB = 1024 KB

# Evaluate edge model on the testing set
loss_cent, accuracy_cent, recall_cent, precision_cent = centralize_learning.evaluate(X_test, y_test, verbose=0)

print('===================================================================================================================\n')
print(f"Centralize Learning - Loss: {loss_cent}, Accuracy: {accuracy_cent}, Recall: {recall_cent}, Precision: {precision_cent}\n")
print(f"Training Time: {train_time_cent} seconds \n")
print(f"Model Size: {current_model_size_cent_mb} KB\n")
print(f"Average CPU Percent: {average_cpu_percent2:.2f}%")
print(f"Average Memory Usage: {average_memory_usage2:.2f} GB")


Centralize Learning - Loss: 0.004564844537526369, Accuracy: 0.9936402440071106, Recall: 0.9955503344535828, Precision: 0.9979321956634521

Training Time: 4041.0752148628235 seconds 

Model Size: 0.046875 KB

Average CPU Percent: 4.70%
Average Memory Usage: 40.78 GB
