# Benchmark and Compare CIC17 base & improved datasets

In [15]:
import subprocess
import os
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor, as_completed
import threading
import time
import numpy as np
import random

# Seed NumPy's and the stdlib's global RNGs for this kernel process.
# NOTE(review): the benchmarks below are launched as separate `python3`
# subprocesses (see run_benchmark), which do not share this process's RNG
# state — confirm that netstats.py seeds itself if reproducibility matters.
np.random.seed(42)
random.seed(42)

## Preparation

Let's prepare a function that helps us run the tests for a given (dataset, target, test) tuple.

In [16]:
# Lock held by run_all_benchmarks_parallel while it increments its
# completed-task counter and prints the matching progress line.
progress_lock = threading.Lock()

def run_benchmark(metadata_path: str, results_path: str, target: str, data_folder: str, test_name: str) -> str:
    """
    Launch one DataBadSmells test (``netstats.py``) as a subprocess and report the outcome.

    The call blocks until the subprocess exits. Any ``Exception`` raised while
    running it is caught and reported in the returned message.

    Args:
        metadata_path (str): Path to the metadata JSON file.
        results_path (str): Path where the benchmark results are saved.
        target (str): The target attack class (e.g., "DoS Hulk").
        data_folder (str): Path to the dataset folder (e.g., "./data/CIC17/").
        test_name (str): The specific test to run (e.g., "CosineTest").

    Returns:
        str: A status message that always contains the test name, the target
        and the elapsed time. It starts with "Success:" on exit code 0, with
        "Error:" (followed by the captured stderr) on any other exit code, and
        with "Exception occurred" when running the subprocess raised.
    """
    began_at = time.time()

    # Build the netstats.py command line piece by piece.
    argv = ["python3", "./DataBadSmells/src/netstats.py"]
    argv += ["--metadata", metadata_path]
    argv += ["--results", results_path]
    argv += ["--target", target]
    # `--folder` is passed without a value; presumably it tells netstats.py
    # that `--csv` points at a directory — TODO confirm against netstats.py.
    argv += ["--folder"]
    argv += ["--csv", data_folder]
    argv += ["--test", test_name]

    try:
        # stdout/stderr are captured as text so stderr can be echoed on failure.
        proc = subprocess.run(argv, capture_output=True, text=True)
        took = time.time() - began_at
        if proc.returncode != 0:
            return f"Error: {test_name} for {target} (Elapsed time: {took:.2f}s)\n{proc.stderr}"
        return f"Success: {test_name} for {target} (Elapsed time: {took:.2f}s)"

    except Exception as exc:
        took = time.time() - began_at
        return f"Exception occurred while running {test_name} for {target} (Elapsed time: {took:.2f}s): {exc}"

def run_all_benchmarks_parallel(metadata_path: str, results_path: str, data_folder: str, targets: list[str], tests: list[str], max_workers: int = 8):
    """
    Run every (target, test) combination through ``run_benchmark`` on a thread pool.

    One task is submitted per pair. As each task finishes, its status message
    is printed, prefixed with the percentage of tasks completed so far. The
    total wall-clock time is printed once all tasks are done.

    Args:
        metadata_path (str): Path to the metadata JSON file.
        results_path (str): Path to save results.
        data_folder (str): Path to the dataset folder.
        targets (list): List of target attack classes.
        tests (list): List of tests to run for each target.
        max_workers (int): Maximum number of parallel workers.
    """
    total_tasks = len(targets) * len(tests)
    finished = 0
    overall_start = time.time()

    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        # Submit in target-major order: all tests of the first target, then the next.
        pending = []
        for target in targets:
            for test in tests:
                pending.append(
                    pool.submit(run_benchmark, metadata_path, results_path, target, data_folder, test)
                )

        # Report tasks in completion order, not submission order.
        for done in as_completed(pending):
            message = done.result()

            with progress_lock:
                finished += 1
                print(f"[{(finished / total_tasks) * 100:.2f}%] {message}")

    # final elapsed time
    print(f"\nAll benchmarks completed in {time.time() - overall_start:.2f}s.")

In [18]:
# Attack labels to benchmark. Each label is handed to netstats.py as `--target`;
# run_all_benchmarks_parallel submits one task per (target, test) pair.
cic17_targets = [
    "DoS Hulk",
    "DDoS",
    "DoS GoldenEye",
    "FTP Patator",
    "SSH Patator",
    "Port Scan",
    "DoS slowloris",
    "DoS Slowhttptest",
    "Bot",
    "Web Attack - Brute Force",
    "Infiltration",
    "Web Attack - Sql Injection",
    "Heartbleed"
]

# Tests to run for every target. Toggle an entry by (un)commenting its line.
# Every entry carries its own trailing comma *before* the explanatory comment,
# so enabling one can never fuse it with the next string through implicit
# string-literal concatenation (e.g. "NearestNeighboursTest" "CompleteTest").
tests = [
    # "CosineTest",  # PDDc (Poor Data Diversity), TCc (Traffic Collapse)
    # "PortTest",  # UGTc (Unclear Ground Truth)
    # "SingleFeatureEfficacyTest",  # HDFc (Highly Dependent Features)
    # "NearestNeighboursTest",  # WLc (Wrong Label Smell)
    "CompleteTest",
]

## CIC17 - base version

In [4]:
# Paths for the CIC17 *base* dataset run (used by the run_all_benchmarks_parallel calls below).
cic17_base_metadata_path = "./DataBadSmells/metadata/cic/metadata.json"  # metadata JSON file
cic17_base_results_path = "./results/CIC17_base_first_run/"  # where benchmark results are saved
cic17_base_data_folder = "./data/CIC17/"  # dataset folder

In [28]:
# Take the results path from the config cell instead of repeating the literal,
# so this run cannot silently diverge from `cic17_base_results_path`.
run_all_benchmarks_parallel(cic17_base_metadata_path, cic17_base_results_path, cic17_base_data_folder, cic17_targets, tests)

Python(66626) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(66628) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(66629) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(66630) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(66631) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(66632) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(66633) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(66634) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


[7.69%] Success: CompleteTest for FTP Patator (Elapsed time: 822.20s)


Python(70530) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


[15.38%] Success: CompleteTest for Port Scan (Elapsed time: 969.21s)


Python(71224) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


[23.08%] Success: CompleteTest for SSH Patator (Elapsed time: 982.79s)


Python(71288) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


[30.77%] Success: CompleteTest for Web Attack - Brute Force (Elapsed time: 786.18s)


Python(74968) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


[38.46%] Success: CompleteTest for DoS slowloris (Elapsed time: 2317.50s)


Python(77536) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


[46.15%] Success: CompleteTest for DoS GoldenEye (Elapsed time: 2475.75s)
[53.85%] Success: CompleteTest for DoS Slowhttptest (Elapsed time: 2562.64s)
[61.54%] Success: CompleteTest for Web Attack - Sql Injection (Elapsed time: 833.02s)
[69.23%] Success: CompleteTest for Infiltration (Elapsed time: 1618.45s)
[76.92%] Success: CompleteTest for Bot (Elapsed time: 1953.22s)
[84.62%] Success: CompleteTest for DDoS (Elapsed time: 2953.56s)
[92.31%] Success: CompleteTest for DoS Hulk (Elapsed time: 3068.92s)
[100.00%] Success: CompleteTest for Heartbleed (Elapsed time: 786.59s)

All benchmarks completed in 3104.12s.


In [5]:
# NOTE(review): the output saved under this cell reports PortTest / CosineTest /
# NearestNeighboursTest results in 1.92% steps, so it presumably comes from an
# earlier run with the individual tests enabled rather than the current
# `tests` list — re-run to refresh it.
run_all_benchmarks_parallel(
    metadata_path=cic17_base_metadata_path,
    results_path=cic17_base_results_path,
    data_folder=cic17_base_data_folder,
    targets=cic17_targets,
    tests=tests,
)

[1.92%] Success: PortTest for DDoS (Elapsed time: 50.17s)
[3.85%] Success: PortTest for DoS Hulk (Elapsed time: 62.45s)
[5.77%] Success: CosineTest for DDoS (Elapsed time: 92.66s)
[7.69%] Success: CosineTest for DoS Hulk (Elapsed time: 103.84s)
[9.62%] Success: PortTest for DoS GoldenEye (Elapsed time: 1996.36s)
[11.54%] Success: CosineTest for DoS GoldenEye (Elapsed time: 2031.99s)
[13.46%] Success: CosineTest for FTP Patator (Elapsed time: 133.86s)
[15.38%] Success: NearestNeighboursTest for DoS GoldenEye (Elapsed time: 2109.65s)
[17.31%] Success: PortTest for FTP Patator (Elapsed time: 132.05s)
[19.23%] Success: CosineTest for SSH Patator (Elapsed time: 120.99s)
[21.15%] Error: NearestNeighboursTest for FTP Patator (Elapsed time: 143.89s)
  df = pd.read_csv(_file, encoding_errors='ignore')
Traceback (most recent call last):
  File "/Users/ilcors-dev/src/unibo/unibo-nids-research/./DataBadSmells/src/netstats.py", line 266, in <module>
    main(sys.argv[1:])
  File "/Users/ilcors-dev/

## CIC17 - improved version

In [24]:
# Paths for the CIC17 *improved* dataset run (used by the benchmark calls below).
cic17_improved_metadata_path = "./DataBadSmells/metadata/cic/metadata_improved.json"  # metadata JSON file
cic17_improved_results_path = "./results/CIC17_improved_third_run/"  # where benchmark results are saved
cic17_improved_data_folder = "./data/CIC17_improved/"  # dataset folder

In [14]:
# Single CompleteTest run for one target; the returned status message is the cell output.
run_benchmark(
    metadata_path=cic17_improved_metadata_path,
    results_path=cic17_improved_results_path,
    target="DoS Hulk",
    data_folder=cic17_improved_data_folder,
    test_name="CompleteTest",
)

'Success: CompleteTest for DoS Hulk (Elapsed time: 914.09s)'

In [25]:
# Single CompleteTest run for one target; the returned status message is the cell output.
run_benchmark(
    metadata_path=cic17_improved_metadata_path,
    results_path=cic17_improved_results_path,
    target="DoS slowloris",
    data_folder=cic17_improved_data_folder,
    test_name="CompleteTest",
)

'Success: CompleteTest for DoS slowloris (Elapsed time: 221.34s)'

In [27]:
# Take the results path from the config cell instead of repeating the literal,
# so this run cannot silently diverge from `cic17_improved_results_path`.
run_all_benchmarks_parallel(cic17_improved_metadata_path, cic17_improved_results_path, cic17_improved_data_folder, cic17_targets, tests)

Python(52455) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(52456) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(52457) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(52458) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(52459) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(52462) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(52464) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(52465) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


[7.69%] Success: CompleteTest for DoS slowloris (Elapsed time: 594.81s)
[15.38%] Success: CompleteTest for Port Scan (Elapsed time: 594.83s)


Python(55388) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(55389) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


[23.08%] Success: CompleteTest for FTP Patator (Elapsed time: 617.21s)


Python(55501) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


[30.77%] Success: CompleteTest for SSH Patator (Elapsed time: 619.57s)


Python(55511) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(57980) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


[38.46%] Success: CompleteTest for Bot (Elapsed time: 523.28s)
[46.15%] Success: CompleteTest for Web Attack - Sql Injection (Elapsed time: 509.63s)
[53.85%] Success: CompleteTest for Web Attack - Brute Force (Elapsed time: 818.66s)
[61.54%] Success: CompleteTest for DoS Slowhttptest (Elapsed time: 1434.88s)
[69.23%] Success: CompleteTest for DoS GoldenEye (Elapsed time: 1583.38s)
[76.92%] Success: CompleteTest for Infiltration (Elapsed time: 1054.03s)
[84.62%] Success: CompleteTest for Heartbleed (Elapsed time: 604.46s)
[92.31%] Success: CompleteTest for DDoS (Elapsed time: 1873.41s)
[100.00%] Success: CompleteTest for DoS Hulk (Elapsed time: 1922.49s)

All benchmarks completed in 1922.49s.
