# Benchmark and Compare CIC17 & CIC18 base & improved datasets

In [24]:
import subprocess
import os
from concurrent.futures import ThreadPoolExecutor, as_completed
import threading

## Preparation

Let's prepare a function that helps us run the tests for a given (dataset, target, test) tuple.

In [25]:
# lock for thread-safe progress updates
progress_lock = threading.Lock()

def run_benchmark(metadata_path: str, results_path: str, target: str, data_folder: str, test_name: str) -> str:
    """
    Run a single benchmark test from BadSmells repository.
    
    Args:
        metadata_path (str): Path to the metadata JSON file.
        results_path (str): Path to save the benchmark results.
        target (str): The target attack class (e.g., "DoS attacks-Hulk").
        data_folder (str): Path to the dataset folder (e.g., "data/CIC18/").
        test_name (str): The specific test to run (e.g., "CosineTest").
        
    Returns:
        str: Result message indicating success or failure.
    """
    command = [
        "python3",
        "./DataBadSmells/src/netstats.py",
        "--metadata", metadata_path,
        "--results", results_path,
        "--target", target,
        "--folder",
        "--csv", data_folder,
        "--test", test_name
    ]
    
    try:
        result = subprocess.run(command, capture_output=True, text=True)
        if result.returncode == 0:
            return f"Success: {test_name} for {target}"
        else:
            return f"Error: {test_name} for {target}\n{result.stderr}"
    
    except Exception as e:
        return f"Exception occurred: {e} for {test_name} for {target}"


def run_all_benchmarks_parallel(metadata_path: str, results_path: str, data_folder: str, targets: list[str], tests: list[str], max_workers: int = 8):
    """
    Fan out every (target, test) combination to a thread pool and report progress.

    One ``run_benchmark`` call is submitted per combination. As each call
    finishes, a line of the form ``[<percent done>%] <result message>`` is
    printed. Nothing is returned.

    Args:
        metadata_path (str): Forwarded to ``run_benchmark``.
        results_path (str): Forwarded to ``run_benchmark``.
        data_folder (str): Forwarded to ``run_benchmark``.
        targets (list): Target attack classes.
        tests (list): Tests to run for each target.
        max_workers (int): Size of the thread pool.
    """
    job_count = len(targets) * len(tests)

    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        # targets vary in the outer loop, tests in the inner one
        jobs = [
            pool.submit(run_benchmark, metadata_path, results_path, attack, data_folder, check)
            for attack in targets
            for check in tests
        ]

        # enumerate() counts finished jobs in completion order, starting at 1
        for finished, job in enumerate(as_completed(jobs), start=1):
            outcome = job.result()

            with progress_lock:
                share_done = (finished / job_count) * 100
                print(f"[{share_done:.2f}%] {outcome}")

In [29]:
# Attack class labels used as benchmark targets for the CIC17 runs
# (passed as `targets` to run_all_benchmarks_parallel).
cic17_targets = [
    "DoS Hulk",
    "DDoS",
    "DoS GoldenEye",
    "FTP-Patator",
    "SSH-Patator",
    "PortScan",
    "DoS slowloris",
    "DoS Slowhttptest",
    "Bot",
    "Web Attack - Brute Force",
    "Infiltration",
    "Web Attack - Sql Injection",
    "Heartbleed"
]

## CIC17 - base version

In [28]:
# Tests executed for every CIC17 target.
tests = ["CosineTest", "PortTest", "SingleFeatureEfficacyTest", "NearestNeighboursTest"]

# Inputs and output location for the CIC17 base run.
metadata_path = "./DataBadSmells/metadata/cic/metadata.json"
data_folder = "/Volumes/EXTERNAL_US/nids_data/data/CIC17/"
results_path = "./results/CIC17/"

# Run every (target, test) combination with the default worker count.
run_all_benchmarks_parallel(
    metadata_path=metadata_path,
    results_path=results_path,
    data_folder=data_folder,
    targets=cic17_targets,
    tests=tests,
)

[25.00%] Success: PortTest for DoS Hulk
[50.00%] Success: CosineTest for DoS Hulk
[75.00%] Success: NearestNeighboursTest for DoS Hulk
[100.00%] Success: SingleFeatureEfficacyTest for DoS Hulk


In [None]:
## CIC17 - improved version