In [1]:
import pandas as pd
import numpy as np

In [None]:
# main_evaluation.py
 

from utilis.Data_loader import load_and_align_all_data
# Removed granular_to_broad_map import as it's no longer used directly here

from utilis.constants import DATASET_PATHS, TARGET_ATTACK_LABELS_STR_BROAD,PARAM_GRIDS

from cross_evaluation import run_cross_dataset_evaluation
from intra_evaluation import run_intra_dataset_evaluation


def aggregate_per_class_metrics(results_dict, target_report_labels):
    """
    Aggregate per-class metrics across all scenarios and test days of a
    CROSS-DATASET evaluation, weighting each day's value by its support.

    Args:
        results_dict (dict): Cross-evaluation results shaped like
            {scenario_key: {model_name: {'Per_Day_Metrics': {day_name:
            {'Specific Attack Metrics': {attack_class: {'Precision': ...,
            'Recall': ..., 'F1-Score': ..., 'Support': ...}}}}}}}.
        target_report_labels (list): Attack class names to aggregate.

    Returns:
        dict: {model_name: {attack_class: {'Precision': avg_p,
              'Recall': avg_r, 'F1-Score': avg_f1, 'Total Support': sum_s}}}.
              The averages are NaN (and 'Total Support' is 0) when a class has
              no day with positive support. An empty ``results_dict`` yields
              an empty dict.
    """
    # Nothing to aggregate: return early instead of failing on an empty iterator.
    if not results_dict:
        return {}

    # Collect model names from every scenario (first-seen order) so a model
    # that is absent from the first scenario is still reported.
    model_names = list(dict.fromkeys(
        model_name
        for scenario_results in results_dict.values()
        for model_name in scenario_results
    ))

    aggregated_metrics = {}
    for model_name in model_names:
        aggregated_metrics[model_name] = {}
        for target_label in target_report_labels:
            precisions = []
            recalls = []
            f1_scores = []
            supports = []

            for scenario_results in results_dict.values():
                # A scenario that did not evaluate this model contributes nothing.
                if model_name not in scenario_results:
                    continue
                per_day_metrics = scenario_results[model_name]['Per_Day_Metrics']
                for day_metrics in per_day_metrics.values():
                    specific_metrics = day_metrics['Specific Attack Metrics']
                    if target_label not in specific_metrics:
                        continue
                    label_metrics = specific_metrics[target_label]
                    support = label_metrics['Support']
                    # Only days where the class actually occurred carry weight.
                    if support is not None and support > 0:
                        precisions.append(label_metrics['Precision'])
                        recalls.append(label_metrics['Recall'])
                        f1_scores.append(label_metrics['F1-Score'])
                        supports.append(support)

            total_support = sum(supports)
            if total_support > 0:
                avg_precision = np.average(precisions, weights=supports)
                avg_recall = np.average(recalls, weights=supports)
                avg_f1 = np.average(f1_scores, weights=supports)
            else:
                # No supported samples anywhere: the averages are undefined.
                avg_precision = np.nan
                avg_recall = np.nan
                avg_f1 = np.nan

            aggregated_metrics[model_name][target_label] = {
                'Precision': avg_precision,
                'Recall': avg_recall,
                'F1-Score': avg_f1,
                'Total Support': total_support,
            }
    return aggregated_metrics


def display_intra_per_class_metrics(intra_results_dict, target_report_labels):
    """
    Print one per-class metrics table for every (dataset, model) pair of an
    intra-dataset evaluation.

    Args:
        intra_results_dict (dict): Intra-evaluation results; each model entry
            is read at ['Intra-Dataset Test Metrics']['Specific Attack Metrics'].
        target_report_labels (list): Attack class names, one table row each.
    """
    metric_columns = ('Precision', 'Recall', 'F1-Score', 'Support')

    print("\n--- Aggregated Intra-Dataset Per-Class Metrics (Per Dataset) ---")
    for dataset_name, per_model_results in intra_results_dict.items():
        print(f"\nDataset: {dataset_name}")
        for model_name, evaluation in per_model_results.items():
            print(f"\nModel: {model_name}")
            attack_report = evaluation['Intra-Dataset Test Metrics']['Specific Attack Metrics']

            # One row per requested label; a label absent from the report
            # gets NaN scores and a support of 0.
            rows = {
                label: (
                    {column: attack_report[label][column] for column in metric_columns}
                    if label in attack_report
                    else {'Precision': np.nan, 'Recall': np.nan, 'F1-Score': np.nan, 'Support': 0}
                )
                for label in target_report_labels
            }

            table = pd.DataFrame.from_dict(rows, orient='index')
            print(table.round(4))  # four decimal places


def _print_loader_debug_info(label_encoder, all_encoded_labels, broad_label_mapper, broad_label_encoder):
    """Print debug information about the objects returned by the data loader."""
    print("\n--- DEBUG (main_evaluation): State of broad_label_encoder after data loading ---")
    print(f"DEBUG (main_evaluation): Type of broad_label_encoder: {type(broad_label_encoder)}")
    if hasattr(broad_label_encoder, 'classes_'):
        print(f"DEBUG (main_evaluation): broad_label_encoder.classes_: {broad_label_encoder.classes_.tolist()}")
        print(f"DEBUG (main_evaluation): broad_label_encoder.classes_ length: {len(broad_label_encoder.classes_)}")
    else:
        print("DEBUG (main_evaluation): broad_label_encoder has no 'classes_' attribute.")
    print("-----------------------------------------------------------------------------------\n")

    print("\n--- DEBUG: Types of objects returned from load_and_align_all_data ---")
    print(f"Type of label_encoder: {type(label_encoder)}")
    print(f"Type of ALL_ENCODED_LABELS: {type(all_encoded_labels)}")
    print(f"Type of broad_label_mapper: {type(broad_label_mapper)}")
    print(f"Type of broad_label_encoder: {type(broad_label_encoder)}")
    print("-------------------------------------------------------------------\n")


def _print_decoded_counts(heading, counts_encoded, labels_decoded):
    """Print *heading* followed by one ``label: count`` line per entry."""
    print(heading)
    for decoded_label, count in zip(labels_decoded, counts_encoded):
        print(f"  {decoded_label}: {count}")


def _report_label_distribution(all_combined_dfs, label_encoder, broad_label_encoder, target_report_labels):
    """Print granular/broad label counts per dataset and flag label-coverage gaps."""
    print("\n--- Harmonized Label Distribution Check ---")

    # The comparison set is the same for every dataset: the report labels plus
    # 'Benign' and 'Other Attack'.
    expected_labels_set = set(target_report_labels) | {'Benign', 'Other Attack'}

    for dataset_name, df in all_combined_dfs.items():
        print(f"\nLabel distribution for {dataset_name}:")

        # Encoded labels are decoded back to strings for readability.
        granular_counts_encoded = df['Label'].value_counts()
        granular_labels_decoded = label_encoder.inverse_transform(granular_counts_encoded.index)
        _print_decoded_counts("Granular Labels (decoded):", granular_counts_encoded, granular_labels_decoded)

        broad_counts_encoded = df['BroadLabel'].value_counts()
        broad_labels_decoded = broad_label_encoder.inverse_transform(broad_counts_encoded.index)
        _print_decoded_counts("\nBroad Labels (decoded):", broad_counts_encoded, broad_labels_decoded)

        broad_labels_in_df_decoded = set(broad_labels_decoded)

        unaccounted_labels = broad_labels_in_df_decoded - expected_labels_set
        if unaccounted_labels:
            print(f"  WARNING: Broad labels in {dataset_name} not covered by TARGET_ATTACK_LABELS_STR_BROAD + 'Benign'/'Other Attack': {unaccounted_labels}")

        missing_labels = expected_labels_set - broad_labels_in_df_decoded
        if missing_labels:
            print(f"  INFO: TARGET_ATTACK_LABELS_STR_BROAD includes broad labels not found in {dataset_name}: {missing_labels}")
    print("-------------------------------------------\n")


def main(run_focused_scenario=True, run_cross_dataset_evaluation=False):
    """
    Orchestrate data loading, the label-distribution sanity check, and the
    cross-dataset / intra-dataset evaluations.

    Args:
        run_focused_scenario (bool): When True (default), load only the broad
            labels Benign/DoS/DDoS/Botnet/Infiltration and report on the four
            attack classes; when False, load everything and report on
            TARGET_ATTACK_LABELS_STR_BROAD.
        run_cross_dataset_evaluation (bool): When True, run the cross-dataset
            evaluation (requires at least two loaded datasets) and print its
            support-weighted per-class aggregate. Defaults to False (skipped).
    """
    print("--- Starting Main Evaluation Script ---")

    # --- SCENARIO CONFIGURATION ---
    if run_focused_scenario:
        print("\n--- Running Focused Evaluation: Benign, DoS, DDoS, Botnet, Infiltration ---")
        # Broad string labels kept by the loader for this focused scenario.
        focused_broad_labels_to_keep = ['Benign', 'DoS', 'DDoS', 'Botnet', 'Infiltration']
        # Attack classes shown in the specific-metrics report ('Benign' is excluded).
        current_target_report_labels = ['DoS', 'DDoS', 'Botnet', 'Infiltration']
        loaded = load_and_align_all_data(DATASET_PATHS, focus_labels=focused_broad_labels_to_keep)
    else:
        print("\n--- Running Full Multi-Class Evaluation ---")
        loaded = load_and_align_all_data(DATASET_PATHS)
        # Use the default target report labels from constants.
        current_target_report_labels = TARGET_ATTACK_LABELS_STR_BROAD

    (all_combined_dfs, all_individual_dfs_by_dataset, label_encoder,
     all_encoded_labels, common_features, broad_label_mapper,
     broad_label_encoder) = loaded
    # --- END SCENARIO CONFIGURATION ---

    _print_loader_debug_info(label_encoder, all_encoded_labels, broad_label_mapper, broad_label_encoder)

    _report_label_distribution(
        all_combined_dfs, label_encoder, broad_label_encoder, current_target_report_labels
    )

    # Run Cross-Dataset Evaluation
    cross_results = {}  # stays empty when the cross-dataset evaluation is skipped
    if run_cross_dataset_evaluation and len(all_combined_dfs) >= 2:
        cross_results = run_cross_dataset_evaluation(
            all_combined_dfs,
            all_individual_dfs_by_dataset,
            label_encoder,                # Granular LabelEncoder
            all_encoded_labels,           # All encoded granular labels
            common_features,
            broad_label_mapper,           # Maps granular encoded labels to broad encoded labels
            broad_label_encoder,          # LabelEncoder for broad labels
            current_target_report_labels  # Attack classes to report on
        )
        print("\n--- Cross-Dataset Evaluation Complete ---")
        print("\n--- Aggregated Cross-Dataset Per-Class Metrics (Weighted Average) ---")
        aggregated_cross_metrics = aggregate_per_class_metrics(cross_results, current_target_report_labels)
        for model, metrics in aggregated_cross_metrics.items():
            print(f"\nModel: {model}")
            df_metrics = pd.DataFrame.from_dict(metrics, orient='index')
            print(df_metrics.round(4))
    elif not run_cross_dataset_evaluation:
        print("Skipping Cross-Dataset Evaluation: Explicitly disabled by configuration.")
    else:
        print("Skipping Cross-Dataset Evaluation: Less than two datasets loaded for cross-evaluation.")

    # Run Intra-Dataset Evaluation
    intra_results = run_intra_dataset_evaluation(
        all_combined_dfs,
        label_encoder,
        all_encoded_labels,
        common_features,
        broad_label_mapper,           # Maps granular encoded labels to broad encoded labels
        broad_label_encoder,          # LabelEncoder for broad labels
        current_target_report_labels  # Attack classes to report on
    )
    print("\n--- Intra-Dataset Evaluation Complete ---")

    display_intra_per_class_metrics(intra_results, current_target_report_labels)

# Run the evaluation only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()

--- Starting Main Evaluation Script ---

--- Running Focused Evaluation: Benign, DoS, DDoS, Botnet, Infiltration ---

--- Loading and Preprocessing CIC_IDS_2018 ---
DEBUG: Raw labels in DDoS1-Tuesday-20-02-2018_TrafficForML_CICFlowMeter of CIC_IDS_2018 before harmonization: ['Benign', 'DDoS attacks-LOIC-HTTP']
Harmonized granular labels: {'Benign', 'DDoS'}
DEBUG: Raw labels in Web1-Thursday-22-02-2018_TrafficForML_CICFlowMeter of CIC_IDS_2018 before harmonization: ['Benign', 'Brute Force -Web', 'Brute Force -XSS', 'SQL Injection']
Harmonized granular labels: {'Web Attack - XSS', 'Web Attack - SQL Injection', 'Web Attack - Brute Force', 'Benign'}
DEBUG: Raw labels in Botnet-Friday-02-03-2018_TrafficForML_CICFlowMeter of CIC_IDS_2018 before harmonization: ['Benign', 'Bot']
Harmonized granular labels: {'Botnet', 'Benign'}
DEBUG: Raw labels in DDoS2-Wednesday-21-02-2018_TrafficForML_CICFlowMeter of CIC_IDS_2018 before harmonization: ['Benign', 'DDOS attack-LOIC-UDP', 'DDOS attack-HOIC']
Ha



Hyperparameter optimization for Logistic Regression completed in 367.04 seconds.
Best parameters for Logistic Regression: {'C': 0.1, 'max_iter': 5000, 'penalty': 'l2', 'solver': 'saga', 'tol': 0.01}
Best cross-validation score (f1_weighted) for Logistic Regression: 0.8994

--- Hyperparameter Optimization for Random Forest (Intra-Dataset: CIC_IDS_2018) ---
Fitting 5 folds for each of 8 candidates, totalling 40 fits
[CV] END max_features=sqrt, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time= 4.1min
[CV] END max_features=sqrt, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time= 4.2min
[CV] END max_features=sqrt, min_samples_leaf=1, min_samples_split=5, n_estimators=50; total time= 4.3min
[CV] END max_features=sqrt, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time= 4.3min
[CV] END max_features=sqrt, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time= 4.3min
[CV] END max_features=sqrt, min_samples_leaf=1, min_sampl



Hyperparameter optimization for Random Forest completed in 1349.73 seconds.
Best parameters for Random Forest: {'max_features': 'sqrt', 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 100}
Best cross-validation score (f1_weighted) for Random Forest: 0.9737

--- Hyperparameter Optimization for XGBoost (Intra-Dataset: CIC_IDS_2018) ---
Fitting 5 folds for each of 16 candidates, totalling 80 fits


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=3, n_estimators=50, subsample=1.0; total time= 1.1min
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=3, n_estimators=50, subsample=1.0; total time= 1.1min
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=3, n_estimators=50, subsample=0.8; total time= 1.1min
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=3, n_estimators=50, subsample=0.8; total time= 1.1min
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=3, n_estimators=50, subsample=0.8; total time= 1.1min
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=3, n_estimators=50, subsample=0.8; total time= 1.1min
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=3, n_estimators=50, subsample=0.8; total time= 1.1min
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=3, n_estimators=50, subsample=1.0; total time= 1.1min
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=3, n_estimators=50, subsampl

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8; total time= 2.0min
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8; total time= 2.1min


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=5, n_estimators=50, subsample=0.8; total time= 1.3min
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=5, n_estimators=50, subsample=0.8; total time= 1.3min


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0; total time= 2.0min
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0; total time= 2.0min
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0; total time= 2.0min
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0; total time= 2.0min
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8; total time= 2.1min
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8; total time= 2.1min
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8; total time= 2.1min
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=5, n_estimators=50, subsample=0.8; total time= 1.3min
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=3, n_estimators=100, 

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=5, n_estimators=50, subsample=0.8; total time= 1.3min


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=5, n_estimators=50, subsample=0.8; total time= 1.2min
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=5, n_estimators=50, subsample=1.0; total time= 1.2min


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=5, n_estimators=50, subsample=1.0; total time= 1.2min
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=5, n_estimators=50, subsample=1.0; total time= 1.2min
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=5, n_estimators=50, subsample=1.0; total time= 1.2min
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=5, n_estimators=50, subsample=1.0; total time= 1.2min


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8; total time= 2.2min
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8; total time= 2.3min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=3, n_estimators=50, subsample=0.8; total time= 1.1min
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8; total time= 2.2min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=3, n_estimators=50, subsample=0.8; total time= 1.1min
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8; total time= 2.2min
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8; total time= 2.2min


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1.0; total time= 2.2min


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1.0; total time= 2.2min
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1.0; total time= 2.3min


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=3, n_estimators=50, subsample=0.8; total time= 1.1min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=3, n_estimators=50, subsample=1.0; total time= 1.0min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=3, n_estimators=50, subsample=1.0; total time= 1.0min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=3, n_estimators=50, subsample=0.8; total time= 1.1min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=3, n_estimators=50, subsample=0.8; total time= 1.1min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=3, n_estimators=50, subsample=1.0; total time= 1.0min
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1.0; total time= 2.2min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=3, n_estimators=50, subsample=1.0; total time= 1.0min
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=5, n_estimators=100, subsam

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=50, subsample=0.8; total time= 1.1min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=50, subsample=0.8; total time= 1.1min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8; total time= 1.9min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8; total time= 1.9min


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0; total time= 1.9min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0; total time= 1.9min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8; total time= 1.9min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8; total time= 1.9min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8; total time= 1.9min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0; total time= 1.9min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0; total time= 1.9min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0; total time= 1.9min


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=50, subsample=0.8; total time= 1.2min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=50, subsample=0.8; total time= 1.2min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=50, subsample=0.8; total time= 1.2min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=50, subsample=1.0; total time= 1.2min


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=50, subsample=1.0; total time= 1.2min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=50, subsample=1.0; total time= 1.2min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=50, subsample=1.0; total time= 1.2min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=50, subsample=1.0; total time= 1.2min


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8; total time= 2.1min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8; total time= 2.1min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8; total time= 2.1min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8; total time= 2.1min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1.0; total time= 1.8min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8; total time= 1.8min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1.0; total time= 1.7min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1.0; total time= 1.7min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=100,

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Hyperparameter optimization for XGBoost completed in 685.97 seconds.
Best parameters for XGBoost: {'colsample_bytree': 0.8, 'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 100, 'subsample': 0.8}
Best cross-validation score (f1_weighted) for XGBoost: 0.9737
  Mean CV balanced_accuracy: 0.8037

--- Evaluation of Optimized XGBoost on Intra-Dataset Test Set (CIC_IDS_2018) ---

--- DEBUG: calculate_specificity called ---
DEBUG: Type of broad_label_encoder: <class 'sklearn.preprocessing._label.LabelEncoder'>
DEBUG: broad_label_encoder.classes_ (all): ['Benign', 'Botnet', 'DDoS', 'DoS', 'Infiltration']
DEBUG: broad_label_encoder.classes_ (length): 5
DEBUG: y_true unique values: [0, 1, 2, 3, 4]
DEBUG: y_pred unique values: [0, 1, 2, 3, 4]
Accuracy: 0.9819
Precision (weighted): 0.9756
Recall (weighted): 0.9819
F1-Score (weighted): 0.9736
Balanced Accuracy: 0.8029
Specificity (avg): 0.9810
ROC AUC (weighted): 0.9875
  Mean CV precision_weighted: 0.9756

--- Evaluation of Optimized XGBoost 



Hyperparameter optimization for Logistic Regression completed in 265.82 seconds.
Best parameters for Logistic Regression: {'C': 1, 'max_iter': 5000, 'penalty': 'l2', 'solver': 'saga', 'tol': 0.01}
Best cross-validation score (f1_weighted) for Logistic Regression: 0.9471

--- Hyperparameter Optimization for Random Forest (Intra-Dataset: CIC_IDS_2017) ---
Fitting 5 folds for each of 8 candidates, totalling 40 fits
[CV] END max_features=sqrt, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time= 3.6min
[CV] END max_features=sqrt, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time= 3.6min
[CV] END max_features=sqrt, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time= 3.7min
[CV] END max_features=sqrt, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time= 3.9min
[CV] END max_features=sqrt, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time= 3.9min
[CV] END max_features=sqrt, min_samples_leaf=1, min_samples



Hyperparameter optimization for Random Forest completed in 1192.98 seconds.
Best parameters for Random Forest: {'max_features': 'sqrt', 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 100}
Best cross-validation score (f1_weighted) for Random Forest: 0.9987

--- Hyperparameter Optimization for XGBoost (Intra-Dataset: CIC_IDS_2017) ---
Fitting 5 folds for each of 16 candidates, totalling 80 fits


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=3, n_estimators=50, subsample=1.0; total time=  53.6s
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=3, n_estimators=50, subsample=1.0; total time=  54.0s
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=3, n_estimators=50, subsample=1.0; total time=  54.1s
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=3, n_estimators=50, subsample=1.0; total time=  54.4s
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=3, n_estimators=50, subsample=1.0; total time=  54.5s
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=3, n_estimators=50, subsample=0.8; total time=  54.9s
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=3, n_estimators=50, subsample=0.8; total time=  55.0s
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=3, n_estimators=50, subsample=0.8; total time=  55.3s
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=3, n_estimators=50, subsampl

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8; total time= 1.8min
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8; total time= 1.8min


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=5, n_estimators=50, subsample=0.8; total time= 1.0min
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=5, n_estimators=50, subsample=0.8; total time= 1.0min


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0; total time= 1.8min
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0; total time= 1.8min
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0; total time= 1.8min
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8; total time= 1.8min
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0; total time= 1.8min
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8; total time= 1.8min
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0; total time= 1.8min
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=5, n_estimators=50, subsample=0.8; total time=  59.2s
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=5, n_estimators=50, s

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=5, n_estimators=50, subsample=0.8; total time=  57.5s
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=5, n_estimators=50, subsample=1.0; total time=  57.1s


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=5, n_estimators=50, subsample=1.0; total time=  59.6s
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=5, n_estimators=50, subsample=1.0; total time=  59.7s
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=5, n_estimators=50, subsample=1.0; total time=  59.8s
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=5, n_estimators=50, subsample=1.0; total time=  60.0s


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=3, n_estimators=50, subsample=0.8; total time= 1.0min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=3, n_estimators=50, subsample=0.8; total time= 1.0min
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8; total time= 2.1min
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1.0; total time= 2.0min
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8; total time= 2.1min
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8; total time= 2.1min
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8; total time= 2.1min
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8; total time= 2.1min


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1.0; total time= 2.1min
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1.0; total time= 2.1min


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=3, n_estimators=50, subsample=0.8; total time=  59.6s
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=3, n_estimators=50, subsample=0.8; total time=  59.2s
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=3, n_estimators=50, subsample=1.0; total time=  58.8s
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=3, n_estimators=50, subsample=0.8; total time=  59.4s
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=3, n_estimators=50, subsample=1.0; total time=  58.6s
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=3, n_estimators=50, subsample=1.0; total time=  57.4s
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=3, n_estimators=50, subsample=1.0; total time=  57.8s
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1.0; total time= 2.1min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=3, n_estimators=50, subsamp

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=50, subsample=0.8; total time= 1.0min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=50, subsample=0.8; total time= 1.1min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8; total time= 1.8min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8; total time= 1.8min


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8; total time= 1.9min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8; total time= 1.9min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0; total time= 1.9min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0; total time= 1.9min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0; total time= 1.9min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0; total time= 1.9min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0; total time= 1.9min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8; total time= 1.9min


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=50, subsample=0.8; total time= 1.1min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=50, subsample=0.8; total time= 1.1min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=50, subsample=1.0; total time= 1.1min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=50, subsample=0.8; total time= 1.1min


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=50, subsample=1.0; total time= 1.0min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=50, subsample=1.0; total time= 1.0min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=50, subsample=1.0; total time= 1.0min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=50, subsample=1.0; total time= 1.0min


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8; total time= 1.8min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8; total time= 1.8min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8; total time= 1.9min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8; total time= 1.9min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1.0; total time= 1.7min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1.0; total time= 1.7min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8; total time= 1.7min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1.0; total time= 1.7min
[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=100,

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Hyperparameter optimization for XGBoost completed in 618.27 seconds.
Best parameters for XGBoost: {'colsample_bytree': 1.0, 'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 100, 'subsample': 0.8}
Best cross-validation score (f1_weighted) for XGBoost: 0.9990
  Mean CV balanced_accuracy: 0.7096

--- Evaluation of Optimized XGBoost on Intra-Dataset Test Set (CIC_IDS_2017) ---

--- DEBUG: calculate_specificity called ---
DEBUG: Type of broad_label_encoder: <class 'sklearn.preprocessing._label.LabelEncoder'>
DEBUG: broad_label_encoder.classes_ (all): ['Benign', 'Botnet', 'DDoS', 'DoS', 'Infiltration']
DEBUG: broad_label_encoder.classes_ (length): 5
DEBUG: y_true unique values: [0, 1, 2, 3, 4]
DEBUG: y_pred unique values: [0, 1, 2, 3]
Accuracy: 0.9991
Precision (weighted): 0.9991
Recall (weighted): 0.9991
F1-Score (weighted): 0.9991
Balanced Accuracy: 0.7219
Specificity (avg): 0.9992
ROC AUC (weighted): 1.0000
  Mean CV precision_weighted: 0.9990

--- Evaluation of Optimized XGBoost on 