### Model evaluation  using state-of-the-art classification metrics such as accuracy, precision, recall, F1-score, and ROC-AUC. 

In [9]:
#Load Libraries
import pandas as pd
import numpy as np
import torch
import joblib
import lightgbm as lgb
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
import os


In [10]:
# File Paths
DATASET_PATH = "D:/FYP/data/datasets/final_dataset_02.csv"
MODEL_PATHS = {
    #"CatBoost": "D:/FYP/models/trained_models/catboost_model.cbm",
    "LightGBM": "D:/FYP/models/trained_models/lightgbm_intrusion_detection.pkl",
    "Stacking": "D:/FYP/models/trained_models/tabnet_meta_model.pkl",
    "XGboost": "D:/FYP/models/trained_models/xgboost_native_model.pkl"
}

# Load the dataset
df = pd.read_csv(DATASET_PATH)
X = df.drop(columns=['label'])  # Features
y = df['label']  # Target

In [11]:
# Function to load models
def load_model(model_name, model_path):
    """Loads a trained model based on its type."""
    if model_name == "LightGBM":
        # Load LightGBM model (saved as .pkl using joblib)
        model = joblib.load(model_path)
    elif model_name == "XGboost":
        # Load XGBoost model (saved as .pkl using joblib)
        model = joblib.load(model_path)
    elif model_name == "Stacking":
        # Load Stacking model (saved as .pkl using joblib)
        model = joblib.load(model_path)
    else:
        raise ValueError(f"Unsupported model type: {model_name}")
    return model

In [12]:
# Function to make predictions
def predict_model(model_name, model, X_test):
    """Runs inference using the loaded model."""
    if model_name == "Stacking":
        # Stacking model (TabNet or other meta-model) predictions
        return model.predict(X_test)
    elif model_name == "LightGBM":
        # LightGBM outputs probabilities, so round them to get binary predictions
        return np.round(model.predict_proba(X_test)[:, 1])  # Use probabilities for class 1
    elif model_name == "XGboost":
        # XGBoost outputs probabilities, so round them to get binary predictions
        return np.round(model.predict_proba(X_test)[:, 1])  # Use probabilities for class 1
    else:
        raise ValueError(f"Unsupported model type: {model_name}")

In [13]:
# Function to evaluate models
def evaluate_model(model_name, y_true, y_pred):
    """Calculates evaluation metrics for a model."""
    metrics = {
        "Accuracy": accuracy_score(y_true, y_pred),
        "Precision": precision_score(y_true, y_pred, average='weighted'),
        "Recall": recall_score(y_true, y_pred, average='weighted'),
        "F1-score": f1_score(y_true, y_pred, average='weighted'),
        "ROC-AUC": roc_auc_score(y_true, y_pred)
    }
    return metrics

In [14]:
# Set the environment variable to silence the warning
os.environ["LOKY_MAX_CPU_COUNT"] = "4"  # Set to the number of cores you want to use

# Evaluate all models
results = {}
for model_name, model_path in MODEL_PATHS.items():
    print(f"\n Evaluating {model_name}...")
    model = load_model(model_name, model_path)  # Load Model
    
    # Ensure X is in the correct format for the model
    if model_name == "Stacking":
        # TabNet expects a numpy array or DataFrame without column names
        X_formatted = X.values if isinstance(X, pd.DataFrame) else X
    else:
        # LightGBM and XGBoost can handle pandas DataFrames directly
        X_formatted = X
    
    # Make predictions
    y_pred = predict_model(model_name, model, X_formatted)  # Make Predictions
    
    # Compute metrics
    results[model_name] = evaluate_model(model_name, y, y_pred)  # Compute Metrics


 Evaluating LightGBM...

 Evaluating Stacking...


RuntimeError: running_mean should contain 11 elements not 2

In [15]:
# Display Results
results_df = pd.DataFrame(results).T  # Convert dictionary to DataFrame
print("\n📊 Model Evaluation Metrics:")
print(results_df)

# Save results to CSV
RESULTS_PATH = "D:/FYP/models/trained_models/evaluation_results.csv"
results_df.to_csv(RESULTS_PATH)
print(f"\n✅ Evaluation results saved to {RESULTS_PATH}")



📊 Model Evaluation Metrics:
          Accuracy  Precision    Recall  F1-score   ROC-AUC
LightGBM  0.999955   0.999955  0.999955  0.999955  0.999955

✅ Evaluation results saved to D:/FYP/models/trained_models/evaluation_results.csv
