In [1]:
# This code compares the predicted values with the true values and generates some analytics
import pandas as pd
import numpy as np
from sklearn.metrics import (
    classification_report,
    confusion_matrix,
    accuracy_score,
    matthews_corrcoef,
    balanced_accuracy_score
)
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
import seaborn as sns

# --- Configuration ---
# CSV file to evaluate; its last two columns are read as (predicted label, true label).
PREDICTIONS_FILE = 'unseen_predictions.csv'

# --- 1. Load Data and Extract Columns ---
print(f"1. Loading data from {PREDICTIONS_FILE} and extracting the last two columns...")
try:
    # Keep the try body to the single call that can raise FileNotFoundError.
    df = pd.read_csv(PREDICTIONS_FILE)
except FileNotFoundError:
    print(f"ERROR: File {PREDICTIONS_FILE} not found.")
    # Raise SystemExit directly instead of calling exit(): that helper is injected by the
    # `site` module for interactive sessions and, called without arguments, reports
    # success (status 0) even though loading failed.
    raise SystemExit(1)

# Trim surrounding whitespace from the header names.
df.columns = df.columns.str.strip()

# The positional lookups below need at least two columns; fail with a clear message
# instead of a bare IndexError.
if df.shape[1] < 2:
    print(f"ERROR: File {PREDICTIONS_FILE} must contain at least two columns (predicted, true).")
    raise SystemExit(1)

# Second-to-last column -> predictions, last column -> ground truth.
# Both are cast to str and stripped so the labels compare equal regardless of padding.
y_pred_labels = df.iloc[:, -2].astype(str).str.strip()
y_true_labels = df.iloc[:, -1].astype(str).str.strip()

# --- 2. Encode Labels for Scikit-learn ---
print("2. Encoding labels...")

# Fit the encoder on every label that occurs in either column, so that transforming
# the true labels and the predicted labels can never hit an unseen class.
all_labels = pd.concat([y_true_labels, y_pred_labels]).unique()
le = LabelEncoder().fit(all_labels)

# Integer-encode both columns with the shared encoder.
y_true_encoded, y_pred_encoded = (
    le.transform(labels) for labels in (y_true_labels, y_pred_labels)
)

# Class names in encoder order; reused later for report rows and plot tick labels.
target_names = le.classes_
print(f"   Labels encoded: {dict(zip(le.classes_, le.transform(le.classes_)))}")

# --- 3. Performance Metrics Calculation and Analysis ---
print("\n3. Analyzing Model Performance and Metrics...")

# 3a. Overall accuracy, plus balanced accuracy as the imbalance-aware counterpart.
accuracy = accuracy_score(y_true_encoded, y_pred_encoded)
balanced_acc = balanced_accuracy_score(y_true_encoded, y_pred_encoded)

print("\n--- Key Summary Metrics ---")
print(f"Model Accuracy (Overall): {accuracy * 100:.4f}%")
print(f"Balanced Accuracy: {balanced_acc * 100:.4f}%")

# 3b. Matthews Correlation Coefficient (MCC).
# Ranges over [-1, 1]: 1 is perfect prediction, 0 is random, -1 is total disagreement.
mcc = matthews_corrcoef(y_true_encoded, y_pred_encoded)
print(f"Matthews Correlation Coefficient (MCC): {mcc:.4f}")

# 3c. Per-class precision / recall / F1, with rows named after the original labels.
print("\n--- Classification Report (Detailed Metrics) ---")
print(classification_report(y_true_encoded, y_pred_encoded, target_names=target_names))

# 3d. Confusion matrix: printed as raw counts, then rendered as a heatmap image.
cm = confusion_matrix(y_true_encoded, y_pred_encoded)
print("\n--- Confusion Matrix (Raw Numbers) ---")
print(cm)

# Draw on an explicit figure/axes pair and close that figure once it is saved.
fig, ax = plt.subplots(figsize=(7, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=target_names,
    yticklabels=target_names,
    ax=ax,
)
ax.set_title('Confusion Matrix: True vs. Predicted')
ax.set_ylabel('True Label')
ax.set_xlabel('Predicted Label')
fig.savefig('final_performance_confusion_matrix.png')
plt.close(fig)
print("   Confusion matrix plot saved as final_performance_confusion_matrix.png")

# --- 4. NEW: Misclassification Analysis ---
# Identifying the specific errors is essential for model tuning and security context.
print("\n4. Error Analysis (Misclassifications) ---")

# Label values that define the two error directions reported below.
POSITIVE_LABEL = 'Bot'
NEGATIVE_LABEL = 'Benign'

# Pair up the raw (string) labels row by row and keep only the rows that disagree.
analysis_df = pd.DataFrame({
    'True_Label': y_true_labels.values,
    'Predicted_Label': y_pred_labels.values
})
misclassified_df = analysis_df[analysis_df['True_Label'] != analysis_df['Predicted_Label']]
error_count = len(misclassified_df)

if error_count > 0:
    print(f"Total Misclassifications: {error_count}")

    true_col = misclassified_df['True_Label']
    pred_col = misclassified_df['Predicted_Label']

    # Error Type 1: False Positives (Benign wrongly flagged as Bot - Alert Fatigue).
    # Filter on BOTH columns so the count matches its "Benign -> Bot" caption even when
    # the file contains labels other than these two.
    false_positives = misclassified_df[(true_col == NEGATIVE_LABEL) & (pred_col == POSITIVE_LABEL)]
    print(f"   - False Positives (Benign -> Bot): {len(false_positives)}")

    # Error Type 2: False Negatives (Bot wrongly missed as Benign - Security Risk!).
    false_negatives = misclassified_df[(true_col == POSITIVE_LABEL) & (pred_col == NEGATIVE_LABEL)]
    print(f"   - False Negatives (Bot -> Benign): {len(false_negatives)}")

    print("\nMisclassification Summary (True Label -> Predicted Label):")
    error_summary = misclassified_df.groupby(['True_Label', 'Predicted_Label']).size().reset_index(name='Count')
    try:
        print(error_summary.to_markdown(index=False))
    except ImportError:
        # to_markdown needs the optional `tabulate` package; fall back to a plain-text
        # table rather than aborting the analysis when it is not installed.
        print(error_summary.to_string(index=False))
else:
    print("Zero misclassifications found! (Perfect model or very small test set)")

1. Loading data from unseen_predictions.csv and extracting the last two columns...
2. Encoding labels...
   Labels encoded: {'Benign': np.int64(0), 'Bot': np.int64(1)}

3. Analyzing Model Performance and Metrics...

--- Key Summary Metrics ---
Model Accuracy (Overall): 100.0000%
Balanced Accuracy: 100.0000%
Matthews Correlation Coefficient (MCC): 1.0000

--- Classification Report (Detailed Metrics) ---
              precision    recall  f1-score   support

      Benign       1.00      1.00      1.00       500
         Bot       1.00      1.00      1.00       500

    accuracy                           1.00      1000
   macro avg       1.00      1.00      1.00      1000
weighted avg       1.00      1.00      1.00      1000


--- Confusion Matrix (Raw Numbers) ---
[[500   0]
 [  0 500]]
   Confusion matrix plot saved as final_performance_confusion_matrix.png

4. Error Analysis (Misclassifications) ---
Zero misclassifications found! (Perfect model or very small test set)
