## 1. Imports and Configuration

In [1]:
# Standard Library
import os
from datetime import datetime

# Third-Party Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
import lightgbm as lgb

# Scikit-Learn - Model Selection
from sklearn.model_selection import train_test_split, RandomizedSearchCV, StratifiedKFold

# Scikit-Learn - Classifiers
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier

# Scikit-Learn - Metrics
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    roc_auc_score, roc_curve, confusion_matrix, classification_report
)
from sklearn.metrics import make_scorer

# Visualization Settings
sns.set(style="whitegrid")
%matplotlib inline

MODELS_DIR = os.path.join("data", "models")
os.makedirs(MODELS_DIR, exist_ok=True)

# Configuration
RANDOM_STATE = 42
np.random.seed(RANDOM_STATE)


In [2]:
def load_feature_method(method):
    """Load train/test engineered CSVs for a given method (raw/pca/ica).

    Returns: (train_df, test_df)
    """
    method_dir = os.path.join("data", "features", method)
    train_path = os.path.join(method_dir, f"combined_engineered_{method}_train.csv")
    test_path = os.path.join(method_dir, f"combined_engineered_{method}_test.csv")

    if not os.path.exists(train_path):
        raise FileNotFoundError(f"Train file for method '{method}' not found at: {train_path}")

    train_df = pd.read_csv(train_path)
    test_df = pd.read_csv(test_path) if os.path.exists(test_path) else pd.DataFrame()

    if test_df.empty:
        print(f"Warning: test file missing for method '{method}'. A 30% stratified holdout will be created from train.")
    print(f"Loaded {method} → train: {train_df.shape}, test: {test_df.shape if not test_df.empty else 'missing'}")
    return train_df, test_df


def available_feature_methods():
    base = os.path.join("data", "features")
    methods = []
    for m in ("raw", "pca", "ica"):
        p = os.path.join(base, m, f"combined_engineered_{m}_train.csv")
        if os.path.exists(p):
            methods.append(m)
    print(f"Available methods with train files: {methods}")
    return methods

In [3]:
# --- Select feature method (change to 'raw', 'pca' or 'ica' as needed) ---
FEATURE_METHOD = globals().get('FEATURE_METHOD', 'ica')  # default: 'raw'
print(f"Selected FEATURE_METHOD: {FEATURE_METHOD}")

# Show what's available on disk
available = available_feature_methods()
print(f"Available on disk: {available}")

# If user already has DataFrames in memory, prefer them; otherwise load from CSVs
if 'train_df' in globals() and 'X_train' in globals():
    print("Using existing in-memory `train_df` and `X_train` variables.")
else:
    train_df, test_df = load_feature_method(FEATURE_METHOD)

# Define label columns and create X/y
label_cols = ["label1", "label2", "label3", "label4", "label_full"]
if 'train_df' not in globals():
    raise RuntimeError("train_df not available. Load data into notebook or choose an available FEATURE_METHOD.")

X_train = train_df.drop(columns=label_cols, errors='ignore')
# Convert primary labels to binary 0/1 here: 0 = benign, 1 = attack
# Treat any label equal to 'benign' (case-insensitive) as benign; everything else -> attack
y_train = (train_df['label1'].astype(str).str.lower() != 'benign').astype(int)

# Prepare test set (use provided test_df if present, otherwise stratified split)
if 'test_df' in globals() and (not test_df.empty):
    X_test = test_df.drop(columns=label_cols, errors='ignore')
    y_test = (test_df['label1'].astype(str).str.lower() != 'benign').astype(int)
    print(f"Using provided test_df → X_test: {X_test.shape}, y_test: {y_test.shape}")
else:
    print("No test_df found — creating stratified holdout from train (30%).")
    X_train_full = X_train.copy()
    y_train_full = y_train.copy()
    X_train, X_test, y_train, y_test = train_test_split(
        X_train_full, y_train_full, test_size=0.30, random_state=RANDOM_STATE, stratify=y_train_full
    )
    print(f"After split → X_train: {X_train.shape}, X_test: {X_test.shape}")

# Quick sanity print
print(f"Final shapes → X_train: {X_train.shape}, y_train: {y_train.shape}, X_test: {X_test.shape}, y_test: {y_test.shape}")


Selected FEATURE_METHOD: ica
Available methods with train files: ['raw', 'pca', 'ica']
Available on disk: ['raw', 'pca', 'ica']
Loaded ica → train: (194926, 24), test: (83540, 24)
Using provided test_df → X_test: (83540, 19), y_test: (83540,)
Final shapes → X_train: (194926, 19), y_train: (194926,), X_test: (83540, 19), y_test: (83540,)


## 4. Metrics Computation Functions

In [4]:
def compute_metrics(y_true, y_pred, y_prob=None, class_labels=None):
    """
    Compute binary classification metrics assuming numeric 0/1 labels.

    Expects:
      - y_true: iterable of 0/1
      - y_pred: iterable of 0/1
      - y_prob: probabilities for the positive class (1), optional

    Returns a dict with accuracy, precision, recall, specificity, f1, roc_auc and confusion_matrix.
    """
    # Coerce to integer numpy arrays (expect 0/1)
    y_true_int = np.array(y_true).astype(int)
    y_pred_int = np.array(y_pred).astype(int)

    # Basic metrics
    acc = accuracy_score(y_true_int, y_pred_int)
    prec = precision_score(y_true_int, y_pred_int, zero_division=0)
    rec = recall_score(y_true_int, y_pred_int, zero_division=0)
    f1 = f1_score(y_true_int, y_pred_int, zero_division=0)

    # Confusion matrix and specificity
    cm = confusion_matrix(y_true_int, y_pred_int)
    if cm.shape == (2, 2):
        tn, fp, fn, tp = cm.ravel()
        spec = tn / (tn + fp) if (tn + fp) > 0 else 0.0
    else:
        spec = np.nan

    # ROC-AUC using provided probabilities for positive class (1)
    if y_prob is not None:
        y_prob_arr = np.array(y_prob)
        y_true_bin = (y_true_int == 1).astype(int)
        try:
            roc_auc = roc_auc_score(y_true_bin, y_prob_arr)
        except Exception:
            roc_auc = np.nan
    else:
        roc_auc = np.nan

    return {
        "accuracy": acc,
        "precision": prec,
        "recall": rec,
        "specificity": spec,
        "f1": f1,
        "roc_auc": roc_auc,
        "confusion_matrix": cm
    }


## 5. Model Definition and Hyperparameter Grids

In [5]:
model_defs = {}

# ============================================================================
# LINEAR MODELS
# ============================================================================

# Logistic Regression - Fast baseline linear classifier
model_defs["log_reg"] = {
    "estimator": LogisticRegression(max_iter=5000, random_state=RANDOM_STATE),
    "param_grid": {
        "penalty": ["l2"],
        "C": [0.1, 1, 10, 100],
        "solver": ["lbfgs", "saga"],
        "class_weight": [None, "balanced"]
    }
}

# ============================================================================
# TREE-BASED MODELS
# ============================================================================

# Random Forest - Ensemble of decision trees
model_defs["rf"] = {
    "estimator": RandomForestClassifier(random_state=RANDOM_STATE, n_jobs=-1),
    "param_grid": {
        "n_estimators": [50, 100, 200],
        "max_depth": [10, 20, 30, None],
        "min_samples_split": [2, 5, 10],
        "min_samples_leaf": [1, 2, 4],
        "max_features": ["sqrt"],
        "class_weight": [None, "balanced"]
    }
}

# LightGBM - Fast gradient boosting classifier
model_defs["lgb"] = {
    "estimator": lgb.LGBMClassifier(
        random_state=RANDOM_STATE, 
        n_jobs=-1, 
        verbose=-1,
        is_unbalance=True
    ),
    "param_grid": {
        "n_estimators": [100, 200, 300],
        "max_depth": [5, 10, 15, -1],
        "learning_rate": [0.01, 0.05, 0.1],
        "num_leaves": [20, 30, 40, 50],
        "min_data_in_leaf": [10, 20, 30],
        "feature_fraction": [0.8, 0.9, 1.0],
        "bagging_fraction": [0.8, 0.9, 1.0]
    }
}

# ============================================================================
# NEURAL NETWORK MODELS
# ============================================================================

# Multi-Layer Perceptron - Neural network classifier
model_defs["mlp"] = {
    "estimator": MLPClassifier(random_state=RANDOM_STATE, max_iter=500, early_stopping=True),
    "param_grid": {
        "hidden_layer_sizes": [(64,), (128,), (64, 32), (128, 64)],
        "activation": ["relu", "tanh"],
        "alpha": [0.0001, 0.001, 0.01],
        "learning_rate": ["constant", "adaptive"],
        "batch_size": [32, 64]
    }
}

# ============================================================================
# DISTANCE-BASED & PROBABILISTIC MODELS
# ============================================================================

# K-Nearest Neighbors - Distance-based instance classifier
model_defs["knn"] = {
    "estimator": KNeighborsClassifier(),
    "param_grid": {
        "n_neighbors": [3, 5, 7, 9, 11],
        "weights": ["uniform", "distance"],
        "metric": ["euclidean", "manhattan"]
    }
}

# Gaussian Naive Bayes - Probabilistic classifier
model_defs["nb"] = {
    "estimator": GaussianNB(),
    "param_grid": {
        "var_smoothing": np.logspace(-10, -6, 5)
    }
}


# ============================================================================
# MODEL TRAINING CONFIGURATION
# ============================================================================

MODELS_TO_RUN = ["log_reg", "rf", "lgb", "mlp", "knn", "nb"]

print(f"Models to train: {MODELS_TO_RUN}")
print(f"Optimization method: RandomizedSearchCV")
print(f"Cross-validation folds: 3")
print(f"Iterations per model: 10")
print(f"Total models: {len(MODELS_TO_RUN)}")

Models to train: ['log_reg', 'rf', 'lgb', 'mlp', 'knn', 'nb']
Optimization method: RandomizedSearchCV
Cross-validation folds: 3
Iterations per model: 10
Total models: 6


## 6. Model Training and Evaluation Function

In [6]:
def train_and_evaluate_model(model_key, model_def, X_train, X_test, y_train, y_test,
                             base_dir, cv_folds=3, n_iter=10, class_labels=None):
    """
    Train a model using RandomizedSearchCV and evaluate on test set.
    
    Parameters:
    -----------
    model_key : str
        Model identifier
    model_def : dict
        Dictionary containing estimator and parameter grid
    X_train, X_test : array-like
        Training and test feature matrices
    y_train, y_test : array-like
        Training and test labels (expected 0/1)
    base_dir : str
        Base directory for saving results
    cv_folds : int
        Number of cross-validation folds
    n_iter : int
        Number of RandomizedSearchCV iterations
        
    Returns:
    --------
    tuple : (results_dict, best_model, predictions, probabilities, metrics)
    """
    estimator = model_def["estimator"]
    param_grid = model_def["param_grid"]
    
    MODEL_DIR = os.path.join(base_dir, model_key)
    os.makedirs(MODEL_DIR, exist_ok=True)
    
    print(f"\n{'='*60}")
    print(f"Training {model_key.upper()}")
    print(f"{'='*60}")
    
    # Perform hyperparameter tuning using RandomizedSearchCV
    if param_grid:
        cv = StratifiedKFold(n_splits=cv_folds, shuffle=True, random_state=RANDOM_STATE)
        search = RandomizedSearchCV(
            estimator,
            param_distributions=param_grid,
            n_iter=n_iter,
            scoring="f1",
            cv=cv,
            n_jobs=-1,
            random_state=RANDOM_STATE,
            verbose=1
        )
        search.fit(X_train, y_train)
        best_model = search.best_estimator_
        best_params = search.best_params_
        cv_f1 = search.best_score_
    else:
        best_model = estimator
        best_model.fit(X_train, y_train)
        best_params = {}
        cv_f1 = np.nan
    
    print(f"Best parameters: {best_params}")
    if not np.isnan(cv_f1):
        print(f"Cross-validation F1 score: {cv_f1:.4f}")
    else:
        print(f"Cross-validation F1 score: N/A")
    
    # Generate predictions on test set
    y_pred = best_model.predict(X_test)
    
    # Extract probability estimates for ROC curve
    if hasattr(best_model, "predict_proba"):
        y_prob = best_model.predict_proba(X_test)[:, 1]
    elif hasattr(best_model, "decision_function"):
        y_prob = best_model.decision_function(X_test)
        y_prob = (y_prob - y_prob.min()) / (y_prob.max() - y_prob.min() + 1e-10)
    else:
        y_prob = y_pred.astype(float)
    
    # Compute comprehensive metrics (expect numeric 0/1 labels)
    labels_names = ["Benign", "Attack"]
    metrics = compute_metrics(y_test, y_pred, y_prob)
    
    # Generate and save confusion matrix visualization
    cm = metrics["confusion_matrix"]
    plt.figure(figsize=(5, 4))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
                xticklabels=labels_names, yticklabels=labels_names,
                cbar_kws={'label': 'Count'})
    plt.title(f"{model_key.upper()} - Confusion Matrix")
    plt.xlabel("Predicted")
    plt.ylabel("True")
    cm_path = os.path.join(MODEL_DIR, f"{model_key}_confusion_matrix.png")
    plt.tight_layout()
    plt.savefig(cm_path, dpi=300, bbox_inches='tight')
    plt.close()
    
    # Generate and save ROC curve visualization
    # For ROC we need binary true labels (0/1)
    y_test_bin = np.array(y_test).astype(int)
    fpr, tpr, _ = roc_curve(y_test_bin, y_prob)
    roc_auc = roc_auc_score(y_test_bin, y_prob)
    plt.figure(figsize=(6, 5))
    plt.plot(fpr, tpr, color='darkorange', lw=2.5, label=f"AUC = {roc_auc:.4f}")
    plt.plot([0, 1], [0, 1], "k--", lw=1.5, label="Random Classifier")
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel("False Positive Rate", fontsize=11)
    plt.ylabel("True Positive Rate", fontsize=11)
    plt.title(f"ROC Curve - {model_key.upper()}", fontsize=12)
    plt.legend(loc="lower right")
    plt.grid(alpha=0.3)
    roc_path = os.path.join(MODEL_DIR, f"{model_key}_roc_curve.png")
    plt.tight_layout()
    plt.savefig(roc_path, dpi=300, bbox_inches='tight')
    plt.close()
    
    print(f"Visualizations saved for {model_key}")
    
    # Return summary metrics (include best_model for saving later)
    return {
        "model": model_key,
        "best_params": str(best_params),
        "cv_f1_score": cv_f1,
        "test_accuracy": metrics["accuracy"],
        "test_precision": metrics["precision"],
        "test_recall": metrics["recall"],
        "test_specificity": metrics["specificity"],
        "test_f1": metrics["f1"],
        "test_roc_auc": metrics["roc_auc"]
    }, best_model, y_pred, y_prob, metrics

## 7. Main Training Loop

In [7]:
# Generate timestamp for unique results directory

# Read class labels directly from training data (do not hardcode)
class_labels = sorted(np.unique(y_train.astype(str)))
if len(class_labels) != 2:
    raise ValueError(f"Expected binary labels in 'label1' but found {len(class_labels)} classes: {class_labels}")
# Define positive label for ROC as the second label in sorted order
positive_label = class_labels[1]
print(f"Class labels read from data: {class_labels}; positive_label: {positive_label}")

timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
RUN_DIR = os.path.join("binary_classification", FEATURE_METHOD, f"results_{timestamp}")
os.makedirs(RUN_DIR, exist_ok=True)
print(f"Results will be saved to: {RUN_DIR}\n")

results_list = []
models_dict = {}
y_preds_dict = {}
y_probs_dict = {}
metrics_dict = {}

# Train all models using optimized RandomizedSearchCV
for key in MODELS_TO_RUN:
    result_row, best_model, y_pred, y_prob, metrics = train_and_evaluate_model(
        model_key=key,
        model_def=model_defs[key],
        X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test,
        base_dir=RUN_DIR,
        cv_folds=3,
        n_iter=10,
        class_labels=class_labels
    )
    results_list.append(result_row)
    models_dict[key] = best_model
    y_preds_dict[key] = y_pred
    y_probs_dict[key] = y_prob
    metrics_dict[key] = metrics

print(f"\n{'='*60}")
print("MODEL TRAINING COMPLETED SUCCESSFULLY")
print(f"{'='*60}\n")

# Create metrics summary dataframe
metrics_df = pd.DataFrame(results_list)
metrics_csv = os.path.join(RUN_DIR, "01_metrics_summary_all_models.csv")
metrics_df.to_csv(metrics_csv, index=False)
print(f"Metrics summary saved: {metrics_csv}\n")
display(metrics_df)

# Identify best performing model
best_model_key = metrics_df.loc[metrics_df["test_f1"].idxmax(), "model"]
best_model_f1 = metrics_df.loc[metrics_df["test_f1"].idxmax(), "test_f1"]
print(f"\n{'='*60}")
print(f"BEST MODEL: {best_model_key.upper()}")
print(f"Test F1-Score: {best_model_f1:.4f}")
print(f"{'='*60}\n")

Class labels read from data: ['0', '1']; positive_label: 1
Results will be saved to: binary_classification\ica\results_20251217_194002


Training LOG_REG
Fitting 3 folds for each of 10 candidates, totalling 30 fits
Best parameters: {'solver': 'lbfgs', 'penalty': 'l2', 'class_weight': 'balanced', 'C': 100}
Cross-validation F1 score: 0.8686
Visualizations saved for log_reg

Training RF
Fitting 3 folds for each of 10 candidates, totalling 30 fits
Best parameters: {'n_estimators': 50, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'max_depth': None, 'class_weight': 'balanced'}
Cross-validation F1 score: 0.9485
Visualizations saved for rf

Training LGB
Fitting 3 folds for each of 10 candidates, totalling 30 fits
Best parameters: {'num_leaves': 40, 'n_estimators': 300, 'min_data_in_leaf': 30, 'max_depth': -1, 'learning_rate': 0.1, 'feature_fraction': 1.0, 'bagging_fraction': 0.8}
Cross-validation F1 score: 0.9482
Visualizations saved for lgb

Training MLP
Fitting 3 fo



Best parameters: {'var_smoothing': np.float64(1e-10)}
Cross-validation F1 score: 0.6369
Visualizations saved for nb

MODEL TRAINING COMPLETED SUCCESSFULLY

Metrics summary saved: binary_classification\ica\results_20251217_194002\01_metrics_summary_all_models.csv



Unnamed: 0,model,best_params,cv_f1_score,test_accuracy,test_precision,test_recall,test_specificity,test_f1,test_roc_auc
0,log_reg,"{'solver': 'lbfgs', 'penalty': 'l2', 'class_we...",0.868553,0.879136,0.952459,0.798109,0.960163,0.868479,0.923204
1,rf,"{'n_estimators': 50, 'min_samples_split': 5, '...",0.948527,0.951006,0.988918,0.912234,0.989777,0.949029,0.977716
2,lgb,"{'num_leaves': 40, 'n_estimators': 300, 'min_d...",0.948238,0.949521,0.991287,0.907015,0.992028,0.94728,0.977067
3,mlp,"{'learning_rate': 'adaptive', 'hidden_layer_si...",0.938952,0.941286,0.988809,0.892674,0.989897,0.938286,0.968328
4,knn,"{'weights': 'distance', 'n_neighbors': 7, 'met...",0.946013,0.949461,0.988779,0.909241,0.989682,0.947343,0.972049
5,nb,{'var_smoothing': np.float64(1e-10)},0.636936,0.725952,0.943473,0.480704,0.971199,0.636903,0.88504



BEST MODEL: RF
Test F1-Score: 0.9490



## 8. Best Model Detailed Report

In [8]:
# Extract best model results
best_model_obj = models_dict[best_model_key]
y_pred_best = y_preds_dict[best_model_key]
y_prob_best = y_probs_dict[best_model_key]
metrics_best = metrics_dict[best_model_key]

# Save best model to its results directory
best_model_path = os.path.join(RUN_DIR, f"best_binary_model_{best_model_key}.pkl")
joblib.dump(best_model_obj, best_model_path)
print(f"Best model saved: {best_model_path}\n")

# Also save to production directory for deployment
best_model_prod_path = os.path.join(MODELS_DIR, f"best_binary_classification_model.pkl")
joblib.dump(best_model_obj, best_model_prod_path)
print(f"Best model also saved to production: {best_model_prod_path}\n")

# Generate classification report for best model
class_report = classification_report(y_test, y_pred_best,
                                     target_names=["Benign", "Attack"],
                                     digits=4)

# Create detailed report text
report_text = f"""
{'='*70}
BINARY INTRUSION DETECTION - BEST MODEL REPORT
{'='*70}

Execution Timestamp: {timestamp}
Best Model: {best_model_key.upper()}
Results Location: {best_model_path}
Production Location: {best_model_prod_path}

{'='*70}
MODEL PERFORMANCE METRICS
{'='*70}

Accuracy:       {metrics_best['accuracy']:.4f}
Precision:      {metrics_best['precision']:.4f}
Recall:         {metrics_best['recall']:.4f}
Specificity:    {metrics_best['specificity']:.4f}
F1-Score:       {metrics_best['f1']:.4f}
ROC-AUC:        {metrics_best['roc_auc']:.4f}

Confusion Matrix:
{metrics_best['confusion_matrix']}

{'='*70}
CLASSIFICATION REPORT
{'='*70}

{class_report}

{'='*70}
BEST MODEL HYPERPARAMETERS
{'='*70}

{str(model_defs[best_model_key]['param_grid'])}

{'='*70}
"""

# Save report to file
report_path = os.path.join(RUN_DIR, "02_best_model_report.txt")
with open(report_path, "w") as f:
    f.write(report_text)

print("Best model report generated and saved")
print(report_text)

Best model saved: binary_classification\ica\results_20251217_194002\best_binary_model_rf.pkl

Best model also saved to production: data\models\best_binary_classification_model.pkl

Best model report generated and saved

BINARY INTRUSION DETECTION - BEST MODEL REPORT

Execution Timestamp: 20251217_194002
Best Model: RF
Results Location: binary_classification\ica\results_20251217_194002\best_binary_model_rf.pkl
Production Location: data\models\best_binary_classification_model.pkl

MODEL PERFORMANCE METRICS

Accuracy:       0.9510
Precision:      0.9889
Recall:         0.9122
Specificity:    0.9898
F1-Score:       0.9490
ROC-AUC:        0.9777

Confusion Matrix:
[[41343   427]
 [ 3666 38104]]

CLASSIFICATION REPORT

              precision    recall  f1-score   support

      Benign     0.9185    0.9898    0.9528     41770
      Attack     0.9889    0.9122    0.9490     41770

    accuracy                         0.9510     83540
   macro avg     0.9537    0.9510    0.9509     83540
weigh

## 9. Model Comparison Visualizations

In [9]:
# 1. Performance metrics comparison across models
fig, axes = plt.subplots(2, 3, figsize=(15, 10))
fig.suptitle("Model Performance Comparison", fontsize=16, fontweight='bold')

metrics_to_plot = ["test_accuracy", "test_precision", "test_recall",
                   "test_specificity", "test_f1", "test_roc_auc"]
colors = ['#FF6B6B' if model == best_model_key else '#4ECDC4'
          for model in metrics_df["model"]]

for idx, metric in enumerate(metrics_to_plot):
    ax = axes[idx // 3, idx % 3]
    bars = ax.bar(metrics_df["model"], metrics_df[metric], color=colors, alpha=0.8, edgecolor='black')
    ax.set_ylabel(metric.replace("test_", ""), fontsize=10)
    ax.set_title(metric.replace("test_", "").upper(), fontsize=11, fontweight='bold')
    ax.set_ylim([0, 1.05])
    ax.grid(axis='y', alpha=0.3)

    # Add value labels on bars
    for bar in bars:
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width()/2., height,
                f'{height:.3f}', ha='center', va='bottom', fontsize=9)

    plt.setp(ax.xaxis.get_majorticklabels(), rotation=45, ha='right')

plt.tight_layout()
comparison_path = os.path.join(RUN_DIR, "03_models_comparison_metrics.png")
plt.savefig(comparison_path, dpi=300, bbox_inches='tight')
plt.close()
print("Saved: models_comparison_metrics.png")

# 2. ROC curves comparison
plt.figure(figsize=(10, 8))
for model_key in MODELS_TO_RUN:
    y_prob = y_probs_dict[model_key]
    # Convert string labels to binary using positive_label
    y_test_bin = (np.array(y_test) == positive_label).astype(int)
    fpr, tpr, _ = roc_curve(y_test_bin, y_prob)
    auc = roc_auc_score(y_test_bin, y_prob)

    line_width = 3 if model_key == best_model_key else 1.5
    line_style = '-' if model_key == best_model_key else '--'
    alpha = 1.0 if model_key == best_model_key else 0.7

    plt.plot(fpr, tpr, lw=line_width, linestyle=line_style, alpha=alpha,
             label=f"{model_key.upper()} (AUC={auc:.4f})")

plt.plot([0, 1], [0, 1], 'k--', lw=2, label='Random Classifier')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel("False Positive Rate", fontsize=12)
plt.ylabel("True Positive Rate", fontsize=12)
plt.title("ROC Curves - All Models Comparison", fontsize=14, fontweight='bold')
plt.legend(loc="lower right", fontsize=10)
plt.grid(alpha=0.3)
roc_comparison_path = os.path.join(RUN_DIR, "04_roc_curves_comparison.png")
plt.tight_layout()
plt.savefig(roc_comparison_path, dpi=300, bbox_inches='tight')
plt.close()
print("Saved: roc_curves_comparison.png")

# 3. F1-score ranking
fig, ax = plt.subplots(figsize=(10, 6))
sorted_df = metrics_df.sort_values("test_f1", ascending=True)
colors_rank = ['#FF6B6B' if model == best_model_key else '#95E1D3'
               for model in sorted_df["model"]]
bars = ax.barh(sorted_df["model"], sorted_df["test_f1"], color=colors_rank, edgecolor='black', alpha=0.85)

ax.set_xlabel("F1-Score", fontsize=12)
ax.set_title("Models Ranked by F1-Score", fontsize=14, fontweight='bold')
ax.set_xlim([0, 1.05])
ax.grid(axis='x', alpha=0.3)

for idx, bar in enumerate(bars):
    width = bar.get_width()
    ax.text(width, bar.get_y() + bar.get_height()/2.,
            f' {width:.4f}', ha='left', va='center', fontsize=10, fontweight='bold')

plt.tight_layout()
ranking_path = os.path.join(RUN_DIR, "05_f1_score_ranking.png")
plt.savefig(ranking_path, dpi=300, bbox_inches='tight')
plt.close()
print("Saved: f1_score_ranking.png")

# 4. Metrics heatmap
fig, ax = plt.subplots(figsize=(12, 6))
metrics_for_heatmap = metrics_df[["model", "test_accuracy", "test_precision",
                                   "test_recall", "test_specificity", "test_f1", "test_roc_auc"]].set_index("model")
sns.heatmap(metrics_for_heatmap.T, annot=True, fmt='.4f', cmap='RdYlGn',
            cbar_kws={'label': 'Score'}, ax=ax, linewidths=0.5)
ax.set_title("Model Metrics Heatmap", fontsize=14, fontweight='bold')
plt.tight_layout()
heatmap_path = os.path.join(RUN_DIR, "06_metrics_heatmap.png")
plt.savefig(heatmap_path, dpi=300, bbox_inches='tight')
plt.close()
print("Saved: metrics_heatmap.png")

print("\nAll comparison visualizations generated successfully")

Saved: models_comparison_metrics.png




Saved: roc_curves_comparison.png
Saved: f1_score_ranking.png
Saved: metrics_heatmap.png

All comparison visualizations generated successfully


## 10. Execution Summary and Results Verification

In [10]:
print(f"\n{'='*70}")
print("RESULTS DIRECTORY STRUCTURE")
print(f"{'='*70}\n")

# List all generated files in results directory
for root, dirs, files in os.walk(RUN_DIR):
    level = root.replace(RUN_DIR, '').count(os.sep)
    indent = ' ' * 2 * level
    print(f'{indent}{os.path.basename(root)}/')
    sub_indent = ' ' * 2 * (level + 1)
    for file in files:
        file_path = os.path.join(root, file)
        file_size = os.path.getsize(file_path)
        size_str = f"{file_size / 1024:.1f}KB" if file_size > 1024 else f"{file_size}B"
        print(f'{sub_indent}{file} ({size_str})')

print(f"\n{'='*70}")
print("EXECUTION SUMMARY")
print(f"{'='*70}\n")

# Generate execution summary
summary_text = f"""
BINARY CLASSIFICATION MODEL TRAINING SUMMARY

Total Models Trained: {len(MODELS_TO_RUN)}
Models: {', '.join([m.upper() for m in MODELS_TO_RUN])}

Best Performing Model: {best_model_key.upper()}
Best F1-Score: {best_model_f1:.4f}

Results Location: {os.path.abspath(RUN_DIR)}

Generated Files:
- 01_metrics_summary_all_models.csv: Comprehensive metrics for all models
- 02_best_model_report.txt: Detailed analysis of the best model
- 03_models_comparison_metrics.png: Performance metrics comparison
- 04_roc_curves_comparison.png: ROC curves for all models
- 05_f1_score_ranking.png: Model ranking by F1-score
- 06_metrics_heatmap.png: Metrics heatmap visualization

Model-Specific Outputs:
"""

for model_key in MODELS_TO_RUN:
    model_dir = os.path.join(RUN_DIR, model_key)
    if os.path.exists(model_dir):
        files = os.listdir(model_dir)
        summary_text += f"\n  {model_key.upper()}:\n"
        for file in sorted(files):
            summary_text += f"    - {file}\n"

summary_text += f"""
{'='*70}
"""

# Save summary to file
summary_path = os.path.join(RUN_DIR, "00_EXECUTION_SUMMARY.txt")
with open(summary_path, "w") as f:
    f.write(summary_text)

print(summary_text)
print(f"Summary saved to: {summary_path}")


RESULTS DIRECTORY STRUCTURE

results_20251217_194002/
  01_metrics_summary_all_models.csv (1.5KB)
  02_best_model_report.txt (1.7KB)
  03_models_comparison_metrics.png (291.6KB)
  04_roc_curves_comparison.png (188.5KB)
  05_f1_score_ranking.png (87.8KB)
  06_metrics_heatmap.png (252.3KB)
  best_binary_model_rf.pkl (19358.4KB)
  knn/
    knn_confusion_matrix.png (82.4KB)
    knn_roc_curve.png (104.7KB)
  lgb/
    lgb_confusion_matrix.png (84.4KB)
    lgb_roc_curve.png (104.6KB)
  log_reg/
    log_reg_confusion_matrix.png (86.7KB)
    log_reg_roc_curve.png (112.8KB)
  mlp/
    mlp_confusion_matrix.png (82.1KB)
    mlp_roc_curve.png (107.0KB)
  nb/
    nb_confusion_matrix.png (84.1KB)
    nb_roc_curve.png (113.4KB)
  rf/
    rf_confusion_matrix.png (82.4KB)
    rf_roc_curve.png (103.1KB)

EXECUTION SUMMARY


BINARY CLASSIFICATION MODEL TRAINING SUMMARY

Total Models Trained: 6
Models: LOG_REG, RF, LGB, MLP, KNN, NB

Best Performing Model: RF
Best F1-Score: 0.9490

Results Location: c:\Us