In [None]:
# === Standard Libraries ===
import os  # For file and directory operations
import warnings  # To suppress unnecessary warnings

# === Data Handling ===
import pandas as pd  # For data manipulation using DataFrames
import numpy as np  # For numerical operations

# === Plotting and Visualization ===
import matplotlib.pyplot as plt  # Basic plotting
import seaborn as sns  # Beautiful statistical plots

# === Preprocessing & Scaling ===
from sklearn.preprocessing import StandardScaler, label_binarize  # Feature scaling and label conversion

# === Dimensionality Reduction ===
from sklearn.decomposition import PCA  # For visualizing data in 2D using PCA

# === Model Evaluation Metrics ===
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    average_precision_score,
    precision_recall_curve
)

# === Data Splitting ===
from sklearn.model_selection import train_test_split  # For splitting train and test sets

# === Machine Learning Models ===
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier  # Boosted trees

# === Optimization Algorithms (Custom Implementations) ===
from Optimizer.jasa_optimizer import JASAOptimizer
from Optimizer.dhoa_sa_optimizer import DHOA_SA_Optimizer
from Optimizer.iwo_sa_optimizer import IWO_SA_Optimizer
from Optimizer.cos_optimizer import COSOptimizer
from Optimizer.coiwso_sa_optimizer import COIWSO_SA_Optimizer

# === Miscellaneous Utilities ===
import inspect  # For inspecting Python objects (e.g., function signatures)

# 🔇 Ignore any warnings to keep the notebook output clean.
# NOTE(review): called without a category or module filter, this hides every
# warning for the rest of the session — including convergence and deprecation
# warnings raised by the libraries imported above. Consider narrowing the
# filter once the notebook is stable.
warnings.filterwarnings("ignore")

In [None]:
# === Plot: PCA Decision Boundary ===
def plot_pca_decision_boundary(X, y, model, title, save_dir, grid_step=0.1, max_grid_side=500):
    """
    Projects data to 2D using PCA and plots the decision boundary of the given model.

    A deep copy of ``model`` is fitted on the two PCA components, so the
    estimator object passed in by the caller is left exactly as it was.

    Parameters:
    - X : np.ndarray
        Feature matrix (original, high-dimensional)
    - y : np.ndarray
        Corresponding class labels
    - model : sklearn classifier
        A scikit-learn style model (must have .fit and .predict)
    - title : str
        Title for the plot and filename
    - save_dir : str
        Directory to save the resulting PNG file (created if it does not exist)
    - grid_step : float, optional
        Preferred spacing of the prediction mesh in PCA units (default 0.1).
    - max_grid_side : int, optional
        Approximate upper bound on the number of mesh points per axis. When the
        projected data spans a wide range, the spacing is widened so the mesh
        stays near this size instead of growing with the data range.
    """
    import copy  # local import: only this helper needs it

    # Step 1: Reduce feature space to 2D using PCA
    pca = PCA(n_components=2)
    X_pca = pca.fit_transform(X)

    # Step 2: Train a *copy* of the model on the PCA-transformed data so the
    # caller's fitted estimator is not overwritten by this 2-D refit.
    boundary_model = copy.deepcopy(model)
    boundary_model.fit(X_pca, y)

    # Step 3: Generate mesh grid for plotting the decision boundary.
    # The step is never finer than `grid_step`, and is coarsened when the range
    # is large so that each axis has roughly at most `max_grid_side` points.
    x_min, x_max = X_pca[:, 0].min() - 1, X_pca[:, 0].max() + 1
    y_min, y_max = X_pca[:, 1].min() - 1, X_pca[:, 1].max() + 1
    x_step = max(grid_step, (x_max - x_min) / max_grid_side)
    y_step = max(grid_step, (y_max - y_min) / max_grid_side)
    xx, yy = np.meshgrid(np.arange(x_min, x_max, x_step),
                         np.arange(y_min, y_max, y_step))

    # Step 4: Predict across the mesh to get class labels
    Z = boundary_model.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    # Step 5: Plot the decision boundary and the data points
    os.makedirs(save_dir, exist_ok=True)
    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        ax.contourf(xx, yy, Z, alpha=0.3)  # Soft background
        ax.scatter(X_pca[:, 0], X_pca[:, 1], c=y, edgecolor='k', s=20)  # Points
        ax.set_title(f"PCA Decision Boundary: {title}")
        ax.set_xlabel("Principal Component 1")
        ax.set_ylabel("Principal Component 2")
        ax.grid(True)
        fig.tight_layout()

        # Step 6: Save the figure
        fig.savefig(os.path.join(save_dir, f"PCA_{title}.png"))
    finally:
        # Always release the figure, even if plotting or saving failed
        plt.close(fig)

In [None]:
# === Plot: Precision-Recall Curve ===
def plot_precision_recall(y_test, y_probs, model_name, save_dir):
    """
    Draw the precision-recall curve of a binary classifier and write it to
    ``<save_dir>/PRC_<model_name>.png``.

    Parameters:
    - y_test : array-like
        True binary labels (0 or 1)
    - y_probs : array-like
        Predicted scores/probabilities for the positive class (class 1)
    - model_name : str
        Model name, used in the plot title and in the output filename
    - save_dir : str
        Directory the PNG is written to
    """
    # Binarize against the fixed classes [0, 1] and flatten to a 1-D vector
    labels_binary = label_binarize(y_test, classes=[0, 1]).ravel()

    # Curve points for class 1 plus the average precision shown in the legend
    curve_precision, curve_recall, _thresholds = precision_recall_curve(labels_binary, y_probs)
    avg_precision = average_precision_score(labels_binary, y_probs)

    output_path = os.path.join(save_dir, f"PRC_{model_name}.png")

    fig, ax = plt.subplots(figsize=(7, 5))
    ax.plot(
        curve_recall,
        curve_precision,
        label=f"Class 1 (AP = {avg_precision:.4f})",
        color='blue',
    )
    ax.set_xlabel("Recall")
    ax.set_ylabel("Precision")
    ax.set_title(f"Precision-Recall Curve - {model_name}")
    ax.legend(loc="lower left")
    ax.grid(True)
    fig.tight_layout()

    fig.savefig(output_path)
    plt.close(fig)

In [None]:
# === File Paths ===

# Root directory where all extracted EEG feature datasets are stored.
# Set the EEG_DATA_DIR environment variable to run the notebook on another
# machine; the original location is kept as the default.
data_dir = os.environ.get(
    "EEG_DATA_DIR",
    "/Users/myatpwintphyu/Desktop/EEG Feature Extract Result",
)

# Directory where model results (plots, reports, etc.) will be saved.
# Overridable through EEG_RESULT_DIR, same default as before.
result_root_dir = os.environ.get(
    "EEG_RESULT_DIR",
    "/Users/myatpwintphyu/Desktop/Results",
)

In [None]:
# === List of EEG Feature Dataset Subfolders ===

# Sub-folders expected under `data_dir`; each one holds `.npy` feature and label files.
# Every folder name follows the pattern "eeg_<extractor>_features".
dataset_files = [
    f"eeg_{extractor}_features"
    for extractor in (
        "1dcnn",      # Features extracted using 1D CNN
        "3dcnn",      # Features extracted using 3D CNN
        "stft_beta",  # STFT-based beta-band power features
        "stft",       # Full-band STFT features across all common EEG bands
    )
]

In [None]:
# 🔧 Models to Evaluate
# Dictionary of model name → sklearn-compatible classifier instance.
# Estimators that draw random numbers (XGBoost, MLP, Random Forest) are seeded
# with the same value (42) used for the train/test splits and for the tuned
# XGBoost configuration, so repeated runs give the same scores.
models = {
    "Naive Bayes": GaussianNB(),  # Simple probabilistic model based on Bayes' theorem

    "Logistic Regression": LogisticRegression(max_iter=500),
    # Linear model for binary classification (can also handle multiclass with softmax)

    "KNN": KNeighborsClassifier(n_neighbors=2),
    # k-Nearest Neighbors with k=2 (you can tune this later)

    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='mlogloss', random_state=42),
    # Gradient boosting decision tree model (powerful for structured data)
    # NOTE(review): 'mlogloss' is XGBoost's multiclass log-loss; 'logloss' is the
    # binary counterpart — confirm which one is intended for these 0/1 labels.

    "MLP": MLPClassifier(max_iter=1000, random_state=42),
    # Multi-layer Perceptron (neural network) with up to 1000 training iterations

    "Random Forest": RandomForestClassifier(random_state=42),
    # Ensemble of decision trees (randomly sampled features and data)
}

In [None]:
# === Loop through all EEG feature datasets and evaluate models ===
# For every feature folder: load the chunked .npy arrays, trim them to a common
# feature width, split 60/20/20 (stratified), standardize with statistics from
# the training split only, then fit every entry of `models` and score it on the
# held-out test split.

# Lists to store results across all datasets
all_results = []
all_report_rows = []

# Iterate over each EEG feature folder
for folder_name in dataset_files:
    dataset_name = folder_name.replace("eeg_", "")  # Clean dataset name
    dataset_path = os.path.join(data_dir, folder_name)

    # Create folders for saving results (PCA plots, PRC plots, etc.)
    dataset_result_dir = os.path.join(result_root_dir, dataset_name)
    pca_dir = os.path.join(dataset_result_dir, "PCA")
    prc_dir = os.path.join(dataset_result_dir, "PRC")
    os.makedirs(dataset_result_dir, exist_ok=True)
    os.makedirs(pca_dir, exist_ok=True)
    os.makedirs(prc_dir, exist_ok=True)

    # === Load all feature chunks (X and y) ===
    # The listing is sorted because os.listdir returns entries in arbitrary
    # order; a stable row order is required for the seeded split below to
    # select the same samples on every run.
    X_chunks, y_chunks = [], []
    for fname in sorted(os.listdir(dataset_path)):
        if not (fname.startswith("X_feats_") and fname.endswith(".npy")):
            continue
        chunk_id = fname.split("_")[-1].replace(".npy", "")
        X_chunks.append(np.load(os.path.join(dataset_path, f"X_feats_{chunk_id}.npy")))
        y_chunks.append(np.load(os.path.join(dataset_path, f"y_labels_{chunk_id}.npy")))

    if not X_chunks:
        # Nothing to evaluate: report it and move on to the next feature set
        print(f"⚠️ No X_feats_*.npy chunks found in {dataset_path}; skipping {dataset_name}")
        continue

    # === Align all feature chunks to the same dimension ===
    # The target is the smallest width among the chunks, so wider chunks are
    # truncated and no chunk ever needs padding.
    target_dim = min(x.shape[1] for x in X_chunks)
    X_aligned = [x[:, :target_dim] for x in X_chunks]

    # Combine all chunks into full dataset
    X_all = np.vstack(X_aligned)
    y_all = np.concatenate(y_chunks)

    # === Split into train, validation, and test sets (60% / 20% / 20%) ===
    X_train, X_temp, y_train, y_temp = train_test_split(X_all, y_all, test_size=0.4, stratify=y_all, random_state=42)
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, stratify=y_temp, random_state=42)

    # === Normalize features ===
    # Fitted on the training split only; the same transform is applied to the
    # other splits. The validation split is not consumed further in this cell.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_val_scaled = scaler.transform(X_val)
    X_test_scaled = scaler.transform(X_test)

    # === Evaluate each ML model ===
    results = []        # For summary (Accuracy, MAP)
    report_rows = []    # For classification report

    for model_name, model in models.items():
        # Train on the standardized training features
        model.fit(X_train_scaled, y_train)

        # Make predictions on the standardized test features.
        # Column 1 of predict_proba is taken as the positive-class score,
        # which assumes a binary 0/1 problem.
        y_pred = model.predict(X_test_scaled)
        y_proba = model.predict_proba(X_test_scaled)[:, 1] if hasattr(model, "predict_proba") else None

        # Calculate metrics (MAP needs scores, so it is None without predict_proba)
        acc = accuracy_score(y_test, y_pred)
        map_score = average_precision_score(y_test, y_proba) if y_proba is not None else None

        # Store summary metrics
        results.append({
            'Feature Set': dataset_name,
            'Model': model_name,
            'Accuracy': acc,
            'MAP': map_score
        })

        # Print quick summary; a missing MAP is shown as "n/a" instead of
        # failing on the float format specifier
        map_text = f"{map_score:.4f}" if map_score is not None else "n/a"
        print(f"✅ Feature Set: {dataset_name} | Model: {model_name} | Accuracy: {acc:.4f} | MAP: {map_text}")

        # Store classification report details
        report = classification_report(y_test, y_pred, output_dict=True)
        for cls, metrics in report.items():
            if isinstance(metrics, dict):  # Skip 'accuracy' scalar
                report_rows.append({
                    'Feature Set': dataset_name,
                    'Model': model_name,
                    'Class': cls,
                    'Precision': metrics.get('precision'),
                    'Recall': metrics.get('recall'),
                    'F1-Score': metrics.get('f1-score'),
                    'Support': metrics.get('support')
                })

        # Plot PCA decision boundary (best effort: a plotting failure must not
        # abort the evaluation run)
        try:
            plot_pca_decision_boundary(X_test_scaled, y_test, model, model_name, pca_dir)
        except Exception as e:
            print(f"⚠️ PCA plot error for {model_name} on {dataset_name}: {e}")

        # Plot precision-recall curve
        if y_proba is not None:
            try:
                plot_precision_recall(y_test, y_proba, model_name, prc_dir)
            except Exception as e:
                print(f"⚠️ PRC plot error for {model_name} on {dataset_name}: {e}")

    # Aggregate results from this dataset
    all_results.extend(results)
    all_report_rows.extend(report_rows)

In [None]:
# === ✅ Save final evaluation results after processing all EEG datasets ===

# Convert the collected results into DataFrames
final_results_df = pd.DataFrame(all_results)         # Summary: accuracy + MAP
final_report_df = pd.DataFrame(all_report_rows)      # Detailed classification report

# Save both results as CSV files to the results folder
os.makedirs(result_root_dir, exist_ok=True)
summary_csv_path = os.path.join(result_root_dir, "all_model_summary.csv")
report_csv_path = os.path.join(result_root_dir, "all_classification_report.csv")
final_results_df.to_csv(summary_csv_path, index=False)
final_report_df.to_csv(report_csv_path, index=False)

# Final status message: report the directory the files were actually written to
print(f"✅ All model evaluations complete. Combined CSVs saved to {result_root_dir}.")

In [None]:
# === 🔧 Step 2: Define hyperparameter search space for XGBoost ===
# Maps each tunable hyperparameter name to its (lower, upper) bounds.

param_space = dict(
    learning_rate=(0.05, 0.2),     # Step size of each boosting update
    max_depth=(3, 6),              # Maximum tree depth (model complexity)
    n_estimators=(100, 300),       # Number of boosting rounds
    subsample=(0.7, 1.0),          # Fraction of training rows used per tree
    colsample_bytree=(0.7, 1.0),   # Fraction of features used per tree
)

In [None]:
# === 🔧 Model Configuration Dictionary ===
# Each entry maps a model name to a pair:
# (1) a factory that builds the model from keyword hyperparameters,
# (2) the search space (name → (lower, upper) bounds) for those hyperparameters.

def _build_xgboost(**params):
    """Return an XGBClassifier with the fixed settings below plus the supplied ``params``."""
    return XGBClassifier(
        use_label_encoder=False,
        # NOTE(review): 'mlogloss' is XGBoost's multiclass log-loss; 'logloss'
        # is the binary counterpart — confirm which one is intended.
        eval_metric='mlogloss',
        # Weight of the positive class; 1.2604 presumably reflects this
        # dataset's class ratio — TODO confirm where the value comes from.
        scale_pos_weight=1.2604,
        random_state=42,               # Fixed seed for reproducibility
        **params                       # Hyperparameters chosen by the optimizer
    )


# 🔍 Bounds explored during hyperparameter optimization
_xgboost_search_space = {
    'learning_rate': (0.05, 0.2),       # Step size shrinkage
    'max_depth': (3, 6),                # Tree depth (controls complexity)
    'n_estimators': (100, 300),         # Number of boosting rounds
    'subsample': (0.7, 1.0),            # Fraction of training rows per boosting round
    'colsample_bytree': (0.7, 1.0)      # Fraction of features used per tree
}

model_configs = {
    'XGBoost': (_build_xgboost, _xgboost_search_space)
}

In [None]:
# === ⚙️ Available Optimizers ===
# Short name → optimizer class (from the project's `Optimizer` package).
# The short names are what ends up in the "Optimizer" column of the result
# tables, so they double as the identifiers used by the plotting cells.

optimizers = dict([
    ('JASA', JASAOptimizer),
    ('DHOA-SA', DHOA_SA_Optimizer),
    ('IWO-SA', IWO_SA_Optimizer),
    ('COS', COSOptimizer),
    ('COIWSO-SA', COIWSO_SA_Optimizer),
])

In [None]:
# === 🧠 Dynamic Optimizer Builder ===
# This function constructs and returns an optimizer instance,
# injecting only the relevant arguments based on the optimizer's constructor.

def build_optimizer(OptClass, model_class, param_space_used, **overrides):
    """
    Instantiate ``OptClass`` with the subset of shared settings its constructor accepts.

    A common pool of settings is assembled, then filtered against the parameter
    names of ``OptClass.__init__`` so each optimizer only receives the keywords
    it declares.

    Parameters:
    - OptClass : type
        Optimizer class to instantiate.
    - model_class : callable
        Model factory handed to the optimizer as ``model_class``.
    - param_space_used : dict
        Hyperparameter search space handed to the optimizer as ``param_space``.
    - **overrides :
        Optional replacements or additions to the default settings
        (e.g. ``max_iter=50``). They go through the same filter, so a keyword
        the optimizer does not declare is dropped rather than raising.

    Returns:
    - An instance of ``OptClass``.
    """
    # 🧰 Default arguments for all optimizers (each one picks what it declares)
    settings = {
        'model_class': model_class,             # Model factory to be optimized
        'param_space': param_space_used,        # Search space for hyperparameters
        'max_iter': 30,                         # Number of optimization iterations
        'population_size': 10,                  # Population size
        'initial_pop': 5,                       # Starting population
        'max_pop': 15,                          # Maximum population
        'swarm_size': 5,                        # Swarm size
        'temperature': 1.0,                     # Initial temperature
        'cooling_rate': 0.95,                   # Cooling schedule factor
        'mutation_rate': 0.1,                   # Mutation rate
        'metric_to_optimize': 'accuracy'        # Metric used for fitness evaluation
    }
    # Caller-supplied values take precedence over the defaults above
    settings.update(overrides)

    # 🧼 Keep only the arguments named in this optimizer's constructor signature
    accepted = inspect.signature(OptClass.__init__).parameters
    valid_args = {name: value for name, value in settings.items() if name in accepted}

    # 🛠️ Build and return the optimizer instance
    return OptClass(**valid_args)

In [None]:
# === 🚀 Start Optimization Across All Datasets ===
# For every feature folder and every optimizer: run the hyperparameter search
# for XGBoost, keep the best candidate of each iteration, pick the overall best
# configuration, and save accuracy / cost curves. Two CSVs are written at the end.


def _select_best_row(candidates):
    """Return the row with the lowest "Cost Function"; ties go to the highest "accuracy", then the highest "MAP"."""
    lowest_cost = candidates[candidates["Cost Function"] == candidates["Cost Function"].min()]
    top_accuracy = lowest_cost[lowest_cost["accuracy"] == lowest_cost["accuracy"].max()]
    return top_accuracy.loc[top_accuracy["MAP"].idxmax()]


def _save_iteration_curve(values, title, ylabel, path):
    """Plot ``values`` against their position as a line chart and save it to ``path``."""
    fig, ax = plt.subplots()
    ax.plot(values)
    ax.set_title(title)
    ax.set_xlabel("Iteration")
    ax.set_ylabel(ylabel)
    ax.grid(True)
    fig.tight_layout()
    fig.savefig(path)
    plt.close(fig)


optimizer_summary = []           # 📊 Store best result from each iteration per optimizer
final_optimizer_results = []     # 🏆 Store final best config (lowest cost + best acc + MAP)

# 📦 XGBoost model factory and its parameter space (the same for every dataset)
model_builder, param_space = model_configs["XGBoost"]

# === Loop through each dataset (1D CNN, 3D CNN, STFT beta, STFT full) ===
for folder_name in dataset_files:
    dataset_name = folder_name.replace("eeg_", "")
    dataset_path = os.path.join(data_dir, folder_name)

    # 📁 Create result folders
    dataset_result_dir = os.path.join(result_root_dir, dataset_name)
    pca_dir = os.path.join(dataset_result_dir, "PCA")
    prc_dir = os.path.join(dataset_result_dir, "PRC")
    os.makedirs(pca_dir, exist_ok=True)
    os.makedirs(prc_dir, exist_ok=True)

    # 📥 Load feature chunks. The listing is sorted because os.listdir returns
    # entries in arbitrary order, and a stable row order is required for the
    # seeded split below to be reproducible.
    X_chunks, y_chunks = [], []
    for fname in sorted(os.listdir(dataset_path)):
        if not (fname.startswith("X_feats_") and fname.endswith(".npy")):
            continue
        chunk_id = fname.split("_")[-1].replace(".npy", "")
        X_chunks.append(np.load(os.path.join(dataset_path, f"X_feats_{chunk_id}.npy")))
        y_chunks.append(np.load(os.path.join(dataset_path, f"y_labels_{chunk_id}.npy")))

    if not X_chunks:
        print(f"⚠️ No X_feats_*.npy chunks found in {dataset_path}; skipping {dataset_name}")
        continue

    # 🔄 Align all feature sets to the same number of dimensions.
    # The target is the smallest width, so chunks are only ever truncated.
    target_dim = min(x.shape[1] for x in X_chunks)
    X_aligned = [x[:, :target_dim] for x in X_chunks]

    # 🔗 Combine all features & labels
    X_all = np.vstack(X_aligned)
    y_all = np.concatenate(y_chunks)

    # 📊 Split data into Train / Validation / Test sets (60% / 20% / 20%)
    X_train, X_temp, y_train, y_temp = train_test_split(X_all, y_all, test_size=0.4, stratify=y_all, random_state=42)
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, stratify=y_temp, random_state=42)

    # No feature scaling here: the optimizers are given the raw arrays, and the
    # standardized copies computed previously were never used in this cell.

    # === 🧪 Loop through each optimizer ===
    for opt_name, OptClass in optimizers.items():
        print(f"\n🔧 Optimizer: {opt_name} on {dataset_name}")

        # 🔨 Build and run the optimizer
        optimizer = build_optimizer(OptClass, model_builder, param_space)
        # NOTE(review): the search is scored on the test split while the
        # validation split (X_val / y_val) goes unused. If the 3rd/4th arguments
        # of fit() are the evaluation data, the reported metrics are tuned on
        # the test set — confirm against the optimizer implementation and
        # consider passing X_val / y_val instead.
        optimizer.fit(X_train, y_train, X_test, y_test)

        # 📈 Track all iterations
        df_full = pd.DataFrame(optimizer.history)

        # 🏅 Select best candidate per iteration based on:
        # 1. Lowest cost ➝ 2. Highest accuracy ➝ 3. Highest MAP
        best_rows = []
        for iter_id, group in df_full.groupby("iteration"):
            best_row_dict = _select_best_row(group).to_dict()
            best_row_dict["iteration"] = iter_id
            best_rows.append(best_row_dict)

        # 🗂️ Store best per iteration
        best_per_iter_df = pd.DataFrame(best_rows).reset_index(drop=True)
        best_per_iter_df.insert(0, "Feature Set", dataset_name)
        best_per_iter_df.insert(1, "Optimizer", opt_name)

        # 📑 Only keep relevant columns (those that are actually present)
        keep_cols = [
            "Feature Set", "Optimizer", "iteration", "learning_rate", "max_depth", "n_estimators",
            "subsample", "colsample_bytree", "accuracy", "MAR", "FDR", "FPR", "MCC",
            "precision", "MAP", "Cost Function"
        ]
        best_per_iter_df = best_per_iter_df[[col for col in keep_cols if col in best_per_iter_df.columns]]
        optimizer_summary.append(best_per_iter_df)

        # 🎯 Select final best config (lowest cost ➝ highest acc ➝ highest MAP)
        final_optimizer_results.append(_select_best_row(best_per_iter_df))

        # 📊 Accuracy per iteration plot
        _save_iteration_curve(
            optimizer.best_accuracy_per_iteration,
            f"{opt_name} Accuracy on {dataset_name}",
            "Accuracy",
            os.path.join(dataset_result_dir, f"{opt_name}_accuracy_plot.png"),
        )

        # 📉 Cost function per iteration plot
        _save_iteration_curve(
            optimizer.best_cost_per_iteration,
            f"{opt_name} Cost Function on {dataset_name}",
            "Cost",
            os.path.join(dataset_result_dir, f"{opt_name}_cost_plot.png"),
        )

# === 💾 Save Final Results ===
if not optimizer_summary:
    # pd.concat would fail on an empty list with a less helpful message
    raise ValueError("No optimizer results were produced; check data_dir and dataset_files.")

summary_df = pd.concat(optimizer_summary, ignore_index=True)
summary_df.to_csv(os.path.join(result_root_dir, "all_optimizer_summary.csv"), index=False)

final_df = pd.DataFrame(final_optimizer_results)
final_df.to_csv(os.path.join(result_root_dir, "final_optimizer_results.csv"), index=False)

print("✅ All optimizer results saved:")
print(" - all_optimizer_summary.csv (best per iteration)")
print(" - final_optimizer_results.csv (final best only)")

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.patches as mpatches
import numpy as np
import os
import pandas as pd

# === Paths to final CSVs ===
final_result_path = os.path.join(result_root_dir, "final_optimizer_results.csv")
all_iter_path = os.path.join(result_root_dir, "all_optimizer_summary.csv")

# === Load CSVs ===
final_results = pd.read_csv(final_result_path)   # one row per (feature set, optimizer)
summary_df = pd.read_csv(all_iter_path)          # one row per (feature set, optimizer, iteration)

# === Metrics to visualize (column name → axis/title text)
metrics_to_plot = {
    "accuracy": "Accuracy (Higher is Better)",
    "Cost Function": "Cost Function (Lower is Better)",
    "MAP": "Mean Average Precision (MAP)"
}

# Bubble areas (matplotlib `s` units) used for the lowest / highest MAP in a chart
BUBBLE_MIN_SIZE, BUBBLE_MAX_SIZE = 50, 3000

# === Loop through each EEG dataset (feature set)
for feature_set in final_results["Feature Set"].unique():
    print(f"\n📊 Generating visualizations for: {feature_set}")

    # Filter for this feature set
    subset = final_results[final_results["Feature Set"] == feature_set]
    subset_iter = summary_df[summary_df["Feature Set"] == feature_set]

    # Create output folder
    dataset_dir = os.path.join(result_root_dir, feature_set)
    os.makedirs(dataset_dir, exist_ok=True)

    # === 1. Bar Charts for accuracy, cost, MAP ===
    for metric, title in metrics_to_plot.items():
        plt.figure(figsize=(8, 4))
        # Best optimizer first: ascending for cost, descending for the others
        sorted_subset = subset.sort_values(metric, ascending=(metric == "Cost Function"))

        ax = sns.barplot(
            x="Optimizer",
            y=metric,
            data=sorted_subset,
            palette="viridis"
        )

        # Annotate bars with value (bars without a numeric height are skipped)
        for bar in ax.patches:
            height = bar.get_height()
            if np.isnan(height):
                continue
            ax.text(
                bar.get_x() + bar.get_width() / 2,
                height + 0.002,
                f"{height:.3f}",
                ha='center',
                va='bottom',
                fontsize=9
            )

        plt.title(f"{title} – {feature_set.capitalize()}", fontsize=12)
        plt.xlabel("Optimizer", fontsize=10)
        plt.ylabel(title, fontsize=10)
        plt.xticks(rotation=45)
        plt.grid(axis='y', linestyle='--', alpha=0.7)
        plt.tight_layout()

        bar_path = os.path.join(dataset_dir, f"{metric.replace(' ', '_')}_bar_chart.png")
        plt.savefig(bar_path)
        plt.close()
        print(f"✅ Saved: {bar_path}")

    # === Bubble sizes shared by both bubble charts ===
    # MAP is min-max scaled within this feature set, so the sizes stay
    # informative whatever range the scores fall in (a fixed offset only works
    # for one particular set of results).
    map_values = subset['MAP']
    map_span = map_values.max() - map_values.min()
    if map_span > 0:
        scaled_size = BUBBLE_MIN_SIZE + (map_values - map_values.min()) / map_span * (BUBBLE_MAX_SIZE - BUBBLE_MIN_SIZE)
    else:
        # Single optimizer, identical scores, or no valid MAP: use one medium size
        scaled_size = pd.Series((BUBBLE_MIN_SIZE + BUBBLE_MAX_SIZE) / 2, index=subset.index)

    # === 2. Bubble Chart (Simple View) ===
    plt.figure(figsize=(8, 6))

    scatter = plt.scatter(
        subset['Cost Function'],
        subset['accuracy'],
        s=scaled_size,
        c=subset['MAP'],
        cmap='viridis',
        alpha=0.7,
        edgecolors='w',
        linewidths=1.5
    )

    plt.xlabel('Cost Function (Lower is Better)', fontsize=14)
    plt.ylabel('Accuracy (Higher is Better)', fontsize=14)
    plt.title(f'Bubble Chart: {feature_set.capitalize()}', fontsize=16)
    plt.grid(True)

    cbar = plt.colorbar(scatter)
    cbar.set_label('MAP Score', fontsize=12)

    bubble_path = os.path.join(dataset_dir, "bubble_chart_simple.png")
    plt.tight_layout()
    plt.savefig(bubble_path)
    plt.close()
    print(f"✅ Saved: {bubble_path}")

    # === 3. Bubble Chart (Fancy with Legend) ===
    plt.figure(figsize=(10, 6))
    norm = plt.Normalize(vmin=subset['MAP'].min(), vmax=subset['MAP'].max())
    # Per-optimizer colours for the legend patches, taken from the same
    # colormap/norm as the scatter so the legend, bubbles and colorbar agree
    legend_colors = plt.cm.plasma(norm(subset['MAP'].to_numpy()))

    # The MAP values (not pre-computed RGBA colours) are passed to scatter so
    # that the colorbar attached below is driven by the same data and colormap
    scatter = plt.scatter(
        subset['Cost Function'],
        subset['accuracy'],
        s=scaled_size,
        c=subset['MAP'],
        cmap='plasma',
        norm=norm,
        alpha=0.8,
        edgecolors='w',
        linewidths=1.5
    )

    plt.xlabel('Cost Function (Lower is Better)', fontsize=14)
    plt.ylabel('Accuracy (Higher is Better)', fontsize=14)
    plt.title(f'{feature_set.capitalize()} - Optimizer Bubble Chart', fontsize=16)
    plt.grid(True)

    cbar = plt.colorbar(scatter)
    cbar.set_label('MAP Score', fontsize=12)

    # Add optimizer legend manually
    legend_elements = [
        mpatches.Patch(
            facecolor=legend_colors[i],
            edgecolor='black',
            label=f"{row['Optimizer']} (MAP={row['MAP']:.3f})"
        )
        for i, (_, row) in enumerate(subset.iterrows())
    ]

    plt.legend(
        handles=legend_elements,
        title="Optimizer",
        loc='upper center',
        bbox_to_anchor=(0.5, -0.18),
        ncol=3,
        frameon=False
    )

    fancy_path = os.path.join(dataset_dir, "bubble_chart_fancy.png")
    plt.tight_layout()
    plt.savefig(fancy_path)
    plt.close()
    print(f"✅ Saved: {fancy_path}")

    # === 4. Step Plot for Cost Function over Iterations ===
    cost_data = subset_iter[['iteration', 'Optimizer', 'Cost Function']].copy()
    marker_positions = np.arange(5, cost_data['iteration'].max() + 1, 5)  # a dot every 5th iteration
    # Local names are chosen so the notebook-level `optimizers` dict and
    # `optimizer` instance from the optimization cell are not overwritten
    optimizer_names = cost_data['Optimizer'].unique()
    palette = sns.color_palette('tab10', len(optimizer_names))
    color_dict = dict(zip(optimizer_names, palette))

    plt.figure(figsize=(12, 6))
    for optimizer_name, group in cost_data.groupby('Optimizer'):
        plt.plot(
            group['iteration'],
            group['Cost Function'],
            label=optimizer_name,
            color=color_dict[optimizer_name],
            drawstyle='steps-post',
            linewidth=2
        )
        marker_group = group[group['iteration'].isin(marker_positions)]
        plt.scatter(
            marker_group['iteration'],
            marker_group['Cost Function'],
            color=color_dict[optimizer_name],
            s=20,
            zorder=5
        )

    plt.title(f'{feature_set.capitalize()} - Optimizer Cost Function Progress', fontsize=16)
    plt.xlabel('Iteration', fontsize=14)
    plt.ylabel('Cost Function', fontsize=14)
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', title='Optimizer', fontsize=10)
    plt.tight_layout()

    step_path = os.path.join(dataset_dir, "cost_function_step_plot.png")
    plt.savefig(step_path)
    plt.close()
    print(f"✅ Saved: {step_path}")

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import os

# === Load the optimizer summary CSV ===
# Read from the same results directory the optimization cell wrote to
file_path = os.path.join(result_root_dir, "all_optimizer_summary.csv")
df = pd.read_csv(file_path)

# === Mapping short optimizer codes to readable labels for legends ===
# Keys must match the values of the "Optimizer" column exactly
optimizer_label_map = {
    "JASA": "Jellyfish",
    "DHOA-SA": "DHOA-SA",
    "COS": "Crisscross",
    "IWO-SA": "Weed-SA",
    "COIWSO-SA": "COIWSO-SA"
}

# === High-contrast color scheme for distinguishable lines ===
colors = {
    "JASA": "#E41A1C",       # Red
    "DHOA-SA": "#377EB8",    # Blue
    "COS": "#4DAF4A",        # Green
    "COIWSO-SA": "#FF7F00",  # Orange
    "IWO-SA": "#984EA3",     # Purple
}

# === Identify the unique EEG datasets (feature sets) ===
feature_sets = df["Feature Set"].unique()
shown_feature_sets = feature_sets[:4]  # the grid below has four panels

# === Create subplots: 2 rows × 2 columns ===
fig, axes = plt.subplots(2, 2, figsize=(12, 10))
axes = axes.flatten()  # Make it easier to loop through

# === Plot cost function trends for the first 4 feature sets ===
for idx, feature_set in enumerate(shown_feature_sets):
    ax = axes[idx]
    subset = df[df["Feature Set"] == feature_set]

    # Plot each optimizer's progress over iterations
    for optimizer_name in subset["Optimizer"].unique():
        opt_data = subset[subset["Optimizer"] == optimizer_name]
        ax.plot(
            opt_data["iteration"],
            opt_data["Cost Function"],
            marker='o',
            linewidth=2,
            markersize=5,
            label=optimizer_label_map.get(optimizer_name, optimizer_name),   # Fallback if label missing
            color=colors.get(optimizer_name, "gray")                         # Fallback to gray if color missing
        )

    # Add subplot title ("(a) name", "(b) name", ...) and axes labels
    ax.set_title(f"({chr(97 + idx)}) {feature_set}", loc='left', fontsize=13, fontweight='bold')
    ax.set_xlabel("Iterations", fontsize=12)
    ax.set_ylabel("Cost Function", fontsize=12)
    ax.grid(True, linestyle='--', alpha=0.5)
    ax.legend(fontsize=10)

# Hide panels that have no feature set to show (fewer than four in the CSV)
for spare_ax in axes[len(shown_feature_sets):]:
    spare_ax.set_visible(False)

# === Final layout adjustment and display ===
plt.tight_layout()
plt.show()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import os

# === Load optimizer summary CSV ===
# Read from the same results directory the optimization cell wrote to
file_path = os.path.join(result_root_dir, "all_optimizer_summary.csv")
df = pd.read_csv(file_path)

# === Map internal optimizer names to readable labels ===
# Keys must match the values of the "Optimizer" column exactly
optimizer_label_map = {
    "JASA": "Jellyfish",
    "DHOA-SA": "DHOA-SA",
    "COS": "Crisscross",
    "IWO-SA": "Weed-SA",
    "COIWSO-SA": "COIWSO-SA"
}

# === Assign high-contrast colors to each optimizer ===
colors = {
    "JASA": "#E41A1C",       # Bright red
    "DHOA-SA": "#377EB8",    # Blue
    "COS": "#4DAF4A",        # Green
    "COIWSO-SA": "#FF7F00",  # Orange
    "IWO-SA": "#984EA3",     # Purple
}

# === Create folder for separate plots ===
output_dir = os.path.join(result_root_dir, "Separate_Figures")
os.makedirs(output_dir, exist_ok=True)

# === Generate one plot per feature set ===
feature_sets = df["Feature Set"].unique()

for feature_set in feature_sets:
    fig, ax = plt.subplots(figsize=(8, 6))
    subset = df[df["Feature Set"] == feature_set]

    # Plot each optimizer’s cost curve
    for optimizer_name in subset["Optimizer"].unique():
        opt_data = subset[subset["Optimizer"] == optimizer_name]
        ax.plot(
            opt_data["iteration"],
            opt_data["Cost Function"],
            marker='o',
            linewidth=2,
            markersize=5,
            label=optimizer_label_map.get(optimizer_name, optimizer_name),  # fallback if not in map
            color=colors.get(optimizer_name, "gray")                        # fallback color
        )

    # Format the plot (no title: the feature set is carried by the filename)
    ax.set_xlabel("Iterations", fontsize=12)
    ax.set_ylabel("Cost Function", fontsize=12)
    ax.grid(True, linestyle='--', alpha=0.5)
    ax.legend(fontsize=10)
    fig.tight_layout()

    # Save the figure with a clean filename
    save_path = os.path.join(output_dir, f"{feature_set.replace(' ', '_')}_Cost_Function_Analysis.png")
    fig.savefig(save_path, dpi=300)
    plt.close(fig)

print("✅ All separate plots saved to:", output_dir)