In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import train_test_split
# -----------------------------------------------------------------------------
# Run selection
# These values are interpolated into the directory and file names below, so
# they must match the training run whose artifacts are being analyzed.
# -----------------------------------------------------------------------------
MODE = "ALL_VALIDS"
FEATURE_MODE = "DEFAULT"
MODEL_NAME = "cnn_att_lstm"
dataset = "Italian"

# -----------------------------------------------------------------------------
# Derived locations
# Everything is resolved relative to the current working directory.
# -----------------------------------------------------------------------------
BASE_PATH = os.getcwd()
_dataset_dir = os.path.join(BASE_PATH, dataset)

# <cwd>/<dataset>/results_<MODE>_<FEATURE_MODE>/<MODEL_NAME>
MODEL_RESULT_PATH = os.path.join(_dataset_dir, f"results_{MODE}_{FEATURE_MODE}", MODEL_NAME)

# Artifacts read from / written next to the model results.
HISTORY_FILE_PATH = os.path.join(MODEL_RESULT_PATH, "history.csv")
BEST_MODEL_PATH = os.path.join(MODEL_RESULT_PATH, "best_model.keras")
FEATURES_FILE_PATH = os.path.join(_dataset_dir, "data", f"features_{MODE}_{FEATURE_MODE}.npz")

# Output directory for the generated figures; created (with parents) if missing.
PLOTS_SAVE_PATH = os.path.join(MODEL_RESULT_PATH, "performance_plots")
os.makedirs(PLOTS_SAVE_PATH, exist_ok=True)

def plot_history_metric(history_df, metric, val_metric, title, ylabel, save_path):
    """Plot a training metric against its validation counterpart and save it as a PNG.

    A dashed vertical line marks the epoch with the best validation value.

    Args:
        history_df: DataFrame of training history. Must contain an 'epoch'
            column (used as the x-axis) plus the `metric` and `val_metric`
            columns.
        metric: Column name of the training metric (e.g. 'loss').
        val_metric: Column name of the validation metric (e.g. 'val_loss').
            If the name contains 'auc' or 'accuracy', "best" means the
            highest value; otherwise the lowest.
        title: Figure title.
        ylabel: Y-axis label. Also used in the legend entries and, lower-cased
            with spaces replaced by underscores, as the output file name
            ('<ylabel>_plot.png').
        save_path: Directory the PNG is written into. It is not created by
            this function.

    Returns:
        None. The figure is written to disk and then closed.
    """
    plt.style.use('seaborn-v0_8-whitegrid')
    plt.figure(figsize=(10, 6))

    # Use the 'epoch' column for the x-axis
    epochs = history_df['epoch']

    plt.plot(epochs, history_df[metric], 'o-', color='dodgerblue', label=f'Training {ylabel}')
    plt.plot(epochs, history_df[val_metric], 'o-', color='darkorange', label=f'Validation {ylabel}')

    # Locate the row with the best validation value. idxmax/idxmin return the
    # row's *index label*, which is not necessarily the epoch number, so map it
    # through the 'epoch' column to get an x-coordinate on the same axis as the
    # curves above.
    val_series = history_df[val_metric]
    higher_is_better = 'auc' in val_metric or 'accuracy' in val_metric
    best_row = val_series.idxmax() if higher_is_better else val_series.idxmin()
    best_epoch = epochs.loc[best_row]
    plt.axvline(x=best_epoch, color='crimson', linestyle='--', linewidth=1, label=f'Best Epoch: {best_epoch}')

    plt.title(title, fontsize=16)
    plt.xlabel('Epoch', fontsize=12)
    plt.ylabel(ylabel, fontsize=12)
    # Build the legend once, after every labelled artist exists, so the
    # best-epoch entry is included and the font size is kept.
    plt.legend(fontsize=12)
    plt.tight_layout()

    filename = f"{ylabel.lower().replace(' ', '_')}_plot.png"
    plt.savefig(os.path.join(save_path, filename), dpi=300)
    plt.close()
    print(f"✅ Successfully saved '{filename}'")

def plot_roc_curve(model, X_test, y_test, save_path):
    """Predict on the test data with `model` and save its ROC curve as 'roc_curve.png'.

    Failures are reported with a printed message instead of being raised, so a
    problem here does not stop the rest of the analysis.

    Args:
        model: An already-loaded model exposing `predict(X_test)`; the result
            is flattened with `.ravel()` and used as the score for each sample.
        X_test: Test inputs passed straight to `model.predict`.
        y_test: True binary labels passed to `roc_curve`.
        save_path: Directory the PNG is written into. It is not created by
            this function.

    Returns:
        None.
    """
    print("\nGenerating ROC Curve...")
    fig = None  # set once a figure exists so it can be closed on every path
    try:
        y_pred_probs = model.predict(X_test).ravel()
        fpr, tpr, _ = roc_curve(y_test, y_pred_probs)
        roc_auc = auc(fpr, tpr)

        plt.style.use('seaborn-v0_8-whitegrid')
        fig = plt.figure(figsize=(8, 8))
        plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (AUC = {roc_auc:.3f})')
        # Diagonal reference line for a classifier with no discriminative power.
        plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--', label='Chance')

        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate', fontsize=12)
        plt.ylabel('True Positive Rate', fontsize=12)
        plt.title('Receiver Operating Characteristic (ROC)', fontsize=16)
        plt.legend(loc="lower right", fontsize=12)
        plt.tight_layout()
        plt.savefig(os.path.join(save_path, "roc_curve.png"), dpi=300)
        print("✅ Successfully saved 'roc_curve.png'")
    except Exception as e:
        # Deliberately best-effort: report and continue.
        print(f"❌ Error generating ROC curve: {e}")
    finally:
        # Close the figure even when plotting or saving failed; otherwise it
        # stays registered with pyplot.
        if fig is not None:
            plt.close(fig)
import matplotlib.pyplot as plt
import os
import matplotlib.pyplot as plt
import os
import pandas as pd # Make sure pandas is imported

def plot_auc_boxplot(auc_scores, save_path, model_label='CNN-Attention-LSTM Model'):
    """Save a box plot of AUC scores as 'auc_distribution_boxplot.png'.

    The mean and (sample) standard deviation of the scores are annotated in
    the lower-right corner of the axes.

    Args:
        auc_scores: One-dimensional collection of numeric AUC values (list,
            NumPy array, pandas Series, ...). Missing values (NaN) are
            ignored, with a printed warning.
        save_path: Directory the PNG is written into. It is not created by
            this function.
        model_label: Tick label shown under the box. Defaults to the label
            that was previously hard-coded.

    Returns:
        None. Nothing is plotted (a warning is printed instead) when fewer
        than two valid scores are available.
    """
    # Normalise every accepted input type to a float Series so the emptiness
    # check, NaN handling and statistics below behave the same for all of them.
    scores = pd.Series(auc_scores, dtype=float)
    if scores.empty:
        print("⚠️ Warning: The provided AUC score Series is empty.")
        return

    # Drop missing values once, up front, so the box, the mean and the standard
    # deviation are all computed from exactly the same data points.
    valid_scores = scores.dropna()
    dropped = len(scores) - len(valid_scores)
    if dropped:
        print(f"⚠️ Warning: Ignoring {dropped} missing (NaN) AUC score(s).")
    if len(valid_scores) < 2:
        print("⚠️ Warning: Cannot generate a box plot with fewer than two data points.")
        return

    plt.style.use('seaborn-v0_8-whitegrid')
    plt.figure(figsize=(8, 6))

    # Create the box plot. A plain NumPy array is passed so the Series' index
    # labels play no role; vertical orientation is matplotlib's default.
    plt.boxplot(valid_scores.to_numpy(), patch_artist=True,
                boxprops=dict(facecolor='lightblue', color='black'),
                medianprops=dict(color='red', linewidth=2))

    # Add labels and title
    plt.title('AUC Score Distribution Across Folds', fontsize=16)
    plt.ylabel('Area Under Curve (AUC)', fontsize=12)
    plt.xticks([1], [model_label])
    plt.ylim(top=1.0)  # AUC cannot exceed 1
    plt.grid(axis='y', linestyle='--', alpha=0.7)

    # Add text for mean and standard deviation
    mean_auc = valid_scores.mean()
    std_auc = valid_scores.std()
    plt.text(0.95, 0.05, f'Mean: {mean_auc:.3f}\nStd Dev: {std_auc:.3f}',
             transform=plt.gca().transAxes,
             fontsize=12, verticalalignment='bottom', horizontalalignment='right',
             bbox=dict(boxstyle='round,pad=0.5', fc='wheat', alpha=0.5))

    plt.tight_layout()
    filename = "auc_distribution_boxplot.png"
    plt.savefig(os.path.join(save_path, filename), dpi=300)
    plt.close()
    print(f"✅ Successfully saved '{filename}'")

In [2]:
# Load the training history saved alongside the model results.
history_df = pd.read_csv(HISTORY_FILE_PATH)

# One entry per figure: (training column, validation column, title, y-label).
# AUC is drawn as a curve over epochs like the other two metrics; a box plot
# shows a distribution of scores, which a single training run does not provide.
_history_plots = [
    ('loss', 'val_loss', 'Model Loss Over Epochs', 'Loss'),
    ('accuracy', 'val_accuracy', 'Model Accuracy Over Epochs', 'Accuracy'),
    ('auc', 'val_auc', 'Model AUC Over Epochs', 'AUC'),
]

for _train_col, _val_col, _plot_title, _axis_label in _history_plots:
    plot_history_metric(history_df, _train_col, _val_col, _plot_title, _axis_label, PLOTS_SAVE_PATH)

✅ Successfully saved 'loss_plot.png'
✅ Successfully saved 'accuracy_plot.png'
✅ Successfully saved 'auc_plot.png'


In [3]:
# Display the loaded training history table for a quick visual check.
history_df

Unnamed: 0,epoch,accuracy,auc,loss,val_accuracy,val_auc,val_loss
0,0,0.583691,0.609687,2.832635,0.732857,0.820061,2.282528
1,1,0.677396,0.743753,2.107424,0.79,0.880837,1.833909
2,2,0.761803,0.834711,1.708853,0.794286,0.905849,1.462852
3,3,0.804363,0.888337,1.358909,0.854286,0.949331,1.163799
4,4,0.841559,0.913478,1.11538,0.827143,0.940996,1.00993
5,5,0.856938,0.933584,0.924537,0.892857,0.962976,0.812156
6,6,0.863376,0.939596,0.801912,0.905714,0.966,0.684914
7,7,0.862661,0.939557,0.712483,0.894286,0.971759,0.597414
8,8,0.896638,0.960613,0.582238,0.932857,0.979882,0.486717
9,9,0.895565,0.961164,0.529643,0.91,0.969163,0.510319


In [4]:
# NOTE: placeholder input. `history_df['auc']` is the 'auc' column of a single
# training history (one value per row/epoch), not a set of cross-validation
# scores. Replace it with your actual per-fold (or per-run) AUC values before
# interpreting the resulting box plot.
final_auc_scores_from_cv = history_df['auc']

print("\nGenerating AUC Box Plot from Cross-Validation Scores...")
plot_auc_boxplot(final_auc_scores_from_cv, PLOTS_SAVE_PATH)


Generating AUC Box Plot from Cross-Validation Scores...
✅ Successfully saved 'auc_distribution_boxplot.png'
