# Methodology Visuals

In [None]:
import os
import json
import joblib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
import optuna
import optuna.visualization as vis
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc
from sklearn.preprocessing import label_binarize

### DenseNet-121 Hyperparameter Tuning

In [None]:

# Configuration: root folder of the DenseNet-121 Optuna run and its per-trial logs.
output_dir = r"C:/Users/Xuxu/Desktop/Master Thesis/OptunaDensenetFull"
trial_logs_dir = os.path.join(output_dir, "trial_logs")


# Load all metrics.csv logs from Optuna trials; every frame is tagged with its
# trial id (the folder name with the "trial_" prefix removed).
all_trials = []
for trial_name in sorted(os.listdir(trial_logs_dir)):
    trial_path = os.path.join(trial_logs_dir, trial_name, "version_0", "metrics.csv")
    if not os.path.exists(trial_path):
        continue  # entries without a metrics log are ignored
    df_trial = pd.read_csv(trial_path)
    trial_id = trial_name.replace("trial_", "")
    df_trial["trial"] = trial_id
    all_trials.append(df_trial)

# Abort with an exception instead of exit(): exit() ends the whole interpreter
# session, whereas a raised error only stops this cell and states the reason.
if not all_trials:
    raise FileNotFoundError(f"No metrics.csv files found in: {trial_logs_dir}")

df_all = pd.concat(all_trials, ignore_index=True)

# Identify the best trial based on final validation F1 score
if "val_f1" not in df_all.columns:
    raise ValueError("The 'val_f1' column was not found in the metric logs.")

# Per trial, keep the last row (highest epoch) that carries a val_f1 value.
last_val_f1_per_trial = (
    df_all.dropna(subset=["val_f1"])
    .sort_values(by=["trial", "epoch"])
    .groupby("trial")
    .tail(1)
)

sorted_trials = last_val_f1_per_trial.sort_values("val_f1", ascending=False).reset_index(drop=True)
if sorted_trials.empty:
    # Without this guard the iloc[0] below fails with an unhelpful IndexError.
    raise ValueError("No non-NaN 'val_f1' values were logged by any trial.")

# Display best trial based on final validation F1 score
best_trial_row = sorted_trials.iloc[0]
best_trial_id = best_trial_row["trial"]
print("\nBest Trial Based on Final Validation F1 Score:")
print(f"Trial ID: {best_trial_id} | Final Val F1: {best_trial_row['val_f1']:.4f}\n")

# Display summary of all trials sorted by final F1 score
print("Summary of All Trials (Sorted by Final Val F1):")
for trial_label, final_f1 in zip(sorted_trials["trial"], sorted_trials["val_f1"]):
    print(f"Trial {trial_label} | Val F1: {final_f1:.4f}")

# Define improved plotting function for metrics
def plot_metric(df_trial, metric_col, title):
    """Draw one logged metric against the epoch for a single trial.

    Prints a notice and returns without plotting when ``df_trial`` is ``None``,
    does not contain ``metric_col``, or has no non-NaN value for it.
    """
    metric_available = df_trial is not None and metric_col in df_trial.columns
    if not metric_available:
        print(f"Metric '{metric_col}' not found.")
        return

    # Epoch-ordered rows that actually carry a value for this metric.
    curve = df_trial.sort_values(by="epoch").dropna(subset=[metric_col])
    if curve.shape[0] == 0:
        print(f"No data available to plot for '{metric_col}'.")
        return

    # "val_f1" -> "Val f1"; used for both the legend entry and the y-axis label.
    pretty_name = metric_col.replace("_", " ").capitalize()

    plt.figure(figsize=(10, 6))
    line_style = dict(
        color="#2E8B57",  # SeaGreen for main line
        linestyle="-",
        linewidth=2.5,
        alpha=0.9,
    )
    plt.plot(curve["epoch"], curve[metric_col], label=pretty_name, **line_style)

    plt.title(str(title), fontsize=18, fontweight="bold")
    plt.xlabel("Epoch", fontsize=14)
    plt.ylabel(pretty_name, fontsize=14)
    plt.xticks(fontsize=12)
    plt.yticks(fontsize=12)
    plt.legend(fontsize=12, loc="lower right")
    plt.tight_layout()
    plt.show()

# Curves of the best trial: validation loss and validation F1 score per epoch.
df_best = df_all.loc[df_all["trial"] == best_trial_id]

metrics_to_plot = dict(
    val_loss="Validation Loss Over Epochs",
    val_f1="Validation F1 Score Over Epochs",
)

for metric_col, title in metrics_to_plot.items():
    plot_metric(df_best, metric_col, title)

# Optuna study saved by the tuning run (a joblib pickle; only load files you trust).
study_file = r"C:\Users\Xuxu\Desktop\Master Thesis\OptunaDensenetFull\new_densenet_study.pkl"
study = joblib.load(study_file)

# Optimization history
fig1 = vis.plot_optimization_history(study)
fig1.show()

# Hyperparameter importance
fig2 = vis.plot_param_importances(study)
fig2.show()

# Parallel coordinates
fig3 = vis.plot_parallel_coordinate(study)
fig3.show()

In [None]:

# Export the trials of the currently loaded study as a DataFrame; the bare name
# on the last line makes the notebook display it as the cell output.
trials_df = study.trials_dataframe()
trials_df


### ConvNeXt-Tiny Hyperparameter Tuning

In [None]:
# configuration: root folder of the ConvNeXt Optuna run and its per-trial logs
output_dir = r"C:/Users/Xuxu/Desktop/Master Thesis/OptunaConvNeXtFull"
trial_logs_dir = os.path.join(output_dir, "trial_logs")

# load all metrics.csv logs from optuna trials; every frame is tagged with its
# trial id (the folder name with the "trial_" prefix removed)
all_trials = []
for trial_name in sorted(os.listdir(trial_logs_dir)):
    trial_path = os.path.join(trial_logs_dir, trial_name, "version_0", "metrics.csv")
    if not os.path.exists(trial_path):
        continue  # entries without a metrics log are ignored
    df_trial = pd.read_csv(trial_path)
    trial_id = trial_name.replace("trial_", "")
    df_trial["trial"] = trial_id
    all_trials.append(df_trial)

# abort with an exception instead of exit(): exit() ends the whole interpreter
# session, whereas a raised error only stops this cell and states the reason
if not all_trials:
    raise FileNotFoundError(f"no metrics.csv files found in: {trial_logs_dir}")

df_all = pd.concat(all_trials, ignore_index=True)

# identify the best trial based on final validation f1 score
if "val_f1" not in df_all.columns:
    raise ValueError("the 'val_f1' column was not found in the metric logs.")

# per trial, keep the last row (highest epoch) that carries a val_f1 value
last_val_f1_per_trial = (
    df_all.dropna(subset=["val_f1"])
    .sort_values(by=["trial", "epoch"])
    .groupby("trial")
    .tail(1)
)

sorted_trials = last_val_f1_per_trial.sort_values("val_f1", ascending=False).reset_index(drop=True)
if sorted_trials.empty:
    # without this guard the iloc[0] below fails with an unhelpful IndexError
    raise ValueError("no non-NaN 'val_f1' values were logged by any trial.")

# display best trial based on final validation f1 score
best_trial_row = sorted_trials.iloc[0]
best_trial_id = best_trial_row["trial"]
print("\nbest trial based on final validation f1 score:")
print(f"trial id: {best_trial_id} | final val f1: {best_trial_row['val_f1']:.4f}\n")

# display summary of all trials sorted by final f1 score
print("summary of all trials (sorted by final val f1):")
for trial_label, final_f1 in zip(sorted_trials["trial"], sorted_trials["val_f1"]):
    print(f"trial {trial_label} | val f1: {final_f1:.4f}")

# load optuna study and best hyperparameters
# (joblib pickle: only load study files you trust)
pkl_path = r"C:/Users/Xuxu/Desktop/Master Thesis/OptunaConvNeXtFull/new_convnext_study.pkl"
study = joblib.load(pkl_path)

print("\nbest hyperparameters (from optuna):")
for key, value in study.best_trial.params.items():
    print(f"{key}: {value}")

# define improved plotting function for metrics
def plot_metric(df_trial, metric_col, title):
    """Plot a single logged metric over the epochs of one trial.

    A notice is printed and nothing is drawn when ``df_trial`` is ``None``,
    lacks ``metric_col``, or holds only NaN for that column.
    """
    if df_trial is None:
        print(f"metric '{metric_col}' not found.")
        return
    if metric_col not in df_trial.columns:
        print(f"metric '{metric_col}' not found.")
        return

    # rows with a value for the metric, in epoch order
    ordered = df_trial.sort_values(by="epoch")
    points = ordered.dropna(subset=[metric_col])
    if not len(points):
        print(f"no data available to plot for '{metric_col}'.")
        return

    # "val_f1" -> "Val f1"; shared by legend entry and y-axis label
    display_name = metric_col.replace("_", " ").capitalize()
    epochs = points["epoch"]
    values = points[metric_col]

    plt.figure(figsize=(10, 6))
    plt.plot(
        epochs,
        values,
        label=display_name,
        color="#2E8B57",  # seagreen for main line
        linestyle="-",
        linewidth=2.5,
        alpha=0.9,
    )

    plt.title(str(title), fontsize=18, fontweight="bold")
    plt.xlabel("epoch", fontsize=14)
    plt.ylabel(display_name, fontsize=14)
    plt.xticks(fontsize=12)
    plt.yticks(fontsize=12)
    plt.legend(fontsize=12, loc="lower right")
    plt.tight_layout()
    plt.show()

# curves of the best trial: validation loss and validation f1 score per epoch
df_best = df_all.loc[df_all["trial"] == best_trial_id]

metrics_to_plot = dict(
    val_loss="validation loss over epochs",
    val_f1="validation f1 score over epochs",
)

for metric_col, title in metrics_to_plot.items():
    plot_metric(df_best, metric_col, title)

# optuna study saved by the tuning run (a joblib pickle; only load files you trust)
study_file = r"C:\Users\Xuxu\Desktop\Master Thesis\OptunaConvNeXtFull\new_convnext_study.pkl"
study = joblib.load(study_file)

# optimization history
fig1 = vis.plot_optimization_history(study)
fig1.show()

# hyperparameter importance
fig2 = vis.plot_param_importances(study)
fig2.show()

# parallel coordinates
fig3 = vis.plot_parallel_coordinate(study)
fig3.show()


In [None]:

# Export the trials of the currently loaded study as a DataFrame; the bare name
# on the last line makes the notebook display it as the cell output.
trials_df = study.trials_dataframe()
trials_df

### EfficientNet-B0 Hyperparameter Tuning

In [None]:
# configuration: root folder of the EfficientNet-B0 Optuna run and its per-trial logs
output_dir = r"C:/Users/Xuxu/Desktop/Master Thesis/OptunaEfficientNetB0Full"
trial_logs_dir = os.path.join(output_dir, "trial_logs")

# load all metrics.csv logs from optuna trials; every frame is tagged with its
# trial id (the folder name with the "trial_" prefix removed)
all_trials = []
for trial_name in sorted(os.listdir(trial_logs_dir)):
    trial_path = os.path.join(trial_logs_dir, trial_name, "version_0", "metrics.csv")
    if not os.path.exists(trial_path):
        continue  # entries without a metrics log are ignored
    df_trial = pd.read_csv(trial_path)
    trial_id = trial_name.replace("trial_", "")
    df_trial["trial"] = trial_id
    all_trials.append(df_trial)

# abort with an exception instead of exit(): exit() ends the whole interpreter
# session, whereas a raised error only stops this cell and states the reason
if not all_trials:
    raise FileNotFoundError(f"no metrics.csv files found in: {trial_logs_dir}")

df_all = pd.concat(all_trials, ignore_index=True)

# identify the best trial based on final validation f1 score
if "val_f1" not in df_all.columns:
    raise ValueError("the 'val_f1' column was not found in the metric logs.")

# per trial, keep the last row (highest epoch) that carries a val_f1 value
last_val_f1_per_trial = (
    df_all.dropna(subset=["val_f1"])
    .sort_values(by=["trial", "epoch"])
    .groupby("trial")
    .tail(1)
)

sorted_trials = last_val_f1_per_trial.sort_values("val_f1", ascending=False).reset_index(drop=True)
if sorted_trials.empty:
    # without this guard the iloc[0] below fails with an unhelpful IndexError
    raise ValueError("no non-NaN 'val_f1' values were logged by any trial.")

# display best trial based on final validation f1 score
best_trial_row = sorted_trials.iloc[0]
best_trial_id = best_trial_row["trial"]
print("\nbest trial based on final validation f1 score:")
print(f"trial id: {best_trial_id} | final val f1: {best_trial_row['val_f1']:.4f}\n")

# display summary of all trials sorted by final f1 score
print("summary of all trials (sorted by final val f1):")
for trial_label, final_f1 in zip(sorted_trials["trial"], sorted_trials["val_f1"]):
    print(f"trial {trial_label} | val f1: {final_f1:.4f}")

# load optuna study and best hyperparameters
# (joblib pickle: only load study files you trust)
pkl_path = r"C:/Users/Xuxu/Desktop/Master Thesis/OptunaEfficientNetB0Full/new_efficientnet_study.pkl"
study = joblib.load(pkl_path)

print("\nbest hyperparameters (from optuna):")
for key, value in study.best_trial.params.items():
    print(f"{key}: {value}")

# define improved plotting function for metrics
def plot_metric(df_trial, metric_col, title):
    """Render the per-epoch curve of one metric for a single trial.

    Returns early, after printing a notice, when ``df_trial`` is ``None``,
    has no ``metric_col`` column, or has no non-NaN value in that column.
    """
    missing = df_trial is None or metric_col not in df_trial.columns
    if missing:
        print(f"metric '{metric_col}' not found.")
        return

    # keep only rows that carry the metric, ordered by epoch
    series = df_trial.sort_values(by="epoch").dropna(subset=[metric_col])
    if series.shape[0] < 1:
        print(f"no data available to plot for '{metric_col}'.")
        return

    # "val_f1" -> "Val f1"; reused for the legend entry and the y-axis label
    readable = metric_col.replace("_", " ").capitalize()

    plt.figure(figsize=(10, 6))
    plot_kwargs = {
        "color": "#2E8B57",  # seagreen for main line
        "linestyle": "-",
        "linewidth": 2.5,
        "alpha": 0.9,
        "label": readable,
    }
    plt.plot(series["epoch"], series[metric_col], **plot_kwargs)

    plt.title(str(title), fontsize=18, fontweight="bold")
    plt.xlabel("epoch", fontsize=14)
    plt.ylabel(readable, fontsize=14)
    plt.xticks(fontsize=12)
    plt.yticks(fontsize=12)
    plt.legend(fontsize=12, loc="lower right")
    plt.tight_layout()
    plt.show()

# curves of the best trial: validation loss and validation f1 score per epoch
df_best = df_all.loc[df_all["trial"] == best_trial_id]

metrics_to_plot = dict(
    val_loss="validation loss over epochs",
    val_f1="validation f1 score over epochs",
)

for metric_col, title in metrics_to_plot.items():
    plot_metric(df_best, metric_col, title)

# optuna study saved by the tuning run (a joblib pickle; only load files you trust)
study_file = r"C:\Users\Xuxu\Desktop\Master Thesis\OptunaEfficientNetB0Full\new_efficientnet_study.pkl"
study = joblib.load(study_file)

# optimization history
fig1 = vis.plot_optimization_history(study)
fig1.show()

# hyperparameter importance
fig2 = vis.plot_param_importances(study)
fig2.show()

# parallel coordinates
fig3 = vis.plot_parallel_coordinate(study)
fig3.show()


In [None]:

# Export the trials of the currently loaded study as a DataFrame; the bare name
# on the last line makes the notebook display it as the cell output.
trials_df = study.trials_dataframe()
trials_df

### DenseNet-121 + Best Hyperparameters

In [None]:
# configuration
save_dir = r"C:/Users/Xuxu/Desktop/Master Thesis/BestHyperDensenet121Full"
class_idx_path = r"C:/Users/Xuxu/Desktop/Master Thesis/OptunaDensenetFull/class_to_idx.json"
num_folds = 5

# load class mapping (name -> index) and invert it into an index-ordered name list
with open(class_idx_path, "r") as f:
    class_to_idx = json.load(f)
idx_to_class = {v: k for k, v in class_to_idx.items()}
class_names = [idx_to_class[i] for i in range(len(idx_to_class))]

# load predictions across folds; folds missing either .npy file are skipped
all_preds_all_folds = []
all_targets_all_folds = []

for fold in range(num_folds):
    fold_dir = os.path.join(save_dir, f"fold_{fold}", "version_0")
    preds_path = os.path.join(fold_dir, "all_preds.npy")
    targets_path = os.path.join(fold_dir, "all_targets.npy")

    if os.path.exists(preds_path) and os.path.exists(targets_path):
        all_preds_all_folds.extend(np.load(preds_path))
        all_targets_all_folds.extend(np.load(targets_path))

# convert predictions and targets to numpy arrays
all_preds_all_folds = np.array(all_preds_all_folds)
all_targets_all_folds = np.array(all_targets_all_folds)

# fold-wise final metrics summary
metrics_cols = [
    "train_f1", "train_precision", "train_recall", "train_loss",
    "val_f1", "val_precision", "val_recall", "val_loss"
]
train_cols = metrics_cols[:4]
val_cols = metrics_cols[4:]
metrics_summary = []
fold_labels = []  # labels of the folds that actually contributed a row

for fold in range(num_folds):
    metrics_file = os.path.join(save_dir, f"fold_{fold}", "version_0", "metrics.csv")
    if not os.path.exists(metrics_file):
        print(f"fold {fold}: no metrics.csv found, skipped")
        continue

    df = pd.read_csv(metrics_file)

    # train and val values are taken from separate rows: the last epoch-ordered
    # row that has a train_f1 value and the last one that has a val_f1 value
    df_val = df.dropna(subset=["val_f1"]).sort_values("epoch")
    df_train = df.dropna(subset=["train_f1"]).sort_values("epoch")
    if df_val.empty or df_train.empty:
        # iloc[-1] on an empty frame would raise IndexError
        print(f"fold {fold}: no logged train/val f1 values, skipped")
        continue

    last_val = df_val.iloc[-1]
    last_train = df_train.iloc[-1]

    metrics_summary.append(
        [last_train.get(col, np.nan) for col in train_cols]
        + [last_val.get(col, np.nan) for col in val_cols]
    )
    fold_labels.append(f"fold {fold}")

# convert to dataframe; index by the folds that were really loaded, because
# labelling with range(num_folds) raises a length mismatch once a fold is skipped
df_summary = pd.DataFrame(metrics_summary, columns=metrics_cols, index=fold_labels)

# display fold-wise metrics summary
print("\n=== fold-wise metrics summary ===")
for idx, row in df_summary.iterrows():
    print(f"{idx}: "
          f"train f1: {row['train_f1']:.4f}, "
          f"val f1: {row['val_f1']:.4f}, "
          f"val precision: {row['val_precision']:.4f}, "
          f"val recall: {row['val_recall']:.4f}, "
          f"val loss: {row['val_loss']:.4f}")

# display mean and standard deviation across folds
print("\n=== mean and standard deviation across folds ===")
mean_std = df_summary.agg(["mean", "std"]).round(4)

# labels keep their original padding so the printed lines are unchanged
for label, col in (
    ("mean train f1: ", "train_f1"),
    ("mean val f1:   ", "val_f1"),
    ("mean val precision: ", "val_precision"),
    ("mean val recall:    ", "val_recall"),
    ("mean val loss:      ", "val_loss"),
):
    print(f"{label}{mean_std.loc['mean', col]:.4f}, std: {mean_std.loc['std', col]:.4f}")




In [None]:
# pick the fold whose last logged validation f1 is highest (the first fold wins ties)
best_fold, best_val_f1 = None, -1

for fold in range(num_folds):
    path = os.path.join(save_dir, f"fold_{fold}", "version_0", "metrics.csv")
    if not os.path.exists(path):
        continue
    val_scores = pd.read_csv(path)["val_f1"].dropna()
    if val_scores.empty:
        continue
    final_val_f1 = val_scores.iloc[-1]
    if final_val_f1 > best_val_f1:
        best_fold, best_val_f1 = fold, final_val_f1

# train/validation f1 curves of that fold; nothing is drawn when no fold had data
if best_fold is not None:
    path = os.path.join(save_dir, f"fold_{best_fold}", "version_0", "metrics.csv")
    df = pd.read_csv(path)
    df_train = df.dropna(subset=["train_f1"]).reset_index(drop=True)
    df_val = df.dropna(subset=["val_f1"]).reset_index(drop=True)

    plt.figure(figsize=(10, 6))
    curves = (
        (df_train, "train_f1", "train f1", "#2E8B57"),
        (df_val, "val_f1", "validation f1", "#B22222"),
    )
    for frame, column, legend_label, line_color in curves:
        plt.plot(frame["epoch"], frame[column], label=legend_label, color=line_color, linewidth=2.5)

    plt.title(f"training and validation f1 score (best fold {best_fold})", fontsize=18, fontweight="bold")
    plt.xlabel("epoch", fontsize=14)
    plt.ylabel("f1 score", fontsize=14)
    plt.xticks(fontsize=12)
    plt.yticks(fontsize=12)

    # legend sits in the lower-right corner
    plt.legend(fontsize=12, loc="lower right")

    plt.tight_layout()
    plt.show()



### EfficientNet-B0 + Best Hyperparameters

In [None]:
# configuration
save_dir = r"C:/Users/Xuxu/Desktop/Master Thesis/BestHyperEfficientNetB0Full"
class_idx_path = r"C:/Users/Xuxu/Desktop/Master Thesis/OptunaDensenetFull/class_to_idx.json"
num_folds = 5

# load class mapping (name -> index) and invert it into an index-ordered name list
with open(class_idx_path, "r") as f:
    class_to_idx = json.load(f)
idx_to_class = {v: k for k, v in class_to_idx.items()}
class_names = [idx_to_class[i] for i in range(len(idx_to_class))]

# load predictions across folds; folds missing either .npy file are skipped
all_preds_all_folds = []
all_targets_all_folds = []

for fold in range(num_folds):
    fold_dir = os.path.join(save_dir, f"fold_{fold}", "version_0")
    preds_path = os.path.join(fold_dir, "all_preds.npy")
    targets_path = os.path.join(fold_dir, "all_targets.npy")

    if os.path.exists(preds_path) and os.path.exists(targets_path):
        all_preds_all_folds.extend(np.load(preds_path))
        all_targets_all_folds.extend(np.load(targets_path))

# convert predictions and targets to numpy arrays
all_preds_all_folds = np.array(all_preds_all_folds)
all_targets_all_folds = np.array(all_targets_all_folds)

# fold-wise final metrics summary
metrics_cols = [
    "train_f1", "train_precision", "train_recall", "train_loss",
    "val_f1", "val_precision", "val_recall", "val_loss"
]
train_cols = metrics_cols[:4]
val_cols = metrics_cols[4:]
metrics_summary = []
fold_labels = []  # labels of the folds that actually contributed a row

for fold in range(num_folds):
    metrics_file = os.path.join(save_dir, f"fold_{fold}", "version_0", "metrics.csv")
    if not os.path.exists(metrics_file):
        print(f"fold {fold}: no metrics.csv found, skipped")
        continue

    df = pd.read_csv(metrics_file)

    # train and val values are taken from separate rows: the last epoch-ordered
    # row that has a train_f1 value and the last one that has a val_f1 value
    df_val = df.dropna(subset=["val_f1"]).sort_values("epoch")
    df_train = df.dropna(subset=["train_f1"]).sort_values("epoch")
    if df_val.empty or df_train.empty:
        # iloc[-1] on an empty frame would raise IndexError
        print(f"fold {fold}: no logged train/val f1 values, skipped")
        continue

    last_val = df_val.iloc[-1]
    last_train = df_train.iloc[-1]

    metrics_summary.append(
        [last_train.get(col, np.nan) for col in train_cols]
        + [last_val.get(col, np.nan) for col in val_cols]
    )
    fold_labels.append(f"fold {fold}")

# convert to dataframe; index by the folds that were really loaded, because
# labelling with range(num_folds) raises a length mismatch once a fold is skipped
df_summary = pd.DataFrame(metrics_summary, columns=metrics_cols, index=fold_labels)

# display fold-wise metrics summary
print("\nmetrics summary per fold")
for idx, row in df_summary.iterrows():
    print(f"{idx}: "
          f"train f1: {row['train_f1']:.4f}, "
          f"val f1: {row['val_f1']:.4f}, "
          f"val precision: {row['val_precision']:.4f}, "
          f"val recall: {row['val_recall']:.4f}, "
          f"val loss: {row['val_loss']:.4f}")

# display mean and standard deviation across folds
print("\nmean and standard deviation across folds")
mean_std = df_summary.agg(["mean", "std"]).round(4)

# labels keep their original padding so the printed lines are unchanged
for label, col in (
    ("mean train f1: ", "train_f1"),
    ("mean val f1:   ", "val_f1"),
    ("mean val precision: ", "val_precision"),
    ("mean val recall:    ", "val_recall"),
    ("mean val loss:      ", "val_loss"),
):
    print(f"{label}{mean_std.loc['mean', col]:.4f}, std: {mean_std.loc['std', col]:.4f}")




In [None]:
# pick the fold whose last logged validation f1 is highest (the first fold wins ties)
best_fold, best_val_f1 = None, -1

for fold in range(num_folds):
    path = os.path.join(save_dir, f"fold_{fold}", "version_0", "metrics.csv")
    if not os.path.exists(path):
        continue
    val_scores = pd.read_csv(path)["val_f1"].dropna()
    if val_scores.empty:
        continue
    final_val_f1 = val_scores.iloc[-1]
    if final_val_f1 > best_val_f1:
        best_fold, best_val_f1 = fold, final_val_f1

# train/validation f1 curves of that fold; nothing is drawn when no fold had data
if best_fold is not None:
    path = os.path.join(save_dir, f"fold_{best_fold}", "version_0", "metrics.csv")
    df = pd.read_csv(path)
    df_train = df.dropna(subset=["train_f1"]).reset_index(drop=True)
    df_val = df.dropna(subset=["val_f1"]).reset_index(drop=True)

    plt.figure(figsize=(10, 6))
    curves = (
        (df_train, "train_f1", "train f1", "#2E8B57"),
        (df_val, "val_f1", "validation f1", "#B22222"),
    )
    for frame, column, legend_label, line_color in curves:
        plt.plot(frame["epoch"], frame[column], label=legend_label, color=line_color, linewidth=2.5)

    plt.title(f"training and validation f1 score (best fold {best_fold})", fontsize=18, fontweight="bold")
    plt.xlabel("epoch", fontsize=14)
    plt.ylabel("f1 score", fontsize=14)
    plt.xticks(fontsize=12)
    plt.yticks(fontsize=12)

    # legend sits in the lower-right corner
    plt.legend(fontsize=12, loc="lower right")

    plt.tight_layout()
    plt.show()


### ConvNeXt-Tiny + Best Hyperparameters

In [None]:
# imports
import os
import json
import numpy as np
import pandas as pd

# configuration
save_dir = r"C:/Users/Xuxu/Desktop/Master Thesis/BestHyperConvNeXtFull"
class_idx_path = r"C:/Users/Xuxu/Desktop/Master Thesis/OptunaDensenetFull/class_to_idx.json"
num_folds = 5

# load class mapping (name -> index) and invert it into an index-ordered name list
with open(class_idx_path, "r") as f:
    class_to_idx = json.load(f)
idx_to_class = {v: k for k, v in class_to_idx.items()}
class_names = [idx_to_class[i] for i in range(len(idx_to_class))]

# load predictions across folds; folds missing either .npy file are skipped
all_preds_all_folds = []
all_targets_all_folds = []

for fold in range(num_folds):
    fold_dir = os.path.join(save_dir, f"fold_{fold}", "version_0")
    preds_path = os.path.join(fold_dir, "all_preds.npy")
    targets_path = os.path.join(fold_dir, "all_targets.npy")

    if os.path.exists(preds_path) and os.path.exists(targets_path):
        all_preds_all_folds.extend(np.load(preds_path))
        all_targets_all_folds.extend(np.load(targets_path))

# convert predictions and targets to numpy arrays
all_preds_all_folds = np.array(all_preds_all_folds)
all_targets_all_folds = np.array(all_targets_all_folds)

# fold-wise final metrics summary
metrics_cols = [
    "train_f1", "train_precision", "train_recall", "train_loss",
    "val_f1", "val_precision", "val_recall", "val_loss"
]
train_cols = metrics_cols[:4]
val_cols = metrics_cols[4:]
metrics_summary = []
fold_labels = []  # labels of the folds that actually contributed a row

for fold in range(num_folds):
    metrics_file = os.path.join(save_dir, f"fold_{fold}", "version_0", "metrics.csv")
    if not os.path.exists(metrics_file):
        print(f"fold {fold}: no metrics.csv found, skipped")
        continue

    df = pd.read_csv(metrics_file)

    # train and val values are taken from separate rows: the last epoch-ordered
    # row that has a train_f1 value and the last one that has a val_f1 value
    df_val = df.dropna(subset=["val_f1"]).sort_values("epoch")
    df_train = df.dropna(subset=["train_f1"]).sort_values("epoch")
    if df_val.empty or df_train.empty:
        # iloc[-1] on an empty frame would raise IndexError
        print(f"fold {fold}: no logged train/val f1 values, skipped")
        continue

    last_val = df_val.iloc[-1]
    last_train = df_train.iloc[-1]

    metrics_summary.append(
        [last_train.get(col, np.nan) for col in train_cols]
        + [last_val.get(col, np.nan) for col in val_cols]
    )
    fold_labels.append(f"fold {fold}")

# convert to dataframe; index by the folds that were really loaded, because
# labelling with range(num_folds) raises a length mismatch once a fold is skipped
df_summary = pd.DataFrame(metrics_summary, columns=metrics_cols, index=fold_labels)

# display fold-wise metrics summary
print("\nmetrics summary per fold")
for idx, row in df_summary.iterrows():
    print(f"{idx}: "
          f"train f1: {row['train_f1']:.4f}, "
          f"val f1: {row['val_f1']:.4f}, "
          f"val precision: {row['val_precision']:.4f}, "
          f"val recall: {row['val_recall']:.4f}, "
          f"val loss: {row['val_loss']:.4f}")

# display mean and standard deviation across folds
print("\nmean and standard deviation across folds")
mean_std = df_summary.agg(["mean", "std"]).round(4)

# labels keep their original padding so the printed lines are unchanged
for label, col in (
    ("mean train f1: ", "train_f1"),
    ("mean val f1:   ", "val_f1"),
    ("mean val precision: ", "val_precision"),
    ("mean val recall:    ", "val_recall"),
    ("mean val loss:      ", "val_loss"),
):
    print(f"{label}{mean_std.loc['mean', col]:.4f}, std: {mean_std.loc['std', col]:.4f}")


In [None]:
# pick the fold whose last logged validation f1 is highest (the first fold wins ties)
best_fold, best_val_f1 = None, -1

for fold in range(num_folds):
    path = os.path.join(save_dir, f"fold_{fold}", "version_0", "metrics.csv")
    if not os.path.exists(path):
        continue
    val_scores = pd.read_csv(path)["val_f1"].dropna()
    if val_scores.empty:
        continue
    final_val_f1 = val_scores.iloc[-1]
    if final_val_f1 > best_val_f1:
        best_fold, best_val_f1 = fold, final_val_f1

# train/validation f1 curves of that fold; nothing is drawn when no fold had data
if best_fold is not None:
    path = os.path.join(save_dir, f"fold_{best_fold}", "version_0", "metrics.csv")
    df = pd.read_csv(path)
    df_train = df.dropna(subset=["train_f1"]).reset_index(drop=True)
    df_val = df.dropna(subset=["val_f1"]).reset_index(drop=True)

    plt.figure(figsize=(10, 6))
    curves = (
        (df_train, "train_f1", "train f1", "#2E8B57"),
        (df_val, "val_f1", "validation f1", "#B22222"),
    )
    for frame, column, legend_label, line_color in curves:
        plt.plot(frame["epoch"], frame[column], label=legend_label, color=line_color, linewidth=2.5)

    plt.title(f"training and validation f1 score (best fold {best_fold})", fontsize=18, fontweight="bold")
    plt.xlabel("epoch", fontsize=14)
    plt.ylabel("f1 score", fontsize=14)
    plt.xticks(fontsize=12)
    plt.yticks(fontsize=12)

    # legend sits in the lower-right corner
    plt.legend(fontsize=12, loc="lower right")

    plt.tight_layout()
    plt.show()


### ConvNeXt-Tiny (Supervised)

In [None]:

# paths
version_dir = r"C:/Users/Xuxu/Desktop/Master Thesis/SupervisedBaselineVer2Epoch100/single_run/version_0"
index_dir = r"C:/Users/Xuxu/Desktop/Master Thesis/OptunaDensenetFull"
save_dir = os.path.join(version_dir, "figures")

# create save_dir if needed
os.makedirs(save_dir, exist_ok=True)

# file paths
preds_path = os.path.join(version_dir, "all_preds.npy")
targets_path = os.path.join(version_dir, "all_targets.npy")
probs_path = os.path.join(version_dir, "all_probs.npy")
metrics_path = os.path.join(version_dir, "test_metrics.json")

# load class mapping (name -> index) and invert it into an index-ordered name list
with open(os.path.join(index_dir, "class_to_idx.json")) as f:
    class_to_idx = json.load(f)
idx_to_class = {v: k for k, v in class_to_idx.items()}
class_names = [idx_to_class[i] for i in range(len(idx_to_class))]

# Explicit label ids 0..n-1. Passing them to the report and the confusion matrix
# keeps one row/column per entry of class_names even when a class never occurs
# in the targets or predictions; otherwise target_names and the heatmap tick
# labels no longer line up with the computed rows.
n_classes = len(class_names)
class_ids = list(range(n_classes))

# load predictions and targets
all_preds = np.load(preds_path)
all_targets = np.load(targets_path)
all_probs = np.load(probs_path)

# classification report
print("\n# classification report")
print(classification_report(all_targets, all_preds, labels=class_ids, target_names=class_names, digits=4))

# confusion matrix (absolute counts)
cm = confusion_matrix(all_targets, all_preds, labels=class_ids)
plt.figure(figsize=(8, 6), dpi=300)
sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    cmap="Greens",
    xticklabels=class_names,
    yticklabels=class_names,
    annot_kws={"fontsize": 6}
)
plt.title("convnext-tiny (supervised) – confusion matrix", fontsize=14)
plt.xlabel("predicted label", fontsize=12)
plt.ylabel("true label", fontsize=12)
plt.xticks(rotation=90, ha='right', fontsize=8)
plt.yticks(rotation=0, fontsize=8)
plt.tight_layout(pad=1.0)
plt.savefig(os.path.join(save_dir, "confusion_matrix_absolute_cleaned.png"), bbox_inches="tight")
plt.show()

# roc curves (multi-class, one-vs-rest): column i of y_true_bin is the 0/1
# indicator for class i and is paired with column i of all_probs
y_true_bin = label_binarize(all_targets, classes=class_ids)

# compute roc and auc per class
fpr, tpr, roc_auc = {}, {}, {}
for i in range(n_classes):
    fpr[i], tpr[i], _ = roc_curve(y_true_bin[:, i], all_probs[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# micro-average: every (sample, class) pair is pooled into one binary problem
fpr["micro"], tpr["micro"], _ = roc_curve(y_true_bin.ravel(), all_probs.ravel())
roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

# macro-average: interpolate each class curve onto the union of all fpr points
# and average the tpr values
all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))
mean_tpr = np.zeros_like(all_fpr)
for i in range(n_classes):
    mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])
mean_tpr /= n_classes
fpr["macro"] = all_fpr
tpr["macro"] = mean_tpr
roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])

# save auc values keyed by class name, plus the two averages
roc_auc_json = {class_names[i]: roc_auc[i] for i in range(n_classes)}
roc_auc_json["micro"] = roc_auc["micro"]
roc_auc_json["macro"] = roc_auc["macro"]
with open(os.path.join(save_dir, "roc_auc_scores.json"), "w") as f:
    json.dump(roc_auc_json, f, indent=4)

# plot roc curves
plt.figure(figsize=(7, 6), dpi=300)
for i in range(n_classes):
    plt.plot(fpr[i], tpr[i], label=f"{class_names[i]} (auc = {roc_auc[i]:.2f})", linewidth=1.5)
plt.plot(fpr["micro"], tpr["micro"], label=f"micro-average (auc = {roc_auc['micro']:.2f})", color='deeppink', linestyle=':', linewidth=2)
plt.plot(fpr["macro"], tpr["macro"], label=f"macro-average (auc = {roc_auc['macro']:.2f})", color='navy', linestyle='-.', linewidth=2)
plt.title("convnext-tiny (supervised) – multi-class roc curve", fontsize=14)
plt.xlabel("false positive rate", fontsize=12)
plt.ylabel("true positive rate", fontsize=12)
plt.legend(loc="lower right", fontsize=7)
plt.xticks(fontsize=10)
plt.yticks(fontsize=10)
plt.tight_layout(pad=0.5)
plt.savefig(os.path.join(save_dir, "roc_curve_multiclass.png"), bbox_inches="tight")
plt.show()


In [None]:
# Load the per-epoch training log and drop any rows missing training loss or f1.
# metrics_path (set in the previous cell) points at test_metrics.json, which by
# its name is a JSON file and not the CSV with train_loss/train_f1 per epoch.
# NOTE(review): assumes the epoch log is <version_dir>/metrics.csv, as in the
# fold directories used elsewhere in this notebook — confirm.
epoch_log_path = os.path.join(version_dir, "metrics.csv")
if not os.path.exists(epoch_log_path):
    epoch_log_path = metrics_path  # fall back to the path that was read before
df = pd.read_csv(epoch_log_path)
df_epoch = df.dropna(subset=["train_loss", "train_f1"]).reset_index(drop=True)

# load predicted and true labels, then calculate macro f1-score on the test set
all_preds = np.load(preds_path)
all_targets = np.load(targets_path)
report = classification_report(all_targets, all_preds, output_dict=True, zero_division=0)
macro_f1 = report["macro avg"]["f1-score"]

# create plot for training and test metrics
plt.figure(figsize=(10, 6))

# plot training loss over epochs
plt.plot(
    df_epoch["epoch"],
    df_epoch["train_loss"],
    label="training loss",
    linestyle="-",
    color="#8B4513",  # saddlebrown
    linewidth=2.5
)

# plot training f1 score over epochs
plt.plot(
    df_epoch["epoch"],
    df_epoch["train_f1"],
    label="training f1",
    linestyle="-",
    color="#228B22",  # forestgreen
    linewidth=2.5
)

# plot horizontal line for final test loss if the log has a non-empty test_loss column
if "test_loss" in df.columns and not df["test_loss"].isnull().all():
    test_loss = df["test_loss"].dropna().values[-1]
    plt.hlines(
        y=test_loss,
        xmin=df_epoch["epoch"].min(),
        xmax=df_epoch["epoch"].max(),
        label=f"final test loss ({test_loss:.4f})",
        colors="#A0522D",  # sienna
        linestyles="--",
        linewidth=2.0
    )

# plot horizontal line for final test macro f1 score
# (axhline takes xmin/xmax as axes fractions, so 0..1 spans the full width)
plt.axhline(
    y=macro_f1,
    xmin=0,
    xmax=1,
    label=f"final test macro f1 ({macro_f1:.4f})",
    color="#006400",  # darkgreen
    linestyle="--",
    linewidth=2.0
)

# format axes and layout
plt.xlabel("epoch", fontsize=14)
plt.ylabel("metric value", fontsize=14)
plt.title("convnext-tiny (supervised) – training and test metrics", fontsize=16)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.legend(fontsize=12, loc="upper right")
plt.tight_layout()

# save the plot and display it
save_path = os.path.join(save_dir, "convnext_training_metrics.png")
plt.savefig(save_path, dpi=300, bbox_inches="tight")
plt.show()



### ConvNeXt-Tiny (BYOL)

In [None]:
# paths
version_dir = r"C:/Users/Xuxu/Desktop/Master Thesis/BYOLBaselineVer2Epoch100/single_run/version_0"
index_dir = r"C:/Users/Xuxu/Desktop/Master Thesis/OptunaDensenetFull"
save_dir = os.path.join(version_dir, "figures")
os.makedirs(save_dir, exist_ok=True)

preds_path = os.path.join(version_dir, "all_preds.npy")
targets_path = os.path.join(version_dir, "all_targets.npy")
probs_path = os.path.join(version_dir, "all_probs.npy")
metrics_path = os.path.join(version_dir, "test_metrics.json")

# load class names: invert the name -> index mapping into an index-ordered list
with open(os.path.join(index_dir, "class_to_idx.json")) as f:
    class_to_idx = json.load(f)
idx_to_class = {v: k for k, v in class_to_idx.items()}
class_names = [idx_to_class[i] for i in range(len(idx_to_class))]

# Explicit label ids 0..n-1. Passing them to the report and the confusion matrix
# keeps one row/column per entry of class_names even when a class never occurs
# in the targets or predictions; otherwise target_names and the heatmap tick
# labels no longer line up with the computed rows.
n_classes = len(class_names)
class_ids = list(range(n_classes))

# load predictions, targets and class probabilities
all_preds = np.load(preds_path)
all_targets = np.load(targets_path)
all_probs = np.load(probs_path)

print("\n# classification report")
print(classification_report(all_targets, all_preds, labels=class_ids, target_names=class_names, digits=4))

# confusion matrix (absolute)
cm = confusion_matrix(all_targets, all_preds, labels=class_ids)
plt.figure(figsize=(8, 6), dpi=300)
sns.heatmap(cm, annot=True, fmt="d", cmap="Greens",
            xticklabels=class_names, yticklabels=class_names,
            annot_kws={"fontsize": 6})
plt.title("convnext-tiny (byol) – confusion matrix", fontsize=14)
plt.xlabel("predicted label", fontsize=12)
plt.ylabel("true label", fontsize=12)
plt.xticks(rotation=90, ha='right', fontsize=8)
plt.yticks(rotation=0, fontsize=8)
plt.tight_layout(pad=1.0)
plt.savefig(os.path.join(save_dir, "confusion_matrix_absolute_cleaned.png"), bbox_inches="tight")
plt.show()

# roc curve setup (one-vs-rest): column i of y_true_bin is the 0/1 indicator
# for class i and is paired with column i of all_probs
y_true_bin = label_binarize(all_targets, classes=class_ids)

# compute per-class roc and auc
fpr, tpr, roc_auc = {}, {}, {}
for i in range(n_classes):
    fpr[i], tpr[i], _ = roc_curve(y_true_bin[:, i], all_probs[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# micro-average: every (sample, class) pair is pooled into one binary problem
fpr["micro"], tpr["micro"], _ = roc_curve(y_true_bin.ravel(), all_probs.ravel())
roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

# macro-average
all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))
mean_tpr = np.zeros_like(all_fpr)
for i in range(n_classes):
    mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])
mean_tpr /= n_classes
fpr["macro"] = all_fpr
tpr["macro"] = mean_tpr
roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])

# save auc scores
roc_auc_json = {class_names[i]: roc_auc[i] for i in range(n_classes)}
roc_auc_json["micro"] = roc_auc["micro"]
roc_auc_json["macro"] = roc_auc["macro"]
with open(os.path.join(save_dir, "roc_auc_scores.json"), "w") as f:
    json.dump(roc_auc_json, f, indent=4)

# plot roc curves
plt.figure(figsize=(7, 6), dpi=300)
for i in range(n_classes):
    plt.plot(fpr[i], tpr[i],
             label=f"{c


In [None]:
# load the per-epoch training log and keep rows with valid loss and f1 values.
# NOTE(review): `metrics_path` is bound to "test_metrics.json" in the BYOL cell
# above, which cannot supply the epoch/train_loss/train_f1 columns used below.
# The per-epoch log is assumed to be "metrics.csv" inside the run's version
# directory (same layout as the Optuna trial logs) — confirm the file name.
metrics_csv_path = os.path.join(version_dir, "metrics.csv")
df = pd.read_csv(metrics_csv_path)
df_epoch = df.dropna(subset=["train_loss", "train_f1"]).reset_index(drop=True)

# load predicted and true labels, then compute macro-averaged f1 score
all_preds = np.load(preds_path)
all_targets = np.load(targets_path)
report = classification_report(all_targets, all_preds, output_dict=True, zero_division=0)
macro_f1 = report["macro avg"]["f1-score"]

# initialize the plot for training and test metrics
plt.figure(figsize=(10, 6))

# plot training loss across epochs
plt.plot(
    df_epoch["epoch"],
    df_epoch["train_loss"],
    label="training loss",
    linestyle="-",
    color="#8B4513",  # saddlebrown
    linewidth=2.5
)

# plot training f1 score across epochs
plt.plot(
    df_epoch["epoch"],
    df_epoch["train_f1"],
    label="training f1",
    linestyle="-",
    color="#228B22",  # forestgreen
    linewidth=2.5
)

# draw horizontal line for final test loss if available; skipped when the
# column is missing or contains no values
if "test_loss" in df.columns and not df["test_loss"].isnull().all():
    test_loss = df["test_loss"].dropna().values[-1]
    plt.hlines(
        y=test_loss,
        xmin=df_epoch["epoch"].min(),
        xmax=df_epoch["epoch"].max(),
        label=f"final test loss ({test_loss:.4f})",
        colors="#A0522D",  # sienna
        linestyles="--",
        linewidth=2.0
    )

# draw horizontal line for final macro-averaged test f1 score; xmin/xmax are
# axes fractions, so 0..1 spans the full plot width
plt.axhline(
    y=macro_f1,
    xmin=0,
    xmax=1,
    label=f"final test macro f1 ({macro_f1:.4f})",
    color="#006400",  # darkgreen
    linestyle="--",
    linewidth=2.0
)

# set axis labels and title
plt.xlabel("epoch", fontsize=14)
plt.ylabel("metric value", fontsize=14)
plt.title("convnext-tiny (by


### Supervised ConvNextTiny + SimCLR

In [None]:


# Define paths: run directory, directory holding the class index file, and
# the figure output directory for this run
VERSION_DIR = r"C:/Users/Xuxu/Desktop/Master Thesis/SIMCLRBaselineEpoch100/single_run/version_0"
INDEX_DIR = r"C:/Users/Xuxu/Desktop/Master Thesis/OptunaDensenetFull"
SAVE_DIR = os.path.join(VERSION_DIR, "figures")

# Create directory to save figures if it doesn't exist
os.makedirs(SAVE_DIR, exist_ok=True)

# Define file paths
PREDS_PATH = os.path.join(VERSION_DIR, "all_preds.npy")
TARGETS_PATH = os.path.join(VERSION_DIR, "all_targets.npy")
PROBS_PATH = os.path.join(VERSION_DIR, "all_probs.npy")
METRICS_PATH = os.path.join(VERSION_DIR, "test_metrics.json")

# Load class names, ordered by class index (inverse of the name -> index mapping)
with open(os.path.join(INDEX_DIR, "class_to_idx.json")) as f:
    class_to_idx = json.load(f)
idx_to_class = {v: k for k, v in class_to_idx.items()}
class_names = [idx_to_class[i] for i in range(len(idx_to_class))]

# Load predictions, targets, and probabilities
all_preds = np.load(PREDS_PATH)
all_targets = np.load(TARGETS_PATH)
all_probs = np.load(PROBS_PATH)


# Print classification report
print("\n# Classification Report")
print(classification_report(all_targets, all_preds, target_names=class_names, digits=4))

# Plot confusion matrix (absolute counts)
cm = confusion_matrix(all_targets, all_preds)
plt.figure(figsize=(8, 6), dpi=300)
sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    cmap="Greens",
    xticklabels=class_names,
    yticklabels=class_names,
    annot_kws={"fontsize": 6}
)
plt.title("ConvNeXt-Tiny (SimCLR) – Confusion Matrix", fontsize=14)
plt.xlabel("Predicted Label", fontsize=12)
plt.ylabel("True Label", fontsize=12)
plt.xticks(rotation=90, ha='right', fontsize=8)
plt.yticks(rotation=0, fontsize=8)
plt.tight_layout(pad=1.0)
plt.savefig(os.path.join(SAVE_DIR, "confusion_matrix_absolute_cleaned.png"), bbox_inches="tight")
plt.show()

# Prepare for ROC Curve (multi-class): one-vs-rest label matrix
n_classes = len(class_names)
y_true_bin = label_binarize(all_targets, classes=list(range(n_classes)))

# Compute ROC curve and AUC for each class, keyed by class index
fpr, tpr, roc_auc = {}, {}, {}
for i in range(n_classes):
    fpr[i], tpr[i], _ = roc_curve(y_true_bin[:, i], all_probs[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# Compute micro-average ROC curve and AUC: every (sample, class) pair is
# pooled into a single binary problem
fpr["micro"], tpr["micro"], _ = roc_curve(y_true_bin.ravel(), all_probs.ravel())
roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

# Compute macro-average ROC curve and AUC: interpolate each class curve onto
# the union of all FPR points, then take the unweighted mean of the TPR values
all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))
mean_tpr = np.zeros_like(all_fpr)
for i in range(n_classes):
    mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])
mean_tpr /= n_classes
fpr["macro"] = all_fpr
tpr["macro"] = mean_tpr
roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])

# Plot ROC curves
plt.figure(figsize=(7, 6), dpi=300)

# Plot ROC for each class
for i in range(n_classes):
    plt.plot(fpr[i], tpr[i],
             label=f"{class_names[i]} (AUC = {roc_auc[i]:.2f})",
             linewidth=1.5)

# Plot micro- and macro-average ROC
plt.plot(fpr["micro"], tpr["micro"],
         label=f"Micro-average (AUC = {roc_auc['micro']:.2f})",
         color='deeppink', linestyle=':', linewidth=2)
plt.plot(fpr["macro"], tpr["macro"],
         label=f"Macro-average (AUC = {roc_auc['macro']:.2f})",
         color='navy', linestyle='-.', linewidth=2)

# Finalize ROC plot
plt.title("ConvNeXt-Tiny (SimCLR) – Multi-Class ROC Curve", fontsize=14)
plt.xlabel("False Positive Rate", fontsize=12)
plt.ylabel("True Positive Rate", fontsize=12)
plt.legend(loc="lower right", fontsize=7)
plt.xticks(fontsize=10)
plt.yticks(fontsize=10)
plt.tight_layout(pad=0.5)
# Name the file after this run's method (the previous "roc_curve_byol.png" was
# carried over from the BYOL cell and mislabelled the SimCLR figure)
plt.savefig(os.path.join(SAVE_DIR, "roc_curve_simclr.png"), bbox_inches="tight", dpi=300)
plt.show()


In [None]:
# load the SimCLR run's per-epoch training log and keep only rows with valid
# training loss and f1 score.
# This cell reads from the SimCLR constants (VERSION_DIR, PREDS_PATH,
# TARGETS_PATH, SAVE_DIR). The lowercase names used before belong to the BYOL
# cell, so the plot showed BYOL data under a SimCLR title and overwrote the
# BYOL figure.
# NOTE(review): the per-epoch log is assumed to be "metrics.csv" inside the
# run's version directory (same layout as the Optuna trial logs) — confirm.
metrics_csv_path = os.path.join(VERSION_DIR, "metrics.csv")
df = pd.read_csv(metrics_csv_path)
df_epoch = df.dropna(subset=["train_loss", "train_f1"]).reset_index(drop=True)

# load predicted and true labels, then compute macro-averaged f1 score
all_preds = np.load(PREDS_PATH)
all_targets = np.load(TARGETS_PATH)
report = classification_report(all_targets, all_preds, output_dict=True, zero_division=0)
macro_f1 = report["macro avg"]["f1-score"]

# create the plot for training and test metrics
plt.figure(figsize=(10, 6))

# plot training loss across epochs
plt.plot(
    df_epoch["epoch"],
    df_epoch["train_loss"],
    label="training loss",
    linestyle="-",
    color="#8B4513",  # saddlebrown
    linewidth=2.5
)

# plot training f1 score across epochs
plt.plot(
    df_epoch["epoch"],
    df_epoch["train_f1"],
    label="training f1",
    linestyle="-",
    color="#228B22",  # forestgreen
    linewidth=2.5
)

# plot horizontal line for final test loss if available; skipped when the
# column is missing or contains no values
if "test_loss" in df.columns and not df["test_loss"].isnull().all():
    test_loss = df["test_loss"].dropna().values[-1]
    plt.hlines(
        y=test_loss,
        xmin=df_epoch["epoch"].min(),
        xmax=df_epoch["epoch"].max(),
        label=f"final test loss ({test_loss:.4f})",
        colors="#A0522D",  # sienna
        linestyles="--",
        linewidth=2.0
    )

# plot horizontal line for final macro-averaged test f1 score; xmin/xmax are
# axes fractions, so 0..1 spans the full plot width
plt.axhline(
    y=macro_f1,
    xmin=0,
    xmax=1,
    label=f"final test macro f1 ({macro_f1:.4f})",
    color="#006400",  # darkgreen
    linestyle="--",
    linewidth=2.0
)

# set axis labels and plot title
plt.xlabel("epoch", fontsize=14)
plt.ylabel("metric value", fontsize=14)
plt.title("convnext-tiny (simclr) – training and test metrics", fontsize=16)

# customize tick size and legend
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.legend(fontsize=12, loc="upper right")
plt.tight_layout()

# save the plot into the SimCLR figures directory and display it
save_path = os.path.join(SAVE_DIR, "convnext_training_metrics.png")
plt.savefig(save_path, dpi=300, bbox_inches="tight")
plt.show()
