In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from tqdm import tqdm
from ultralytics import YOLO
import tqdm
import cv2
import numpy as np

# Instantiate the YOLO model from the saved weights file; the cells below
# call `model.predict(...)` on it and read the per-class probabilities.
model = YOLO("./yolo_training/962/weights/best.pt")

In [None]:
# Load every test image into memory together with its integer class label.
path = "../../Dataset/isic/HAM10000/cheat/test/"
label_trans = {'AKIEC': 0, 'BCC': 1, 'BKL': 2, 'DF': 3, 'MEL': 4, 'NV': 5, 'VASC': 6}
data, label = [], []
unreadable = []  # paths that cv2 could not decode

# Sorted listings keep the sample order reproducible across filesystems.
for types in sorted(os.listdir(path)):
    local_path = os.path.join(path, types)
    # Ignore stray entries (hidden files, unknown folders) instead of failing
    # with a KeyError on the label lookup.
    if types not in label_trans or not os.path.isdir(local_path):
        continue
    for i in tqdm.tqdm(sorted(os.listdir(local_path)), desc=types):
        img_path = os.path.join(local_path, i)
        img = cv2.imread(img_path)
        # cv2.imread signals failure by returning None rather than raising.
        if img is None:
            unreadable.append(img_path)
            continue
        data.append(img)
        label.append(label_trans[types])

if unreadable:
    print(f"Skipped {len(unreadable)} unreadable file(s), e.g. {unreadable[0]}")

data, label = np.array(data), np.array(label, dtype=int)
# One-hot encoding of the labels, one column per entry of label_trans.
label_catagorical = np.eye(len(label_trans))[label]

In [None]:
# Run the model on each image in `data`, collecting the probability vector
# and the top-1 class index of the first result returned by `model.predict`.
predict_proba = []
predict_numerical = []
for image in tqdm.tqdm(data):
    first_result = model.predict(image, verbose=False)[0]
    predict_proba.append(first_result.probs.data.cpu().numpy())
    predict_numerical.append(first_result.probs.top1)

predict_proba, predict_numerical = np.array(predict_proba), np.array(predict_numerical)
print(predict_proba.shape, predict_numerical.shape)

In [None]:
import pandas as pd
from sklearn.metrics import accuracy_score, balanced_accuracy_score, matthews_corrcoef, jaccard_score, f1_score, precision_score, roc_auc_score
from itertools import combinations
import numpy as np

class_names = ['AKIEC', 'BCC', 'BKL', 'DF', 'MEL', 'NV', 'VASC']

columns = ["Top-1 Accuracy", "Balanced Accuracy", "MCC", "Jaccard Score", "F1-score", "Precision", "OvO AUC"]
index = class_names + ["Overall"]
# Float dtype (rather than object) so cells that are never filled stay NaN
# and the table matches the one built by metrics_per_condition.
results = pd.DataFrame(index=index, columns=columns, dtype=float)

# Per-class rows: each class is scored as a binary "this class vs. the rest" problem.
for i, class_name in enumerate(class_names):
    true_binary = (label == i).astype(int)
    pred_binary = (predict_numerical == i).astype(int)

    results.loc[class_name, "Top-1 Accuracy"] = accuracy_score(true_binary, pred_binary)
    results.loc[class_name, "Balanced Accuracy"] = balanced_accuracy_score(true_binary, pred_binary)
    results.loc[class_name, "MCC"] = matthews_corrcoef(true_binary, pred_binary)
    # zero_division=0 gives a defined score (and no warning) for classes that
    # are never predicted or never present.
    results.loc[class_name, "Jaccard Score"] = jaccard_score(true_binary, pred_binary, zero_division=0)
    results.loc[class_name, "F1-score"] = f1_score(true_binary, pred_binary, zero_division=0)
    results.loc[class_name, "Precision"] = precision_score(true_binary, pred_binary, zero_division=0)

# "Overall" row: multi-class scores over all samples.
results.loc["Overall", "Top-1 Accuracy"] = accuracy_score(label, predict_numerical)
results.loc["Overall", "Balanced Accuracy"] = balanced_accuracy_score(label, predict_numerical)
results.loc["Overall", "MCC"] = matthews_corrcoef(label, predict_numerical)
results.loc["Overall", "Jaccard Score"] = jaccard_score(label, predict_numerical, average='macro', zero_division=0)
results.loc["Overall", "F1-score"] = f1_score(label, predict_numerical, average='weighted', zero_division=0)  # weighted
results.loc["Overall", "Precision"] = precision_score(label, predict_numerical, average='weighted', zero_division=0)  # weighted

# Pairwise (one-vs-one) AUC: for each pair of classes, keep only the samples
# of those two classes and score them with the probability of the first class.
# NOTE(review): only the first class's probability is used as the score for a
# pair; confirm this single-direction definition is the intended one.
ovo_auc_per_class = {class_name: [] for class_name in class_names}
ovo_auc_scores = []

for class1, class2 in combinations(range(len(class_names)), 2):
    mask = (label == class1) | (label == class2)

    if np.sum(mask) < 2:
        continue

    y_true_binary = (label[mask] == class1).astype(int)
    y_score_binary = predict_proba[mask, class1]

    try:
        auc_score = roc_auc_score(y_true_binary, y_score_binary)
    except ValueError:
        # Raised e.g. when only one of the two classes has samples; the pair
        # then contributes nothing to the averages.
        continue

    ovo_auc_scores.append(auc_score)
    ovo_auc_per_class[class_names[class1]].append(auc_score)
    ovo_auc_per_class[class_names[class2]].append(auc_score)

# A class's OvO AUC is the mean over every pair it takes part in.
for class_name in class_names:
    if ovo_auc_per_class[class_name]:
        results.loc[class_name, "OvO AUC"] = np.mean(ovo_auc_per_class[class_name])

# Leave the cell as NaN when no pair could be scored, instead of taking the
# mean of an empty list (which also yields NaN but emits a RuntimeWarning).
if ovo_auc_scores:
    results.loc["Overall", "OvO AUC"] = np.mean(ovo_auc_scores)


In [None]:
# Cast the metric table to float32 and write it to a CSV file.
results.astype("float32").to_csv("./overall_diseace.csv")

In [None]:
import os, cv2, tqdm, numpy as np, pandas as pd
from itertools import combinations
from collections import OrderedDict
from sklearn.metrics import (accuracy_score, balanced_accuracy_score, matthews_corrcoef,
                             jaccard_score, f1_score, precision_score, roc_auc_score)

# Root folder holding one sub-directory of test images per class.
DATA_DIR = "../../Dataset/isic/HAM10000/cheat/test/"
# Class names; a name's position in this list is its integer label.
CLASS_NAMES = ['AKIEC', 'BCC', 'BKL', 'DF', 'MEL', 'NV', 'VASC']
LABEL_MAP = dict(zip(CLASS_NAMES, range(len(CLASS_NAMES))))

# Evaluation conditions in run order: condition name -> keyword arguments for
# cv2.bilateralFilter, or None to leave the image unfiltered.
BLUR_SETTINGS = OrderedDict([
    ("original", None),
    ("bilateral_d5", dict(d=5, sigmaColor=25, sigmaSpace=25)),
    ("bilateral_d9", dict(d=9, sigmaColor=75, sigmaSpace=75)),
])

def load_dataset(data_dir, label_map):
    """Read every image under ``data_dir/<class>/`` and pair it with its label.

    Args:
        data_dir: Directory containing one sub-directory per class.
        label_map: Mapping of class sub-directory name -> integer label. The
            classes are visited in the mapping's iteration order, so the
            function no longer depends on a module-level class list.

    Returns:
        A tuple ``(images, labels)``: a list of images as returned by
        ``cv2.imread`` and an integer NumPy array of the same length.

    Files that ``cv2.imread`` cannot decode (it returns ``None`` for them) are
    skipped and reported once through ``warnings.warn`` rather than being
    passed on as ``None`` to the filter or the model.
    """
    import warnings

    imgs, labels, unreadable = [], [], []
    for cls, cls_id in label_map.items():
        cls_dir = os.path.join(data_dir, cls)
        # Sorted so the sample order is stable between runs.
        files = sorted(os.listdir(cls_dir))
        for fn in tqdm.tqdm(files, desc=f"Loading {cls:>5}", leave=False):
            file_path = os.path.join(cls_dir, fn)
            img = cv2.imread(file_path)
            if img is None:
                unreadable.append(file_path)
                continue
            imgs.append(img)
            labels.append(cls_id)

    if unreadable:
        warnings.warn(
            f"Skipped {len(unreadable)} unreadable file(s), e.g. {unreadable[0]}"
        )
    # Explicit integer dtype keeps the label array usable for indexing even
    # when no image was loaded.
    return imgs, np.array(labels, dtype=int)

def apply_bilateral(img, params):
    """Bilateral-filter ``img`` with ``params``, or pass it through unchanged.

    Args:
        img: Image handed to ``cv2.bilateralFilter``.
        params: ``None`` to skip filtering, otherwise a mapping with the keys
            ``"d"``, ``"sigmaColor"`` and ``"sigmaSpace"``.

    Returns:
        ``img`` itself when ``params`` is ``None``; otherwise the result of
        ``cv2.bilateralFilter``.
    """
    if params is not None:
        diameter = params["d"]
        sigma_color = params["sigmaColor"]
        sigma_space = params["sigmaSpace"]
        return cv2.bilateralFilter(
            img, d=diameter, sigmaColor=sigma_color, sigmaSpace=sigma_space
        )
    return img

def batch_predict(model, images):
    """Run ``model.predict`` on each image and collect its outputs.

    Args:
        model: Object exposing ``predict(image, verbose=False)``; the first
            element of its result must provide ``probs.data`` and ``probs.top1``.
        images: Iterable of images, predicted one at a time.

    Returns:
        A tuple ``(probabilities, top1)``: the per-image probability vectors
        stacked row-wise with ``np.vstack``, and an array of top-1 class indices.
    """
    probabilities = []
    top1_indices = []
    for image in tqdm.tqdm(images, desc="Inference", leave=False):
        first_result = model.predict(image, verbose=False)[0]
        probabilities.append(first_result.probs.data.cpu().numpy())
        top1_indices.append(first_result.probs.top1)
    return np.vstack(probabilities), np.array(top1_indices)

def metrics_per_condition(labels, y_pred, y_proba):
    """Build the per-class and overall metric table for one evaluation condition.

    Args:
        labels: Array of true integer class labels.
        y_pred: Array of predicted integer class labels.
        y_proba: 2-D array of scores indexed as ``[sample, class]``.

    Returns:
        A float ``DataFrame`` with one row per entry of ``CLASS_NAMES`` plus an
        ``"Overall"`` row. Per-class rows score the class as a binary
        class-vs-rest problem; the overall row uses multi-class scores (macro
        Jaccard, weighted F1 and precision). ``"OvO AUC"`` cells stay NaN when
        no class pair could be scored.
    """
    metric_columns = ["Top-1 Accuracy", "Balanced Accuracy", "MCC", "Jaccard Score",
                      "F1-score", "Precision", "OvO AUC"]
    table = pd.DataFrame(index=CLASS_NAMES + ["Overall"], columns=metric_columns, dtype=float)

    # (column, scorer, kwargs for the binary per-class call, kwargs for the overall call)
    scorers = [
        ("Top-1 Accuracy", accuracy_score, {}, {}),
        ("Balanced Accuracy", balanced_accuracy_score, {}, {}),
        ("MCC", matthews_corrcoef, {}, {}),
        ("Jaccard Score", jaccard_score,
         {"zero_division": 0}, {"average": "macro", "zero_division": 0}),
        ("F1-score", f1_score,
         {"zero_division": 0}, {"average": "weighted", "zero_division": 0}),
        ("Precision", precision_score,
         {"zero_division": 0}, {"average": "weighted", "zero_division": 0}),
    ]

    for class_idx, class_name in enumerate(CLASS_NAMES):
        truth = (labels == class_idx).astype(int)
        guess = (y_pred == class_idx).astype(int)
        for column, scorer, binary_kwargs, _ in scorers:
            table.loc[class_name, column] = scorer(truth, guess, **binary_kwargs)

    for column, scorer, _, overall_kwargs in scorers:
        table.loc["Overall", column] = scorer(labels, y_pred, **overall_kwargs)

    # Pairwise AUC: restrict to the samples of two classes and score them with
    # the probability assigned to the first class of the pair.
    pairwise_aucs = []
    aucs_by_class = {name: [] for name in CLASS_NAMES}
    for first, second in combinations(range(len(CLASS_NAMES)), 2):
        pair_mask = (labels == first) | (labels == second)
        if pair_mask.sum() >= 2:
            pair_truth = (labels[pair_mask] == first).astype(int)
            pair_score = y_proba[pair_mask, first]
            try:
                pair_auc = roc_auc_score(pair_truth, pair_score)
            except ValueError:
                # Pair cannot be scored; it is left out of the averages.
                continue
            pairwise_aucs.append(pair_auc)
            aucs_by_class[CLASS_NAMES[first]].append(pair_auc)
            aucs_by_class[CLASS_NAMES[second]].append(pair_auc)

    for name, values in aucs_by_class.items():
        if values:
            table.loc[name, "OvO AUC"] = float(np.mean(values))
    if pairwise_aucs:
        table.loc["Overall", "OvO AUC"] = float(np.mean(pairwise_aucs))
    return table

def evaluate_under_blurs(model, data_dir=DATA_DIR, blur_settings=BLUR_SETTINGS):
    """Evaluate ``model`` on the dataset once per blur condition.

    Args:
        model: Model passed to ``batch_predict``.
        data_dir: Dataset root handed to ``load_dataset``.
        blur_settings: Mapping of condition name -> parameters for
            ``apply_bilateral`` (``None`` means no filtering).

    Returns:
        A tuple ``(all_results, summary)``: ``all_results`` maps each condition
        name to the table from ``metrics_per_condition``; ``summary`` has one
        row per condition holding that table's ``"Overall"`` row.

    Raises:
        ValueError: If no images were loaded from ``data_dir``.
    """
    images, labels = load_dataset(data_dir, LABEL_MAP)
    if not images:
        # Fail early with a clear message instead of an opaque np.vstack error.
        raise ValueError(f"No images found under {data_dir!r}; nothing to evaluate.")

    all_results = {}
    for name, params in blur_settings.items():
        imgs_blur = [
            apply_bilateral(img, params)
            for img in tqdm.tqdm(images, desc=f"[{name}] blurring",
                                 leave=False, dynamic_ncols=True)
        ]
        # Reuse the shared inference helper rather than duplicating its loop.
        proba, preds = batch_predict(model, imgs_blur)
        all_results[name] = metrics_per_condition(labels, preds, proba)

    # One row per condition, taken from each table's "Overall" row.
    summary = pd.DataFrame(
        {name: table.loc["Overall"] for name, table in all_results.items()}
    ).T
    print("\n=== Overall summary across conditions ===")
    print(summary.round(4))
    return all_results, summary

# Run the evaluation for every blur condition and persist the results.
all_results, summary = evaluate_under_blurs(model)
summary.to_csv("metrics_summary_blurs.csv", index=True)
# Write one per-class table per evaluated condition. Iterating over the results
# avoids hard-coding condition names: BLUR_SETTINGS has no "bilateral_d15"
# entry, so looking that key up raises KeyError.
for condition_name, condition_table in all_results.items():
    condition_table.to_csv(f"metrics_perclass_{condition_name}.csv")


In [None]:
from sklearn.metrics import accuracy_score, balanced_accuracy_score, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import seaborn as sns

# Confusion matrix of true vs. predicted labels, saved to ./confusion.png.
sns.set(font_scale=1.5)
sns.set_style("white")

class_names = ['AKIEC', 'BCC', 'BKL', 'DF', 'MEL', 'NV', 'VASC']

print(balanced_accuracy_score(label, predict_numerical))

fig, ax = plt.subplots(figsize=(10, 10))
cm = confusion_matrix(label, predict_numerical)
ConfusionMatrixDisplay(cm, display_labels=class_names).plot(ax=ax, cmap="Blues")

# Bold axis labels, tick labels and title.
ax.set_xlabel("Predicted label", weight="bold", fontsize=23)
ax.set_ylabel("True label", weight="bold", fontsize=23)
plt.xticks(weight="bold")
plt.yticks(weight="bold")
ax.set_title("Confusion Matrix", weight="bold", fontsize=25)

fig.savefig("./confusion.png", dpi=300, bbox_inches="tight")
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import label_binarize
import seaborn as sns

# ROC curve for every class plus micro- and macro-averaged curves.
sns.set(font_scale=1.5)
sns.set_style("white")
class_names = ['AKIEC', 'BCC', 'BKL', 'DF', 'MEL', 'NV', 'VASC']

y_true = label_catagorical.copy()
y_score = predict_proba

plt.figure(figsize=(10, 8))
colors = plt.cm.Set2(np.linspace(0, 1, len(class_names)))

# Common false-positive-rate grid onto which every curve is interpolated.
all_fpr = np.linspace(0, 1, 100)

auc_values = []
tpr_values_macro = []
tpr_values_micro = []

# One ROC curve per class, scored against that class's one-hot column.
for i, color in enumerate(colors):
    fpr, tpr, _ = roc_curve(y_true[:, i], y_score[:, i])
    roc_auc = auc(fpr, tpr)
    auc_values.append(roc_auc)
    tpr_values_macro.append(np.interp(all_fpr, fpr, tpr))  # interpolate onto the common grid
    plt.plot(fpr, tpr, color=color, alpha=0.5, label=f'{class_names[i]} (AUC = {roc_auc:.3f})')

# Micro average: pool every (sample, class) decision into one binary problem.
fpr_micro, tpr_micro, _ = roc_curve(y_true.ravel(), y_score.ravel())
roc_auc_micro = auc(fpr_micro, tpr_micro)
tpr_values_micro = np.array([np.interp(all_fpr, fpr_micro, tpr_micro)])

# Macro average: mean of the interpolated per-class curves.
mean_tpr_macro = np.mean(tpr_values_macro, axis=0)
std_tpr_macro = np.std(tpr_values_macro, axis=0)
roc_auc_macro = auc(all_fpr, mean_tpr_macro)

plt.plot(fpr_micro, tpr_micro, label=f'Micro-Average (AUC = {roc_auc_micro:.3f})', color='blue', linewidth=2)
plt.plot(all_fpr, mean_tpr_macro, label=f'Macro-Average (AUC = {roc_auc_macro:.3f})', color='red', linewidth=2)

# Diagonal reference line.
plt.plot([0, 1], [0, 1], 'k--', lw=2)

plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curves with Micro and Macro AUC')
plt.legend(loc='lower right')
plt.grid()
plt.show()

from sklearn.metrics import precision_recall_curve, average_precision_score

# Precision-recall curve and average precision for every class.
plt.figure(figsize=(8, 6))
for i, _class_name in enumerate(class_names):
    class_truth = y_true[:, i]
    class_score = y_score[:, i]
    precision, recall, _ = precision_recall_curve(class_truth, class_score)
    ap = average_precision_score(class_truth, class_score)
    plt.plot(recall, precision, label=f"{class_names[i]} (AP={ap:.3f})")

plt.xlabel("Recall")
plt.ylabel("Precision")
plt.title("Precision-Recall Curve")
plt.legend()
plt.grid()
plt.show()



In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Horizontal bar chart comparing this model's accuracy with published results.
our_model = {
    "Model": "YOLOv11s + Full Enhancements",
    "Accuracy": 96.10,
    "AUC": 0.9996,
    "Average Precision": 0.9625,
    "Extra Training Data": False,
    "Year": 2025,
}

# (model, accuracy, AUC, average precision, year); "-" marks a value that is
# not given. Every entry here is flagged as using extra training data.
literature_rows = [
    ("FixCaps", 96.49, "-", "-", 2022),
    ("IRv2+Soft Attention", 93.4, 0.984, 0.937, 2021),
    ("Multi-Resolution Efficient Nets", 92.6, "-", "-", 2020),
    ("Two-Path CNN", 88.6, "-", "-", 2020),
    ("ISIC 2019 Analysis", 85.1, "-", "-", 2020),
    ("Siamese Network", 83.2, "-", "-", 2020),
]
other_models = [
    {
        "Model": model_name,
        "Accuracy": accuracy,
        "AUC": auc_value,
        "Average Precision": avg_precision,
        "Extra Training Data": True,
        "Year": year,
    }
    for model_name, accuracy, auc_value, avg_precision, year in literature_rows
]

df = pd.DataFrame([our_model, *other_models])

plt.figure(figsize=(10, 6))
sns.barplot(data=df, x="Accuracy", y="Model", hue="Extra Training Data", palette="bwr", dodge=False)
plt.xlabel("Accuracy (%)", weight="bold")
plt.ylabel("Model", weight="bold")
plt.title("Comparison of Skin Lesion Classification Models", weight="bold")
plt.legend(title="Extra Data", loc="lower right")
plt.grid(axis="x", linestyle="--", alpha=0.7)
plt.savefig("./comparation.png", dpi=300, bbox_inches="tight")
plt.show()
