In [None]:
# %%
# IMPORT PACKAGES
import os
import numpy as np
import pandas as pd
from ultralytics import YOLO
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import roc_curve, auc, RocCurveDisplay
import matplotlib.pyplot as plt
import yaml
from tqdm import tqdm

In [None]:
def load_images_from_folder(folder):
    """Return the paths of the image files located directly inside ``folder``.

    An entry counts as an image when its name ends, case-insensitively, with
    one of the extensions in ``exts`` and it is a regular file. Sub-directories
    are not descended into, and a directory is never returned even if its name
    carries an image extension.

    Args:
        folder: Directory to scan.

    Returns:
        list[str]: ``folder`` joined with each matching file name, sorted by
        file name. ``os.listdir`` returns entries in arbitrary order, so the
        sort is what makes the result reproducible between runs and machines.

    Raises:
        OSError: Propagated from ``os.listdir`` when ``folder`` cannot be
            listed (e.g. ``FileNotFoundError`` if it does not exist).
    """
    exts = ('.jpg', '.jpeg', '.png', '.bmp', '.tif', '.tiff')
    image_paths = []
    for name in sorted(os.listdir(folder)):
        if not name.lower().endswith(exts):
            continue
        path = os.path.join(folder, name)
        # Only regular files can be read as images; skip directories and the like
        if os.path.isfile(path):
            image_paths.append(path)
    return image_paths

In [None]:
def load_yaml_config(yaml_path, keys_to_extract):
    """Read selected top-level keys from a YAML file.

    Args:
        yaml_path: Path of the YAML file to read.
        keys_to_extract: Iterable of top-level keys to look up.

    Returns:
        dict: One entry per requested key. The value is ``None`` when the key
        is absent, when ``yaml_path`` is not an existing file, or when the
        document is empty or its top level is not a mapping.
    """
    if not os.path.isfile(yaml_path):
        return {key: None for key in keys_to_extract}
    with open(yaml_path, 'r', encoding='utf-8') as f:
        config = yaml.safe_load(f)
    # safe_load yields None for an empty document and may yield a list or a
    # scalar; neither has .get(), so treat both as "no keys available".
    if not isinstance(config, dict):
        config = {}
    return {key: config.get(key) for key in keys_to_extract}

In [None]:
# CONFIGURATION
# Root folders, relative to the working directory: training runs and data splits.
data_base = os.path.join('data', '2-splits')
base_run_dir = os.path.join('runs', 'classify')

# Class order is [negative, positive]; PSS is the positive class for ROC.
classes = ['NRM', 'PSS']
POS_CLASS_INDEX = classes.index('PSS')

# Experiment grid to evaluate
fold_counts = dict(sgkf05=5, sgkf10=10)  # number of folds per split scheme
sgkf_versions = ['sgkf05']
yolo_versions = ['yolo11s']
seeds = ['seed{:02d}'.format(idx) for idx in range(1, 6)]  # seed01 .. seed05

# YAML fields to log from training
yaml_keys = [
    'epochs',
    'batch',
    'imgsz',
    'optimizer',
    'dropout',
    'lr0',
    'weight_decay',
    'model',
    'pretrained',
    'single_cls',
    'auto_augment',
    'data',
]

In [None]:
# EVALUATION
# For every (split scheme, model variant, seed, fold) whose weights, args.yaml
# and test folder all exist: classify each test image, then write a confusion
# matrix, a ROC curve, the ROC points and the per-image predictions into a
# `test-<seed>-<fold>` folder next to the training run. One summary CSV per
# (split scheme, model variant) collects the metrics of all evaluated folds.
for sgkf in sgkf_versions:
    for yolo in yolo_versions:
        model_base = f'{sgkf}-{yolo}'
        results = []

        for seed in seeds:
            fold_range = range(1, fold_counts[sgkf] + 1)

            for fold in fold_range:
                fold_name = f'fold{fold:02}'
                train_id = f'train-{seed}-{fold_name}'

                # Paths
                model_path = os.path.join(base_run_dir, model_base, train_id, 'weights', 'best.pt')
                yaml_path  = os.path.join(base_run_dir, model_base, train_id, 'args.yaml')
                test_dir   = os.path.join(data_base, sgkf, seed, fold_name, 'test')

                # Skip if any key file is missing
                if not (os.path.exists(model_path) and os.path.exists(test_dir) and os.path.exists(yaml_path)):
                    print(f"⚠️ Skipping (missing): {model_path} or {test_dir} or {yaml_path}")
                    continue

                print(f"\n📌 Evaluating: {model_base} | {seed} | {fold_name}")
                model = YOLO(model_path)

                # Collect labels, predicted labels, and scores for ROC
                true_labels = []
                predicted_labels = []
                pos_scores = []          # probability/score for positive class (PSS)
                file_ids = []            # optional: keep image names for per-image CSV

                # NOTE(review): the folder position in `classes` is used as the true
                # label and compared with the model's own class index (top1), so this
                # assumes the model's class order equals `classes` - TODO confirm.
                for cls_index, cls in enumerate(classes):
                    cls_dir = os.path.normpath(os.path.join(test_dir, cls))
                    # A split without a folder for this class contributes no images
                    # (listing a missing folder would abort the whole evaluation).
                    if not os.path.isdir(cls_dir):
                        print(f"⚠️ No '{cls}' folder in {test_dir}; no images loaded for this class")
                        continue
                    images = load_images_from_folder(cls_dir)
                    for img_path in tqdm(images, desc=f'{cls} images'):
                        pred = model(img_path, verbose=False)[0]
                        # top-1 predicted class for confusion matrix
                        top1 = int(pred.probs.top1)
                        # probability for positive class (PSS)
                        # pred.probs.data is a torch tensor with per-class softmax scores
                        score_pos = float(pred.probs.data[POS_CLASS_INDEX].item())

                        true_labels.append(cls_index)
                        predicted_labels.append(top1)
                        pos_scores.append(score_pos)
                        file_ids.append(os.path.basename(img_path))

                # Nothing to score: without samples the ROC and every ratio below are undefined
                if not true_labels:
                    print(f"⚠️ Skipping (no test images): {test_dir}")
                    continue

                # Ensure arrays
                y_true = np.array(true_labels, dtype=int)
                y_pred = np.array(predicted_labels, dtype=int)
                y_score = np.array(pos_scores, dtype=float)

                # --- Confusion matrix (from top-1, i.e., argmax rule) ---
                # labels=[0, 1] pins the matrix to 2x2 even if one class never occurs
                cm = confusion_matrix(y_true, y_pred, labels=[0, 1])

                # Create subfolder for outputs
                out_dir = os.path.join(base_run_dir, model_base, f'test-{seed}-{fold_name}')
                os.makedirs(out_dir, exist_ok=True)

                # Save confusion matrix PNG
                cm_fig_path = os.path.join(out_dir, 'confusion_matrix.png')
                fig_cm, ax_cm = plt.subplots()
                disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=classes)
                disp.plot(cmap='Blues', ax=ax_cm, colorbar=False)
                ax_cm.set_title(f"Confusion Matrix — {model_base} — {seed} — {fold_name}")
                fig_cm.tight_layout()
                fig_cm.savefig(cm_fig_path, dpi=300)
                plt.close(fig_cm)  # figures are created in a loop; release each one
                print(f"✅ Confusion matrix saved to: {cm_fig_path}")

                # --- ROC curve & AUC (threshold-free) ---
                fpr, tpr, thresholds = roc_curve(y_true, y_score, pos_label=POS_CLASS_INDEX)
                roc_auc = auc(fpr, tpr)

                # Save ROC PNG
                roc_fig_path = os.path.join(out_dir, 'roc_curve.png')
                fig_roc, ax_roc = plt.subplots()
                RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=roc_auc, estimator_name=f'{model_base}-{seed}-{fold_name}').plot(ax=ax_roc)
                ax_roc.set_title(f"ROC — {model_base} — {seed} — {fold_name} (AUC={roc_auc:.3f})")
                ax_roc.grid(True, linestyle='--', alpha=0.4)
                fig_roc.tight_layout()
                fig_roc.savefig(roc_fig_path, dpi=300)
                plt.close(fig_roc)
                print(f"✅ ROC curve saved to: {roc_fig_path}")

                # Save ROC points (CSV)
                roc_csv_path = os.path.join(out_dir, 'roc_points.csv')
                pd.DataFrame({'threshold': thresholds, 'fpr': fpr, 'tpr': tpr}).to_csv(roc_csv_path, index=False)
                print(f"✅ ROC points saved to: {roc_csv_path}")

                # Optional: per-image predictions & scores
                per_img_csv = os.path.join(out_dir, 'per_image_predictions.csv')
                pd.DataFrame({
                    'image': file_ids,
                    'true_label': y_true,
                    'pred_label': y_pred,
                    'score_pos_PSS': y_score
                }).to_csv(per_img_csv, index=False)

                # Confusion-matrix-based metrics (argmax decision)
                # Rows of cm are true labels, columns are predictions; index 1 is PSS.
                TP, TN, FP, FN = cm[1, 1], cm[0, 0], cm[0, 1], cm[1, 0]
                accuracy = round((TP + TN) / np.sum(cm), 4)
                precision = round(TP / (TP + FP), 4) if (TP + FP) else 0.0
                sensitivity = round(TP / (TP + FN), 4) if (TP + FN) else 0.0
                specificity = round(TN / (TN + FP), 4) if (TN + FP) else 0.0
                # F1 from the raw counts, so the rounding of precision and
                # sensitivity above does not leak into it.
                f1_denominator = 2 * TP + FP + FN
                f1_score = round(2 * TP / f1_denominator, 4) if f1_denominator else 0.0

                # Load training configuration from YAML
                config_data = load_yaml_config(yaml_path, yaml_keys)
                expected_data_path = os.path.join(data_base, sgkf, seed, fold_name)
                yaml_data_path = config_data.pop('data', '')
                # Compare with unified separators: the path stored in args.yaml
                # may use a different separator style than os.path.join produces here.
                config_data['data_path_match'] = (
                    expected_data_path.replace('\\', '/')
                    in str(yaml_data_path or '').replace('\\', '/')
                )

                # Append full record
                results.append({
                    'sgkf': sgkf,
                    'yolo_version': yolo,
                    'seed': seed,
                    'fold': fold_name,
                    'TP': TP,
                    'TN': TN,
                    'FP': FP,
                    'FN': FN,
                    'accuracy': accuracy,
                    'precision': precision,
                    'sensitivity': sensitivity,
                    'specificity': specificity,
                    'f1_score': f1_score,
                    'roc_auc': round(float(roc_auc), 4),
                    **config_data
                })

        # Save summary CSV with descriptive name
        csv_name = f"test-{sgkf}-{yolo}.csv"
        output_path = os.path.join(base_run_dir, model_base, csv_name)
        df = pd.DataFrame(results)
        if not results:
            # Every run was skipped: the target folder may not even exist, and an
            # empty CSV would carry no information.
            print(f"\n⚠️ No runs evaluated for {model_base}; summary CSV not written")
        else:
            drop_columns = ['auto_augment']
            df = df.drop(columns=drop_columns, errors='ignore')
            os.makedirs(os.path.dirname(output_path), exist_ok=True)
            df.to_csv(output_path, index=False)
            print(f"\n📆 Output saved to: {output_path}")