In [61]:
import numpy as np
import imageio.v2 as imageio  # or cv2
import os
import numpy as np 
from sklearn.metrics import cohen_kappa_score, confusion_matrix

In [62]:
def load_mask(path):
    """Read the image at ``path`` and return it as a 0/1 ``uint8`` array.

    Every element of the loaded image that is greater than zero becomes 1;
    all remaining elements become 0. The array keeps the shape that
    ``imageio.imread`` returned.
    """
    raw = imageio.imread(path)
    is_foreground = raw > 0
    return is_foreground.astype(np.uint8)

def evaluate_mask(pred_mask, gt_mask):
    """Score a predicted mask against its ground-truth mask.

    Args:
        pred_mask: Predicted mask array.
        gt_mask: Ground-truth mask array; must have the same shape as
            ``pred_mask``.

    Returns:
        A tuple ``(accuracy, cm, kappa)`` where ``accuracy`` is the fraction
        of elements on which the two masks agree, ``cm`` is the result of
        ``confusion_matrix`` evaluated with ``labels=[0, 1]`` (TN, FP, FN, TP
        for binary masks), and ``kappa`` is the result of
        ``cohen_kappa_score``.

    Raises:
        AssertionError: If the two masks do not have the same shape.
    """
    assert pred_mask.shape == gt_mask.shape, "Shapes must match!"

    # The sklearn metrics expect 1-D label vectors, so collapse both masks.
    truth = gt_mask.ravel()
    guess = pred_mask.ravel()

    agreement = truth == guess

    # Ground truth is always passed first, prediction second.
    return (
        agreement.mean(),
        confusion_matrix(truth, guess, labels=[0, 1]),
        cohen_kappa_score(truth, guess),
    )

In [63]:
def main(predicted_mask_dir, ground_truth_dir):
    """Score every predicted ``.png`` mask against its ground-truth counterpart.

    Each ``.png`` file in ``predicted_mask_dir`` is paired with the file of the
    same name in ``ground_truth_dir``. Pairs are loaded with ``load_mask`` and
    scored with ``evaluate_mask``; the mean Cohen's Kappa over all pairs with a
    defined score is then printed.

    Args:
        predicted_mask_dir: Directory containing the predicted mask images.
        ground_truth_dir: Directory containing the ground-truth mask images,
            named identically to the predictions.

    Returns:
        None. All results are reported on standard output.
    """
    # os.listdir gives no ordering guarantee; sort so that the per-file
    # messages come out in the same order on every run.
    mask_files = sorted(
        f for f in os.listdir(predicted_mask_dir) if f.endswith('.png')
    )

    accuracies = []
    kappas = []
    undefined_kappa_files = []

    for mask_file in mask_files:
        pred_mask_path = os.path.join(predicted_mask_dir, mask_file)
        gt_mask_path = os.path.join(ground_truth_dir, mask_file)  # assumes same filename

        if not os.path.exists(gt_mask_path):
            print(f"Ground truth for {mask_file} not found! Skipping.")
            continue

        pred_mask = load_mask(pred_mask_path)
        gt_mask = load_mask(gt_mask_path)

        accuracy, _cm, kappa = evaluate_mask(pred_mask, gt_mask)
        accuracies.append(accuracy)

        # Kappa is NaN when the statistic is undefined (e.g. both masks
        # consist of one identical class). A single NaN would turn the whole
        # average into NaN, so such files are tracked separately.
        if np.isnan(kappa):
            undefined_kappa_files.append(mask_file)
        else:
            kappas.append(kappa)

    if not accuracies:
        print("No masks evaluated!")
        return

    if undefined_kappa_files:
        print(
            f"Cohen’s Kappa is undefined for {len(undefined_kappa_files)} mask(s), "
            f"excluded from the average: {', '.join(undefined_kappa_files)}"
        )

    if kappas:
        avg_kappa = np.mean(kappas)
        print(f"Average Cohen’s Kappa: {avg_kappa:.4f}")
    else:
        print("No mask pair produced a defined Cohen’s Kappa!")

if __name__ == "__main__":
    # Paths are relative to the notebook's working directory.
    predicted_mask_dir = "../images/micro_sam_pred"
    ground_truth_dir = "../images/test_images_masks"

    # Compare each predicted mask with the ground-truth file of the same name.
    main(
        predicted_mask_dir=predicted_mask_dir,
        ground_truth_dir=ground_truth_dir,
    )

Average Cohen’s Kappa: 0.8349
