This script evaluates the performance of the YOLO + U-Net model using IoU, Precision, and Recall per class.

**Note:**
Unlike evaluations based on YOLO-cropped images, this evaluation is performed on the full 12500×12500 original image. This allows a direct, head-to-head comparison with the U-Net-only model.

In [2]:
import os, cv2
import numpy as np
import pandas as pd
from tqdm import tqdm

# Define class labels; a mask pixel's value is the index of its class in this list
CLASSES = ["background", "PV_normal", "PV_heater", "PV_pool"]
n_classes = len(CLASSES)

# Directories for predicted and ground truth masks (as PNGs)
pred_dir = "/shared/data/climateplus2025/YOLO+U-Net_Prediction_updated_0722/prediction_masks_tiles_320"
gt_dir   = "/home/cmn60/cape_town_segmentation/masks_320_1k_new"
output_csv = "metrics_evaluation_YOLO+U-Net.csv"


def accumulate_counts(gt, pred, tp, fp, fn):
    """Add one tile's per-class pixel counts to the running totals, in place.

    Args:
        gt: Ground-truth mask array whose values are class indices.
        pred: Predicted mask array with the same shape as ``gt``.
        tp: Per-class true-positive totals (pixels labelled ``cls`` in both masks).
        fp: Per-class false-positive totals (predicted ``cls``, ground truth differs).
        fn: Per-class false-negative totals (ground truth ``cls``, prediction differs).

    The number of classes is taken from ``len(tp)``. Pixel values outside that
    range match no class and therefore contribute to no counter.
    """
    for cls in range(len(tp)):
        pred_cls = pred == cls
        gt_cls = gt == cls

        tp[cls] += np.count_nonzero(pred_cls & gt_cls)
        fp[cls] += np.count_nonzero(pred_cls & ~gt_cls)
        fn[cls] += np.count_nonzero(~pred_cls & gt_cls)


def safe_ratio(num, den):
    """Return ``num / den`` element-wise, with NaN wherever ``den`` is zero."""
    num = np.asarray(num, dtype=np.float64)
    den = np.asarray(den, dtype=np.float64)
    out = np.full(den.shape, np.nan, dtype=np.float64)
    np.divide(num, den, out=out, where=den > 0)
    return out


def compute_metrics(tp, fp, fn):
    """Compute per-class IoU, precision and recall from accumulated pixel counts.

    Args:
        tp, fp, fn: Per-class true-positive / false-positive / false-negative totals.

    Returns:
        Tuple ``(iou, precision, recall)`` of float64 arrays, one entry per class.
        An entry is NaN when its denominator is zero (the class never occurs in
        the relevant masks), so an absent class is reported as undefined rather
        than as a perfect score.
    """
    tp = np.asarray(tp, dtype=np.float64)
    fp = np.asarray(fp, dtype=np.float64)
    fn = np.asarray(fn, dtype=np.float64)

    # intersection == tp and union == tp + fp + fn for every class
    iou = safe_ratio(tp, tp + fp + fn)
    precision = safe_ratio(tp, tp + fp)
    recall = safe_ratio(tp, tp + fn)
    return iou, precision, recall


# The evaluation itself only runs when this code is executed directly (script or
# notebook cell), not when the helpers above are imported. Everything below is
# still assigned at module scope, so the results remain available afterwards.
if __name__ == "__main__":
    # Initialize accumulators
    tp = np.zeros(n_classes, dtype=np.float64)
    fp = np.zeros_like(tp)
    fn = np.zeros_like(tp)

    # Get file list (assuming consistent naming between gt_dir and pred_dir)
    file_names = sorted(f for f in os.listdir(gt_dir) if f.endswith(".png"))

    for fname in tqdm(file_names, desc="Evaluating"):
        gt_path = os.path.join(gt_dir, fname)
        pred_path = os.path.join(pred_dir, fname)

        if not os.path.exists(pred_path):
            print(f"[Warning] Prediction missing: {fname}")
            continue

        # Load grayscale masks (single-channel, values: 0~3)
        gt = cv2.imread(gt_path, cv2.IMREAD_GRAYSCALE)
        pred = cv2.imread(pred_path, cv2.IMREAD_GRAYSCALE)

        # cv2.imread signals an unreadable/corrupt file by returning None
        # instead of raising, so check before touching .shape
        if gt is None or pred is None:
            print(f"[Error] Unreadable mask: {fname}")
            continue

        if gt.shape != pred.shape:
            print(f"[Error] Shape mismatch: {fname}")
            continue

        accumulate_counts(gt, pred, tp, fp, fn)

    # Intersection / union totals, derived from the confusion counts
    inter = tp.copy()
    union = tp + fp + fn

    # Compute metrics
    iou, precision, recall = compute_metrics(tp, fp, fn)

    df = pd.DataFrame({
        "class": CLASSES,
        "IoU": iou,
        "Precision": precision,
        "Recall": recall
    })

    df.to_csv(output_csv, index=False)
    print(f"\n Evaluation saved to {output_csv}")


Evaluating:  12%|█▏        | 60/483 [00:00<00:01, 305.85it/s]



Evaluating:  41%|████      | 198/483 [00:00<00:00, 426.71it/s]



Evaluating:  59%|█████▉    | 285/483 [00:00<00:00, 414.44it/s]



Evaluating:  79%|███████▉  | 383/483 [00:00<00:00, 450.81it/s]



Evaluating: 100%|██████████| 483/483 [00:01<00:00, 426.98it/s]


 Evaluation saved to metrics_evaluation_YOLO+U-Net.csv



