In [None]:
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
from PIL import Image

import albumentations as A
from albumentations.pytorch import ToTensorV2

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

from ultralytics import YOLO
import segmentation_models_pytorch as smp

# ---------------------------------------------------------------------------
# Evaluation configuration
# ---------------------------------------------------------------------------
# Number of label values used for the confusion matrix and as the number of
# output channels when SMP models are rebuilt.
NUM_CLASSES    = 10       # including background=0
# Validation RGB images; both dataset classes list this directory.
VAL_IMAGES_DIR = "./AgricultureVision/val/images/rgb"
# Ground-truth masks, looked up as "<image base name>.png".
VAL_MASKS_DIR  = "./AgricultureVision/val/masks"
# Passed to YOLOValDataset as labels_dir (stored there, not read by it).
VAL_LABELS_DIR = "./AgricultureVisionYOLO/val/labels"

# Ultralytics checkpoints to evaluate, one per entry.
# presumably n/s/m/l/x are the YOLO model size variants -- TODO confirm
YOLO_MODELS = [
    "YOLOSEG/YOLOn/best.pt",
    "YOLOSEG/YOLOs/best.pt",
    "YOLOSEG/YOLOm/best.pt",
    "YOLOSEG/YOLOl/best.pt",
    "YOLOSEG/YOLOx/best.pt",
]

# segmentation_models_pytorch checkpoints. eval_smp_model() parses each file
# name as "best_<arch>_<encoder>.pth" by splitting on "_", so an encoder name
# that itself contains "_" is written here with "+" in its place; the "+" is
# turned back into "_" before the encoder is created and before the file is
# opened.
SMP_MODELS = [
    "smp_logs/best_Unet_resnet101.pth",
    "smp_logs/best_Unet_resnet34.pth",
    "smp_logs/best_Unet_timm-efficientnet-b4.pth",
    "smp_logs/best_UnetPlusPlus_resnet101.pth",
    "smp_logs/best_UnetPlusPlus_resnet34.pth",
    "smp_logs/best_UnetPlusPlus_timm-efficientnet-b4.pth",
    "smp_logs/best_FPN_resnet101.pth",
    "smp_logs/best_FPN_resnet34.pth",
    "smp_logs/best_FPN_timm-efficientnet-b4.pth",
    "smp_logs/best_DeepLabV3_resnet34.pth",
    "smp_logs/best_DeepLabV3_resnext101+32x8d.pth",
    "smp_logs/best_DeepLabV3_timm-efficientnet-b4.pth",
    "smp_logs/best_DeepLabV3Plus_resnet34.pth",
    "smp_logs/best_DeepLabV3Plus_resnext101+32x8d.pth",
    "smp_logs/best_DeepLabV3Plus_timm-efficientnet-b4.pth",    
]

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
import os
import numpy as np
from PIL import Image
import albumentations as A
from albumentations.pytorch import ToTensorV2
import torch
from torch.utils.data import Dataset

# Deterministic validation preprocessing shared by image and mask:
# resize to 512x512, normalise the image channels, convert to tensors.
# NOTE(review): mean/std look like the usual ImageNet statistics -- confirm
# they match what the checkpoints were trained with.
VAL_TRANSFORM = A.Compose(
    [
        A.Resize(height=512, width=512),
        A.Normalize(
            mean=(0.485, 0.456, 0.406),
            std=(0.229, 0.224, 0.225),
        ),
        ToTensorV2(),
    ]
)

class SMPValDataset(Dataset):
    """Validation dataset producing ``(image, mask, filename)`` triples.

    Image files are discovered by listing ``images_dir`` and keeping names
    that end in ``.jpg`` or ``.png`` (case-insensitive), in sorted order.
    The mask of an image is read from ``masks_dir`` under the same base name
    with a ``.png`` extension.

    Args:
        images_dir: Directory containing the RGB images.
        masks_dir: Directory containing the label masks.
        transform: Callable invoked as ``transform(image=..., mask=...)`` that
            returns a mapping with ``"image"`` and ``"mask"`` entries. The
            mask entry must support ``.long()``.
    """

    def __init__(self, images_dir, masks_dir, transform=VAL_TRANSFORM):
        candidates = os.listdir(images_dir)
        self.images = sorted(
            name for name in candidates
            if name.lower().endswith((".jpg", ".png"))
        )
        self.images_dir = images_dir
        self.masks_dir = masks_dir
        self.transform = transform

    def __len__(self):
        """Return the number of discovered image files."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load, transform and return the sample at position ``idx``."""
        fn = self.images[idx]
        stem = fn.rsplit(".", 1)[0]

        # The image is forced to 3-channel RGB; the mask is read as stored.
        rgb = np.array(Image.open(os.path.join(self.images_dir, fn)).convert("RGB"))
        labels = np.array(Image.open(os.path.join(self.masks_dir, stem + ".png")))

        sample = self.transform(image=rgb, mask=labels)
        return sample["image"], sample["mask"].long(), fn


class YOLOValDataset(Dataset):
    """Validation dataset producing padded square image tensors for YOLO.

    Image files are discovered by listing ``images_dir`` and keeping names
    that end in ``.jpg`` or ``.png`` (case-insensitive), in sorted order.

    Args:
        images_dir: Directory containing the RGB images.
        labels_dir: Stored on the instance; not read by this class.
        img_size: Side length of the square tensor returned per sample.
    """

    def __init__(self, images_dir, labels_dir, img_size=512):
        listing = os.listdir(images_dir)
        self.images = sorted(
            name for name in listing
            if name.lower().endswith((".jpg", ".png"))
        )
        self.images_dir = images_dir
        self.labels_dir = labels_dir
        self.img_size = img_size

    def __len__(self):
        """Return the number of discovered image files."""
        return len(self.images)

    def __getitem__(self, i):
        """Return ``(tensor, filename, (h, w, scale))`` for sample ``i``.

        The image is scaled so that its longer side equals ``img_size``,
        pasted into the top-left corner of a zero-filled square canvas, and
        converted to a float CHW tensor divided by 255. ``h`` and ``w`` are
        the original height and width; ``scale`` is the resize factor.
        """
        fn = self.images[i]
        img = np.array(Image.open(os.path.join(self.images_dir, fn)).convert("RGB"))
        h, w = img.shape[:2]
        scale = self.img_size / max(h, w)

        # PIL expects the target size as (width, height).
        target_size = (int(w * scale), int(h * scale))
        resized = np.array(Image.fromarray(img).resize(target_size))

        canvas = np.zeros((self.img_size, self.img_size, 3), dtype=img.dtype)
        rows, cols = resized.shape[0], resized.shape[1]
        canvas[:rows, :cols] = resized

        chw = torch.from_numpy(canvas).permute(2, 0, 1)
        return chw.float() / 255.0, fn, (h, w, scale)

def fast_confusion(gt_mask, pred_mask, nclass):
    """Build an ``nclass x nclass`` confusion matrix from two label maps.

    Rows index the ground-truth class and columns the predicted class.

    Args:
        gt_mask: Ground-truth labels; any array-like of integers (NumPy array,
            CPU tensor, list). Flattened before use.
        pred_mask: Predicted labels with the same number of elements.
        nclass: Number of classes; valid labels are ``0 .. nclass - 1``.

    Returns:
        ``np.ndarray`` of shape ``(nclass, nclass)`` with integer counts.

    Raises:
        ValueError: If the two inputs differ in size, or if a prediction
            falls outside ``0 .. nclass - 1``.

    Pixels whose ground-truth label is outside ``0 .. nclass - 1`` (for
    example an "ignore" value such as 255) are skipped.
    """
    # Widen to int64 first: with uint8 masks ``gt * nclass`` would be computed
    # in uint8 and wrap around as soon as the combined index exceeds 255.
    gt = np.asarray(gt_mask).astype(np.int64, copy=False).ravel()
    pred = np.asarray(pred_mask).astype(np.int64, copy=False).ravel()
    if gt.shape != pred.shape:
        raise ValueError(
            f"gt_mask and pred_mask differ in size: {gt.size} vs {pred.size}"
        )

    valid = (gt >= 0) & (gt < nclass)
    gt = gt[valid]
    pred = pred[valid]

    # An out-of-range prediction would silently be counted in another row.
    if pred.size and (pred.min() < 0 or pred.max() >= nclass):
        raise ValueError(
            f"pred_mask contains labels outside 0..{nclass - 1}"
        )

    inds = gt * nclass + pred
    return np.bincount(inds, minlength=nclass ** 2).reshape(nclass, nclass)

def compute_segmentation_metrics(conf_matrix):
    """Derive per-class and mean precision, recall, IoU and F1.

    Args:
        conf_matrix: Square NumPy array whose rows are ground-truth classes
            and whose columns are predicted classes.

    Returns:
        Dict with ``per_class_*`` arrays (one value per class) and ``mean_*``
        scalars. The mean is taken over all classes. Any ratio that comes out
        as NaN (zero denominator) is reported as 0.0.
    """
    true_pos = np.diag(conf_matrix)
    false_pos = conf_matrix.sum(axis=0) - true_pos
    false_neg = conf_matrix.sum(axis=1) - true_pos

    def _nan_to_zero(values):
        # Classes with an empty denominator produce NaN; report them as 0.
        return np.where(np.isnan(values), 0.0, values)

    # Zero denominators are expected here, so silence the NumPy warnings.
    with np.errstate(divide="ignore", invalid="ignore"):
        raw_precision = true_pos / (true_pos + false_pos)
        raw_recall = true_pos / (true_pos + false_neg)
        raw_iou = true_pos / (true_pos + false_pos + false_neg)
        raw_f1 = 2 * raw_precision * raw_recall / (raw_precision + raw_recall)

    precision = _nan_to_zero(raw_precision)
    recall = _nan_to_zero(raw_recall)
    iou = _nan_to_zero(raw_iou)
    f1 = _nan_to_zero(raw_f1)

    metrics = {
        "per_class_precision": precision,
        "per_class_recall": recall,
        "per_class_iou": iou,
        "per_class_f1": f1,
    }
    metrics["mean_precision"] = np.mean(precision)
    metrics["mean_recall"] = np.mean(recall)
    metrics["mean_iou"] = np.mean(iou)
    metrics["mean_f1"] = np.mean(f1)
    return metrics

def eval_smp_model(model_path, val_loader):
    """Evaluate one segmentation_models_pytorch checkpoint.

    The checkpoint file name must follow ``best_<arch>_<encoder>.pth``. A
    ``+`` in the file name stands for ``_`` (so encoder names containing
    underscores survive the split); it is replaced both in the encoder name
    and in the file name that is actually opened.

    Args:
        model_path: Path of the checkpoint as listed in the configuration.
        val_loader: Iterable yielding ``(image_batch, mask_batch, names)``.

    Returns:
        The metrics dict produced by ``compute_segmentation_metrics`` for the
        confusion matrix accumulated over the whole loader.

    Raises:
        ValueError: If the file name does not contain an architecture and an
            encoder component.
    """
    filename = os.path.basename(model_path)
    parts = os.path.splitext(filename)[0].split("_")
    if len(parts) < 3:
        raise ValueError(
            f"cannot parse architecture/encoder from checkpoint name {filename!r}"
        )
    arch, enc = parts[1:3]
    enc = enc.replace("+", "_")
    net = smp.create_model(arch=arch, encoder_name=enc, encoder_weights=None,
                           in_channels=3, classes=NUM_CLASSES).to(DEVICE)

    # Only the file name carries the "+" placeholder; leave directories alone.
    weights_path = os.path.join(os.path.dirname(model_path),
                                filename.replace("+", "_"))
    # NOTE(security): torch.load unpickles the file; only load checkpoints
    # from a trusted source (or pass weights_only=True where supported).
    net.load_state_dict(torch.load(weights_path, map_location=DEVICE))
    net.eval()

    total_cm = np.zeros((NUM_CLASSES, NUM_CLASSES), dtype=np.int64)
    with torch.no_grad():
        for img, gt, _ in tqdm(val_loader, desc=f"SMP:{os.path.basename(model_path)}"):
            out = net(img.to(DEVICE))
            pred = torch.argmax(out, dim=1).cpu().numpy().ravel()
            # Convert the ground truth to NumPy explicitly so the confusion
            # matrix never mixes a torch tensor with a NumPy array.
            gt_flat = gt.cpu().numpy().astype(np.int64, copy=False).ravel()
            total_cm += fast_confusion(gt_flat, pred, NUM_CLASSES)
    return compute_segmentation_metrics(total_cm)

import cv2

def eval_yolo_model(model_path, val_dataset):
    """Evaluate one Ultralytics YOLO segmentation checkpoint.

    For every sample the instance masks are merged into a single semantic
    label map (label ``class_id + 1``, with 0 left for background), the
    padding added by the dataset is removed, and the map is resized back to
    the original image resolution before it is compared with the ground
    truth mask read from ``VAL_MASKS_DIR``.

    Args:
        model_path: Path of the YOLO checkpoint.
        val_dataset: Dataset yielding ``(tensor, filename, (h, w, scale))``
            and exposing ``img_size``; iterated directly, not via a loader.

    Returns:
        The metrics dict produced by ``compute_segmentation_metrics`` for the
        confusion matrix accumulated over the whole dataset.
    """
    yolo = YOLO(model_path)
    total_cm = np.zeros((NUM_CLASSES, NUM_CLASSES), dtype=np.int64)

    for img_tensor, fn, (h, w, scale) in tqdm(val_dataset, desc=f"YOLO:{os.path.basename(model_path)}"):
        h, w = int(h), int(w)
        results = yolo.predict(source=img_tensor.unsqueeze(0), imgsz=val_dataset.img_size, verbose=False)
        res = results[0]

        # Default: everything is background at the original resolution.
        mask_pred = np.zeros((h, w), dtype=np.uint8)

        if res.masks is not None and res.masks.data is not None:
            masks = res.masks.data.cpu().numpy().astype(bool)  # (N, H, W)
            classes = res.boxes.cls.cpu().numpy().astype(int)  # (N,)
            scores = res.boxes.conf.cpu().numpy()              # (N,)

            # Paint on a canvas with the masks' own resolution; the masks are
            # assumed to cover the padded network input -- TODO confirm.
            canvas = np.zeros(masks.shape[1:], dtype=np.uint8)
            # Ascending confidence, so the most confident instance is painted
            # last and wins wherever instances overlap.
            for i in np.argsort(scores):
                canvas[masks[i]] = classes[i] + 1

            # The dataset pastes the resized image into the top-left corner,
            # so the image content occupies the first ph rows / pw columns.
            ph, pw = int(h * scale), int(w * scale)
            content = np.ascontiguousarray(canvas[:ph, :pw])
            if content.size:
                # Nearest-neighbour keeps labels discrete; dsize is (w, h).
                mask_pred = cv2.resize(content, (w, h), interpolation=cv2.INTER_NEAREST)

        flat_pred = mask_pred.ravel()
        gt_mask = np.array(
            Image.open(os.path.join(VAL_MASKS_DIR, fn.rsplit(".", 1)[0] + ".png"))
        ).astype(np.int64).ravel()
        total_cm += fast_confusion(gt_mask, flat_pred, NUM_CLASSES)

    return compute_segmentation_metrics(total_cm)



if __name__ == "__main__":
    # 1) Evaluate every SMP checkpoint on the shared validation loader.
    smp_val = SMPValDataset(VAL_IMAGES_DIR, VAL_MASKS_DIR)
    smp_loader = DataLoader(
        smp_val,
        batch_size=1,
        shuffle=False,
        num_workers=2,
    )
    smp_results = {path: eval_smp_model(path, smp_loader) for path in SMP_MODELS}

    # 2) Evaluate every YOLO checkpoint; the dataset is iterated directly.
    yolo_val = YOLOValDataset(VAL_IMAGES_DIR, VAL_LABELS_DIR, img_size=512)
    yolo_results = {path: eval_yolo_model(path, yolo_val) for path in YOLO_MODELS}

    # 3) Collect the mean metrics of all models into one table, best mean
    #    IoU first, and print it as Markdown.
    all_results = {**smp_results, **yolo_results}
    rows = [
        {
            "model": model_name,
            "mean_iou": metrics["mean_iou"],
            "mean_f1": metrics["mean_f1"],
            "mean_precision": metrics["mean_precision"],
            "mean_recall": metrics["mean_recall"],
        }
        for model_name, metrics in all_results.items()
    ]
    df = pd.DataFrame(rows).sort_values("mean_iou", ascending=False)
    print(df.to_markdown(index=False))
