In [1]:
import os
from pathlib import Path
from typing import Iterable, Optional

import hydra
import numpy as np
import torch
import wandb
from accelerate import Accelerator
from matplotlib.cm import get_cmap
from omegaconf import DictConfig, OmegaConf
from PIL import Image
from skimage.color import label2rgb
from tqdm import tqdm, trange

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# evaluation utilities
import eval_utils
# for reading a dataset with groundth truth and labels
from dataset import EvalDataset


In [42]:
root_dir = 'demo_gt'
custom_dataset = EvalDataset(root_dir)

root: demo_gt
image_dir: demo_gt/images
gt_dir: demo_gt/ground_truth
pred_dir: demo_gt/predictions
Checking sizes of ground truth and predictions


100%|██████████| 2/2 [00:00<00:00, 91.12it/s]


In [47]:
# Add background class
n_classes = 6
n_clusters = 6

# Iterate
tp = [0] * n_classes
fp = [0] * n_classes
fn = [0] * n_classes

# metrics per image
miou_all = []
jac_all = []

print('n_classes=', n_classes)
print('n_clusters=', n_clusters)

n_classes= 6
n_clusters= 6


In [44]:
for i in trange(len(custom_dataset), desc='Iterating predictions'):
    image, gt, pred, metadata = custom_dataset[i]

    # Do matching 
    gt_unique = np.unique(gt)
    pred_unique = np.unique(pred)
    print(f'GT unique labels: {gt_unique}')
    print(f'PRED unique labels: {pred_unique}')
    if np.array_equal(gt_unique,pred_unique) and n_clusters==n_classes:
    # if len(gt_unique)==len(pred_unique):
        print('Using hungarian algorithm for matching')
        match, iou_mat  = eval_utils.hungarian_match(pred, gt, preds_k=n_clusters, targets_k=n_classes, metric='iou', thresh=0.0)
    else:
        print('Using majority voting for matching')
        match, iou_mat = eval_utils.majority_vote_unique(pred, gt, preds_k=n_clusters, targets_k=n_classes, thresh=0.0)
    print(f'Optimal matching: {match}')

    # reorder prediction according to found mapping
    reordered_pred = np.zeros_like(pred)
    for pred_i, target_i in match:
        reordered_pred[pred == int(pred_i)] = int(target_i)
    
    # calculate TP, FP, FN, TN for a single image - to get an IoU per image
    tp_image = [0] * n_classes
    fp_image = [0] * n_classes
    fn_image = [0] * n_classes

    # metrics per image
    jac_image_all_categs = [0] * n_classes

    # TP, FP, and FN evaluation, accumulated for ALL images
    for i_part in range(0, n_classes):
        tmp_gt = (gt == i_part) #get class i mask from ground truth
        tmp_pred = (reordered_pred == i_part) #get class i mask from predictions
        # just for the current image
        tp_image[i_part] += np.sum(tmp_gt & tmp_pred)
        fp_image[i_part] += np.sum(~tmp_gt & tmp_pred)
        fn_image[i_part] += np.sum(tmp_gt & ~tmp_pred)
        # accumulated for all
        tp[i_part] += tp_image[i_part] 
        fp[i_part] += fp_image[i_part] 
        fn[i_part] += fn_image[i_part]

        # calculate metrics per image
        jac_image_all_categs[i_part] = float(tp_image[i_part]) / max(float(tp_image[i_part] + fp_image[i_part] + fn_image[i_part]), 1e-8)



    print(f'PER IMAGE: TP={tp_image}, FP={fp_image}, FN={fn_image}')
    print(f'PER DATASET: TP={tp}, FP={fp}, FN={fn}')
    print(f'PER IMAGE: jac_image_all_categs={jac_image_all_categs}')


    miou_image = np.mean(jac_image_all_categs)
    miou_all.append(miou_image)
    jac_all.append(jac_image_all_categs)

mIoU_mean = np.mean(miou_all)
mIoU_std = np.std(miou_all)
print('MEAN of mIoU=', mIoU_mean)

mIoU_mean_from_jac = np.mean(np.mean(jac_all))
print('MEAN of mIoU from jac =', mIoU_mean_from_jac)


Iterating predictions:   0%|          | 0/2 [00:00<?, ?it/s]

GT unique labels: [0 1 2 3 4 5]
PRED unique labels: [0 1 2 3 4 5]
Using hungarian algorithm for matching
Using iou as metric


Iterating predictions:  50%|█████     | 1/2 [00:00<00:00,  1.89it/s]

Optimal matching: [(0, 0), (1, 3), (2, 5), (3, 1), (4, 2), (5, 4)]
PER IMAGE: TP=[60590, 2946, 1824, 2404, 7519, 69814], FP=[464, 38, 481, 708, 3596, 4816], FN=[8094, 262, 0, 80, 114, 1553]
PER DATASET: TP=[60590, 2946, 1824, 2404, 7519, 69814], FP=[464, 38, 481, 708, 3596, 4816], FN=[8094, 262, 0, 80, 114, 1553]
PER IMAGE: jac_image_all_categs=[0.8762364782784752, 0.9075785582255084, 0.7913232104121475, 0.7531328320802005, 0.6696054857957076, 0.9163986716196527]
GT unique labels: [0 1 4 5]
PRED unique labels: [0 2 3 5]
Using majority voting for matching
No threshold used


Iterating predictions: 100%|██████████| 2/2 [00:01<00:00,  1.78it/s]

Optimal matching: [(0, 0), (3, 1), (0, 2), (0, 3), (5, 4), (2, 5)]
PER IMAGE: TP=[0, 3513, 0, 0, 10192, 92322], FP=[0, 719, 0, 40908, 3235, 4311], FN=[48400, 7, 0, 0, 2, 764]
PER DATASET: TP=[60590, 6459, 1824, 2404, 17711, 162136], FP=[464, 757, 481, 41616, 6831, 9127], FN=[56494, 269, 0, 80, 116, 2317]
PER IMAGE: jac_image_all_categs=[0.0, 0.8287331917905166, 0.0, 0.0, 0.7589545014520813, 0.9478936722897009]
MEAN of mIoU= 0.6208213834953326
MEAN of mIoU from jac = 0.6208213834953326





In [45]:
 # Calculate Jaccard index
jac1 = [0] * n_classes
jac2 = [0] * n_classes

for i_part in range(0, n_classes):
    jac1[i_part] = float(tp[i_part]) / max(float(tp[i_part] + fp[i_part] + fn[i_part]), 1e-8)
    jac2[i_part] = np.mean(jac_all, axis=0)
old_mIoU_mean1 = np.mean(jac1)
old_mIoU_mean2 = np.mean(jac2)
# old_mIoU_mean3 = np.mean(jac_new) / 2

print('old mIoU1=', old_mIoU_mean1) # somehow is different from mIoU_mean
print('old mIoU2=', old_mIoU_mean2) # somehow is same as mIoU_mean
# print('old mIoU3=', old_mIoU_mean3) # somehow is different from mIoU_mean


old mIoU1= 0.6460911940855601
old mIoU2= 0.6208213834953323


In [46]:
jac_all

[[0.8762364782784752,
  0.9075785582255084,
  0.7913232104121475,
  0.7531328320802005,
  0.6696054857957076,
  0.9163986716196527],
 [0.0, 0.8287331917905166, 0.0, 0.0, 0.7589545014520813, 0.9478936722897009]]