# COCO Panoptic scoring for multiple images

TL;DR: we need to delay the computation of the indicators (PQ, RQ, SQ) until the matching scores of all images have been gathered.

This notebook contains 2 parts:

1. a simple walkthrough of edge cases, showing that the labeling and matching steps work with empty predictions and/or targets
2. a summary with reusable functions

We assume detections are rasterized as binary maps with disjoint shapes here, but providing label maps directly should work just as well, as long as all elements belong to the same class.

Missing here:
- background management: we just discard it here
- multiclass (seems to be only an extra for loop to add)

In [1]:
import numpy as np

## Global init

In [2]:
# Global accumulators: matching scores gathered over every sample processed below
W_TtoP_global, W_PtoT_global = [], []

## Inner loop

### Sample detection 1: P and T both not empty

In [3]:
# Target mask: a single 4x4 square
aa = np.full((10, 10), False)
aa[1:5, 1:5] = True
# Prediction mask: the same square shifted one column to the right, plus a separate 3x3 square
bb = np.full((10, 10), False)
bb[1:5, 2:6] = True
bb[6:9, 6:9] = True

In [4]:
aa*1

array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 1, 1, 1, 1, 0, 0, 0, 0, 0],
       [0, 1, 1, 1, 1, 0, 0, 0, 0, 0],
       [0, 1, 1, 1, 1, 0, 0, 0, 0, 0],
       [0, 1, 1, 1, 1, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])

In [5]:
bb*1

array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 1, 1, 1, 1, 0, 0, 0, 0],
       [0, 0, 1, 1, 1, 1, 0, 0, 0, 0],
       [0, 0, 1, 1, 1, 1, 0, 0, 0, 0],
       [0, 0, 1, 1, 1, 1, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 1, 1, 1, 0],
       [0, 0, 0, 0, 0, 0, 1, 1, 1, 0],
       [0, 0, 0, 0, 0, 0, 1, 1, 1, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])

In [6]:
from coco_pano_ext_demo.coco import _compute_labelmap, _compute_iou

In [7]:
# Turn each binary mask into a label map (0 is the background in the maps displayed below)
laa = _compute_labelmap(aa)
lbb = _compute_labelmap(bb)

In [8]:
laa

array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 1, 1, 1, 1, 0, 0, 0, 0, 0],
       [0, 1, 1, 1, 1, 0, 0, 0, 0, 0],
       [0, 1, 1, 1, 1, 0, 0, 0, 0, 0],
       [0, 1, 1, 1, 1, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])

In [9]:
lbb

array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 1, 1, 1, 1, 0, 0, 0, 0],
       [0, 0, 1, 1, 1, 1, 0, 0, 0, 0],
       [0, 0, 1, 1, 1, 1, 0, 0, 0, 0],
       [0, 0, 1, 1, 1, 1, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 2, 2, 2, 0],
       [0, 0, 0, 0, 0, 0, 2, 2, 2, 0],
       [0, 0, 0, 0, 0, 0, 2, 2, 2, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])

In [10]:
# Matching scores in both directions (target -> prediction, prediction -> target).
# Each vector holds one score per label of the corresponding map; entry 0 belongs
# to the background label and is dropped in the next cell.
wtp, wpt = _compute_iou(laa, lbb)
wtp, wpt

(array([0.80681818, 0.6       ]), array([0.80681818, 0.6       , 0.10714286]))

In [11]:
# Drop entry 0 (the background component) from each score vector
wtp = wtp[1:]
wpt = wpt[1:]
wtp, wpt

(array([0.6]), array([0.6       , 0.10714286]))

In [12]:
# Append this sample's scores to the global matching lists
W_TtoP_global += wtp.tolist()
W_PtoT_global += wpt.tolist()
W_TtoP_global, W_PtoT_global

([0.6], [0.6, 0.10714285714285714])

### Sample detection 2: P empty, T not empty

(we reuse `laa` as fake target)

In [13]:
# Empty prediction: an all-False mask with the shape and dtype of `aa`
cc = np.zeros(aa.shape, dtype=aa.dtype)
lcc = _compute_labelmap(cc)
lcc

array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])

In [14]:
# Target `laa` against the empty prediction `lcc`: the prediction side only
# contains the background label, so `wpt` has a single entry.
wtp, wpt = _compute_iou(laa, lcc)
wtp, wpt

(array([0.84, 0.16]), array([0.84]))

In [15]:
# Drop entry 0 (the background component) from each score vector
wtp = wtp[1:]
wpt = wpt[1:]
wtp, wpt

(array([0.16]), array([], dtype=float64))

In [16]:
# Append this sample's scores to the global matching lists
W_TtoP_global += wtp.tolist()
W_PtoT_global += wpt.tolist()
W_TtoP_global, W_PtoT_global

([0.6, 0.16], [0.6, 0.10714285714285714])

### Sample detection 3: P not empty, T empty

(we reuse `lbb` as fake prediction)

In [17]:
# Empty target: an all-False mask with the shape and dtype of `aa`
dd = np.zeros(aa.shape, dtype=aa.dtype)
ldd = _compute_labelmap(dd)
ldd

array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])

In [18]:
# Empty target `ldd` against the prediction `lbb`: the target side only
# contains the background label, so `wtp` has a single entry.
wtp, wpt = _compute_iou(ldd, lbb)
wtp, wpt

(array([0.75]), array([0.75, 0.16, 0.09]))

In [19]:
# Drop entry 0 (the background component) from each score vector
wtp = wtp[1:]
wpt = wpt[1:]
wtp, wpt

(array([], dtype=float64), array([0.16, 0.09]))

In [20]:
# Append this sample's scores to the global matching lists
W_TtoP_global += wtp.tolist()
W_PtoT_global += wpt.tolist()
W_TtoP_global, W_PtoT_global

([0.6, 0.16], [0.6, 0.10714285714285714, 0.16, 0.09])

### Sample detection 4: both P and T empty

In [21]:
# Empty target against empty prediction: both maps only contain the background
# label, so each vector has a single entry.
wtp, wpt = _compute_iou(ldd, ldd)
wtp, wpt

(array([1.]), array([1.]))

In [22]:
# Drop entry 0 (the background component) from each score vector
wtp = wtp[1:]
wpt = wpt[1:]
wtp, wpt

(array([], dtype=float64), array([], dtype=float64))

In [23]:
# Append this sample's scores to the global matching lists
W_TtoP_global += wtp.tolist()
W_PtoT_global += wpt.tolist()
W_TtoP_global, W_PtoT_global

([0.6, 0.16], [0.6, 0.10714285714285714, 0.16, 0.09])

In [24]:
from coco_pano_ext_demo.iou import compute_matching_scores

In [25]:
# Final aggregation: the indicators are computed once, from the scores pooled over all samples
pairing_threshold = 0.5
df = compute_matching_scores(
    np.array(W_TtoP_global),
    np.array(W_PtoT_global),
    pairing_threshold,
)
if len(df) > 0:
    # SQ: mean of the "IoU" column; RQ: "F-score" of the first row
    COCO_SQ = df["IoU"].mean()
    COCO_RQ = df["F-score"].iloc[0]
else:
    # Empty table: both indicators fall back to 0
    COCO_SQ = 0
    COCO_RQ = 0
COCO_PQ = COCO_SQ * COCO_RQ

COCO_PQ, COCO_RQ, COCO_SQ

(0.19999999999999998, 0.3333333333333333, 0.6)

In [26]:
df

Unnamed: 0,IoU,Precision,Recall,F-score
0,0.6,0.25,0.5,0.333333


## Summary

In [43]:
def compute_matching_weights(target_binary_image, pred_binary_image) -> tuple[np.ndarray, np.ndarray]:
    """Compute the matching scores between the shapes of a target and of a prediction.

    Both inputs are labeled with `_compute_labelmap`, then compared with
    `_compute_iou`. The first entry of each score vector is the background
    component and is removed before returning.

    Args:
        target_binary_image: binary map of the expected shapes.
        pred_binary_image: binary map of the predicted shapes.

    Returns:
        A pair `(wtp, wpt)` of score arrays without their background entry:
        target -> prediction scores first, prediction -> target scores second.
        These are the (sliced) arrays returned by `_compute_iou`, not Python
        lists; callers convert them with `.tolist()` when needed.
    """
    # extract connected components
    T = _compute_labelmap(target_binary_image)
    P = _compute_labelmap(pred_binary_image)
    # compute IoUs in both directions
    wtp, wpt = _compute_iou(T, P)
    # remove background components (entry 0 of each vector)
    return wtp[1:], wpt[1:]

In [54]:
from typing import Iterable
import pandas as pd
def compute_pq_score_list_single_class(targets: Iterable[np.ndarray], predictions: Iterable[np.ndarray], pairing_threshold: float = 0.5) -> tuple[float, float, float, pd.DataFrame]:
    """Compute the COCO panoptic indicators (PQ, RQ, SQ) of a single class over several images.

    The matching scores of every (target, prediction) pair are pooled first;
    the indicators are then computed once from the pooled scores.

    Args:
        targets: binary target images, one per sample.
        predictions: binary predicted images, in the same order as `targets`.
        pairing_threshold: threshold forwarded to `compute_matching_scores`
            (defaults to 0.5, the value that used to be hard-coded).

    Returns:
        `(COCO_PQ, COCO_RQ, COCO_SQ, df)` where `df` is the table returned by
        `compute_matching_scores`, SQ is the mean of its "IoU" column, RQ is
        the "F-score" of its first row and PQ = SQ * RQ. The three indicators
        are 0.0 when `df` is empty.

    Raises:
        ValueError: if `targets` and `predictions` do not yield the same
            number of images (a plain `zip` would silently ignore the extras
            and bias the scores).
    """
    from itertools import zip_longest

    # Init global accumulators
    W_TtoP_global = []
    W_PtoT_global = []
    # Unique marker used to detect that one iterable ran out before the other
    exhausted = object()
    # loop over predictions (single class, TODO repeat for each class)
    for T0, P0 in zip_longest(targets, predictions, fillvalue=exhausted):
        if T0 is exhausted or P0 is exhausted:
            raise ValueError("targets and predictions must contain the same number of images")
        # Compute pairwise matching scores, excluding background
        wtp, wpt = compute_matching_weights(T0, P0)
        # Update global matching lists
        W_TtoP_global.extend(wtp.tolist())
        W_PtoT_global.extend(wpt.tolist())
    # report final score
    df = compute_matching_scores(np.array(W_TtoP_global), np.array(W_PtoT_global), pairing_threshold)
    if len(df) > 0:
        COCO_SQ = df["IoU"].mean()
        COCO_RQ = df["F-score"].iloc[0]
    else:
        # Empty table: fall back to float zeros, as announced by the return annotation
        COCO_SQ = COCO_RQ = 0.0
    COCO_PQ = COCO_SQ * COCO_RQ

    return COCO_PQ, COCO_RQ, COCO_SQ, df

In [55]:
# Four (target, prediction) pairs: identical masks, partially overlapping masks,
# empty target with a non-empty prediction, and both empty.
COCO_PQ, COCO_RQ, COCO_SQ, df = compute_pq_score_list_single_class(
    targets=[aa, aa, cc, dd],
    predictions=[aa, bb, bb, dd],
)
print(f"COCO_PQ: {COCO_PQ*100:.1f}% = COCO_RQ: {COCO_RQ*100:.1f}% × COCO_SQ: {COCO_SQ*100:.1f}%")
df

COCO_PQ: 45.7% = COCO_RQ: 57.1% × COCO_SQ: 80.0%


Unnamed: 0,IoU,Precision,Recall,F-score
0,0.6,0.4,1.0,0.571429
1,1.0,0.2,0.5,0.285714
