 # Plotting script

This script contains all the plotting code used to generate the plots (listed under the headers below) that appear directly in the paper. It is verbose and inefficient at times (e.g., it recomputes predictions and bounding box intervals) and could be restructured to reuse precomputed results with filtering. However, it is very flexible and permits filtering results, e.g., by ground-truth matching, by class name, or by a set of classes.

 #### Setup

In [48]:
# import statements
import sys
sys.path.insert(0, "/ssd_4TB/divake/conformal-od")
sys.path.insert(0, "/ssd_4TB/divake/conformal-od/detectron2")

import os
import torch
import importlib
import argparse
import logging
from pathlib import Path

import numpy as np
import pandas as pd
from scipy.stats import beta
from scipy.optimize import brentq
import itertools

import matplotlib
import matplotlib.gridspec as gridspec
import matplotlib.lines as mlines
import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
from matplotlib.ticker import FixedLocator, FixedFormatter
import seaborn as sns

from detectron2.data import MetadataCatalog, get_detection_dataset_dicts, DatasetCatalog
from detectron2.data.detection_utils import annotations_to_instances
from detectron2.structures import Instances
from detectron2.utils.logger import setup_logger

from control import std_conformal, ens_conformal, cqr_conformal, baseline_conformal, classifier_sets
from data import data_loader
from evaluation import results_table
from model import matching, model_loader, ensemble_boxes_wbf
from model.qr_head import QuantileROIHead
from plots import plot_util
from util import util, io_file

from plots.plot_style import *

# scientific notation off for pytorch
torch.set_printoptions(sci_mode=False)

In [49]:
def save_fig(figname: str, **kwargs):
    """Save the current matplotlib figure as ``<figname>.png`` and print a note.

    Any extra keyword arguments are forwarded unchanged to ``plt.savefig``.
    """
    out_file = figname + ".png"
    # Higher-quality variant, if needed: pass dpi=300 and bbox_inches="tight".
    plt.savefig(out_file, format="png", **kwargs)
    print(f"Saved figure {figname}.")

In [50]:
### simulate CLI with fixed parameters (see main.py for definitions)
# `rc` selects the risk-control method and `d` the dataset; both are spliced
# into the config/file names below.
rc = "std" # ens, cqr
d = "coco_val" 

# Keys mirror the command-line options; they are wrapped in a Namespace so the
# rest of the notebook can use attribute access (args.alpha, ...).
# NOTE(review): "device" is "cuda" here, but the setup cell passes "cpu" to
# util.set_device -- confirm which one is intended.
args_dict = {
    "config_file": f"cfg_{rc}_rank",
    "config_path": f"/ssd_4TB/divake/conformal-od/config/{d}",
    "run_collect_pred": False,
    "load_collect_pred": f"{rc}_conf_x101fpn_{rc}_rank_class",
    "save_file_pred": False,
    "risk_control": f"{rc}_conf",
    "alpha": 0.1,
    "label_set": "class_threshold",
    "label_alpha": 0.01,
    "run_risk_control": True,
    "load_risk_control": None,
    "save_file_control": True,
    "save_label_set": True,
    "run_eval": True,
    "save_file_eval": True,
    "file_name_prefix": None,
    "file_name_suffix": f"_{rc}_rank_class",
    "log_wandb": False,
    "device": "cuda"
}
args = argparse.Namespace(**args_dict)

In [51]:
### main setup (see main.py)

# Load the experiment config and switch AP evaluation off.
cfg = io_file.load_yaml(args.config_file, args.config_path, to_yacs=True)
cfg.MODEL.AP_EVAL = False
data_name = cfg.DATASETS.DATASET.NAME

# An explicitly given prefix wins; otherwise it is composed from the
# risk-control method, the model id and the suffix.
file_name_prefix = (
    args.file_name_prefix
    if args.file_name_prefix is not None
    else f"{args.risk_control}_{cfg.MODEL.ID}{args.file_name_suffix}"
)

# NOTE: Modify as required
outdir = f"{cfg.PROJECT.OUTPUT_DIR}"

# Directory holding the saved result files of this run.
filedir = os.path.join(outdir, data_name, file_name_prefix)
Path(filedir).mkdir(parents=True, exist_ok=True)

logger = setup_logger(output=filedir)
util.set_seed(cfg.PROJECT.SEED, logger=logger)
cfg, _ = util.set_device(cfg, "cpu", logger=logger)

# Register the dataset only once per session.
if data_name not in DatasetCatalog:
    data_loader.d2_register_dataset(cfg, logger=logger)

# Build the model and load its weights.
cfg_model, model = model_loader.d2_build_model(cfg, logger=logger)
model_loader.d2_load_model(cfg_model, model, logger=logger)

# Dataset dicts, loader and class metadata.
data_list = get_detection_dataset_dicts(
    data_name, filter_empty=cfg.DATASETS.DATASET.FILTER_EMPTY
)
dataloader = data_loader.d2_load_dataset_from_dict(
    data_list, cfg, cfg_model, logger=logger
)
metadata = MetadataCatalog.get(data_name).as_dict()
nr_class = len(metadata["thing_classes"])

In [52]:
# Instantiate the risk-control procedure selected via args.risk_control.
logger.info(f"Init risk control procedure with '{args.risk_control}'...")
if args.risk_control == "std_conf":
    controller = std_conformal.StdConformal(
        cfg, args, nr_class, filedir, log=None, logger=logger
    )
elif args.risk_control == "ens_conf":
    controller = ens_conformal.EnsConformal(
        cfg, args, nr_class, filedir, log=None, logger=logger
    )
elif args.risk_control == "cqr_conf":
    controller = cqr_conformal.CQRConformal(
        cfg, args, nr_class, filedir, log=None, logger=logger
    )
elif args.risk_control == "base_conf":
    controller = baseline_conformal.BaselineConformal(
        cfg, args, nr_class, filedir, log=None, logger=logger
    )
else:
    # Fail here instead of with a NameError on `controller` in a later cell.
    raise ValueError(f"Unknown risk control procedure '{args.risk_control}'.")

In [53]:
# Load the saved result tensors of this run.
# Optional extras, kept for reference:
# img_list = io_file.load_json(f"{file_name_prefix}_img_list", filedir)
# ist_list = io_file.load_json(f"{file_name_prefix}_ist_list", filedir)
# box_set_data = io_file.load_tensor(f"{file_name_prefix}_box_set", filedir)
control_data = io_file.load_tensor(f"{file_name_prefix}_control", filedir)
test_indices = io_file.load_tensor(f"{file_name_prefix}_test_idx", filedir)
label_data = io_file.load_tensor(f"{file_name_prefix}_label", filedir)

# Plotting-specific details: file stems (last path component minus its last
# four characters), channel setting and the output directory for figures.
fnames = [entry["file_name"].split("/")[-1][:-4] for entry in data_list]
channels = cfg.DATASETS.DATASET.CHANNELS # type: ignore
plotdir = os.path.join("plots", data_name, file_name_prefix)
Path(plotdir).mkdir(parents=True, exist_ok=True)

# Metric-name -> index lookups for easy access in the loaded files.
from evaluation.results_table import _idx_metrics as metr, _idx_label_metrics as label_metr

In [54]:
# Class-name lookups provided by the project's util module.
coco_classes = util.get_coco_classes()
sel_coco_classes = util.get_selected_coco_classes()

# IPython magic (not plain Python): toggles pretty-printing of displayed output.
%pprint
print(coco_classes)
print(sel_coco_classes)

 #### Plot: image with ground truths or predictions

In [55]:
### params
class_name = "person" # gt contains instance of this class
i, j = 0, 4 # desired score indices
filter_for_class = True # filter for class_name
###

# Index of the class within the dataset's class list.
class_idx = metadata["thing_classes"].index(class_name)
cn = class_name.replace(" ", "") # remove whitespace
# select best trial idx for nice viz: the trial with the smallest "mpiw"
# metric for this class, averaged over score indices i:j
trial_idx = torch.argmin(control_data[:, class_idx, i:j, metr["mpiw"]].mean(-1))
# test img indices for that trial idx where class ist exists
# (cast to float32; only needed by the commented-out torch.multinomial sampling below)
indices = torch.nonzero(test_indices[trial_idx, class_idx], as_tuple=True)[0].to(torch.float32)

### select test img idx
# -- random
# idx = indices[torch.multinomial(indices, num_samples=1, replacement=False)]
# -- by specific idx
# idx = torch.tensor([76])
# -- by specific name (file stem as stored in `fnames`)
idx = torch.tensor([fnames.index("000000054593")])
# "zurich_000002_000019_leftImg8bit"

### select img
img = dataloader.dataset.__getitem__(idx)
# file stem: last path component without its final four characters
img_id = os.path.split(img["file_name"])[-1][:-4]

### prediction
pred = controller.raw_prediction(model, img)
print(f"Predicted for img {img_id} (idx {idx}) using {controller.__class__}")

### filtering
# Replaces img["annotations"] in place, so later cells see the filtered list.
if filter_for_class:
    img["annotations"] = [anno for anno in img["annotations"] if anno["category_id"] == class_idx]
    print(f"Filtered for class '{class_name}' only.")

 Ground truths

----------------------------------------

In [56]:
to_file = False

# Every ground-truth annotation is drawn in the same colour; the plotting
# helper needs one colour entry per object.
col = "red"
colors = [mcolors.to_rgb(col) for _ in img["annotations"]]
fname = f"tr{trial_idx}_{cn}_gt_idx{idx.numpy()[0]}_img{img_id}.jpg"

plot_util.d2_plot_gt(
    img, metadata, channels,
    draw_labels=[], colors=colors, alpha=0.8,
    notebook=True, to_file=to_file,
    filename=os.path.join(plotdir, fname),
)

 Predictions

-------

In [57]:
to_file = False

# One colour entry per raw prediction, all identical.
col = "blue"
colors = [mcolors.to_rgb(col) for _ in range(len(pred))]
fname = f"tr{trial_idx}_{cn}_pred_idx{idx.numpy()[0]}_img{img_id}.jpg"

# First figure: every prediction (never written to file).
print("######### All predictions:")
plot_util.d2_plot_pred(
    img, pred, metadata, channels,
    draw_labels=[], colors=colors, alpha=0.8,
    notebook=True, to_file=False,
    filename=os.path.join(plotdir, fname),
)

# Second figure: only the predictions labelled as the selected class.
print(f"#########  Filtered predictions by class label '{class_name}':")
plot_util.d2_plot_pred(
    img, pred[pred.pred_classes == class_idx], metadata, channels,
    draw_labels=[], colors=colors, alpha=0.8,
    notebook=True, to_file=to_file,
    filename=os.path.join(plotdir, fname),
)

Predictions filtered by ground truth matching

--------

In [58]:
# Ground-truth instances built from the current image annotations.
gt = annotations_to_instances(img["annotations"], (img["height"], img["width"]))

# Pair ground truths with predictions using the controller's matching settings.
(
    gt_box, pred_box, gt_class, pred_class, pred_score,
    pred_score_all, pred_logits_all, matches
) = matching.matching(
    gt.gt_boxes, pred.pred_boxes, gt.gt_classes, pred.pred_classes,
    pred.scores, pred.scores_all, None,
    controller.box_matching, controller.class_matching, controller.iou_thresh
)

# Collect the matched predictions in their own Instances object.
pred_match = Instances(pred.image_size)
for _field, _value in (
    ("pred_boxes", pred_box),
    ("scores", pred_score),
    ("pred_classes", pred_class),
    ("pred_score_all", pred_score_all),
):
    pred_match.set(_field, _value)

In [59]:
to_file = False

col = "blue"
# list needs one color per object -> size it by the matched predictions that
# are actually drawn, not by the full prediction set
colors = [mcolors.to_rgb(col)]*len(pred_match)
# Distinct "_pred_match_" tag: the previous "_gt_" tag produced the same file
# name as the ground-truth plot and would overwrite it when saving.
fname = f"tr{trial_idx}_{cn}_pred_match_idx{idx.numpy()[0]}_img{img_id}.jpg"

plot_util.d2_plot_pred(img, pred_match, metadata, channels, 
                          draw_labels=[], colors=colors, alpha=0.8, 
                          notebook=True, to_file=to_file, filename=os.path.join(plotdir, fname))

Filtered prediction and ground truth together

----------

In [60]:
# Single Instances object holding matched predictions AND ground truths so
# both can be drawn in one call.
pred_match_gt = Instances(pred.image_size)

# manual jitter for visualisation (if needed)
# x0, y0, x1, y1
pred_box_jit = pred_box.tensor + torch.tensor([[10, 0, -10, -10]])

# Dummy score of 1.0 for every ground-truth box so that the score field has
# one entry per box after concatenation.
gt_scores = torch.ones(len(gt_box))

# Field order everywhere: (jittered) predictions first, ground truths second.
pred_match_gt.set("pred_boxes", torch.cat([pred_box_jit, gt_box.tensor]))
pred_match_gt.set("scores", torch.cat([pred_score, gt_scores]))  # len(pred_score) + len(gt_box) entries
pred_match_gt.set("pred_classes", torch.cat([pred_class, gt_class]))

In [61]:
to_file = False

# pred_match_gt stores the (jittered) predictions first and the ground truths
# second, so the colour list must follow that order and those lengths.
# Previously the list was red-then-blue and sized by the annotation count,
# which swapped the colours relative to the other plots (gt red, pred blue).
# NOTE(review): assumes d2_plot_pred assigns colours positionally -- confirm.
color_pred = [mcolors.to_rgb("blue")]*len(pred_box)
color_gt = [mcolors.to_rgb("red")]*len(gt_box)
colors = color_pred + color_gt

fname = f"tr{trial_idx}_{cn}_gt_pred_idx{idx.numpy()[0]}_img{img_id}.jpg"

print("######### Ground truth + prediction:")
plot_util.d2_plot_pred(img, pred_match_gt, metadata, channels, 
                          draw_labels=[], colors=colors, alpha=0.8, 
                          notebook=True, to_file=to_file, filename=os.path.join(plotdir, fname))

 #### Plot: image with ground truth and PI

For a single image and method

---------------------

In [62]:
### params
class_name = "person" # gt contains instance of this class
i, j = 0, 4 # desired score indices
filter_for_class = True # filter for class_name
###

class_idx = metadata["thing_classes"].index(class_name)
cn = class_name.replace(" ", "") # remove whitespace
# select best trial idx for nice viz (smallest "mpiw" averaged over i:j)
trial_idx = torch.argmin(control_data[:, class_idx, i:j, metr["mpiw"]].mean(-1))
# test img indices for that trial idx where class ist exists
indices = torch.nonzero(test_indices[trial_idx, class_idx], as_tuple=True)[0].to(torch.float32)

### select test img idx
# -- random
# idx = indices[torch.multinomial(indices, num_samples=1, replacement=False)]
# -- by specific idx
# idx = torch.tensor([76])
# -- by specific name
idx = torch.tensor([fnames.index("000000054593")])
# "zurich_000002_000019_leftImg8bit"

### select img
img = dataloader.dataset.__getitem__(idx)
img_id = os.path.split(img["file_name"])[-1][:-4]

### prediction
print("+++ Prediction procedure +++")
pred = controller.raw_prediction(model, img)
print(f"Predicted for img {img_id} (idx {idx}) using {controller.__class__}")

### filtering
if filter_for_class:
    img["annotations"] = [anno for anno in img["annotations"] if anno["category_id"] == class_idx]
    print(f"Filtered for class '{class_name}' only.")
    # gt = annotations_to_instances(img["annotations"], (img["height"], img["width"]))

### matching
gt = annotations_to_instances(img["annotations"], (img["height"], img["width"]))

(
    gt_box, pred_box, gt_class, pred_class, pred_score, # type:ignore
    pred_score_all, pred_logits_all, matches, _, pred_idx, _
) = matching.matching(
    gt.gt_boxes, pred.pred_boxes, gt.gt_classes, pred.pred_classes, pred.scores, 
    pred.scores_all, None,
    controller.box_matching, controller.class_matching, controller.iou_thresh,
    return_idx=True
)
print(f"Performed matching using {controller.box_matching=} and {controller.class_matching=}.")
print(f"Missed ground truth objects: {len(gt.gt_classes) - len(pred_idx)}/{len(gt.gt_classes)}.\n")

### build matched prediction instance
pred_match = Instances(pred.image_size)
pred_match.set("pred_boxes", pred_box)
pred_match.set("scores", pred_score)
pred_match.set("pred_classes", pred_class)
pred_match.set("pred_score_all", pred_score_all)

# Method-specific extras, restricted to the matched predictions.
if args.risk_control == "ens_conf":
    pred_match.set("unc", pred.unc[pred_idx])
elif args.risk_control == "cqr_conf":
    pred_lower = pred.get(f"pred_boxes_{controller.q_str[controller.q_idx[0]]}")
    pred_upper = pred.get(f"pred_boxes_{controller.q_str[controller.q_idx[1]]}")
    pred_match.set("pred_lower", pred_lower[pred_idx])
    pred_match.set("pred_upper", pred_upper[pred_idx])

### get quantiles for all classes, mean quantile over trials
box_quant_all = control_data[:, :, i:j, metr["quant"]].mean(dim=0)
label_quant = label_data[:, :, label_metr["quant"]].mean(dim=0)
# true box quantiles
box_quant_true = box_quant_all[gt_class]

### get label set
label_set = controller.label_set_generator.get_pred_set(pred_match.pred_score_all, label_quant)
label_set = controller.label_set_generator.handle_null_set(pred_match.pred_score_all, label_set)

print("+++ Label set procedure +++")
print(f"Using method '{args.label_set}'.")
lab_gt, lab_pred, lab_set = [], [], []
# Loop index is named `obj_i` so it does not overwrite the score-index
# parameter `i` defined at the top of this cell.
for obj_i, labels in enumerate(label_set):
    l_gt = coco_classes[gt_class[obj_i]]
    l_pred = coco_classes[pred_class[obj_i]]
    l_set = [coco_classes[l] for l in torch.nonzero(labels, as_tuple=True)[0]]
    print(f"True class: '{l_gt}' | Pred class: '{l_pred}' | Label set: {l_set}")
    lab_gt.append(l_gt)
    lab_pred.append(l_pred)
    lab_set.append(l_set)


### get box set quantiles
print(f"Box quantile selection strategy: {controller.label_set_generator.box_set_strategy}.")
box_quant, box_quant_idx = classifier_sets.box_set_strategy(label_set, box_quant_all, controller.label_set_generator.box_set_strategy)

# Translate the selected indices into class names for readable output.
b = box_quant_idx.tolist()
l_box_quant = [["class" for _ in range(4)] for _ in range(len(b))]
for bi, bv in enumerate(b):
    for bj, bv2 in enumerate(bv):
        l_box_quant[bi][bj] = lab_set[bi][bv2] 
print(f"Selected quantiles: {l_box_quant}")

In [63]:
to_file = False

fname1 = f"{args.risk_control}_{args.label_set}_{cn}_idx{idx.numpy()[0]}_img{img_id}.jpg"
fname2 = f"{args.risk_control}_oracle_{cn}_idx{idx.numpy()[0]}_img{img_id}.jpg"

# Same plot twice: once with the label-set quantiles, once with the
# quantiles of the true classes (oracle).
for _title, _quantiles, _out_name in (
    ("FIGURE 1: Label set quantiles", box_quant, fname1),
    ("FIGURE 2: Oracle (true class quantiles)", box_quant_true, fname2),
):
    print(_title)
    plot_util.d2_plot_pi(
        args.risk_control, img, gt.gt_boxes, pred_match, _quantiles,
        channels, draw_labels=[],
        colors=["red", "green", "palegreen"], alpha=[1.0, 0.6, 0.4],
        lw=1.5, notebook=True, to_file=to_file,
        filename=os.path.join(plotdir, _out_name),
        label_gt=lab_gt, label_set=lab_set,
    )

For a single image and multiple methods

-------------------

In [64]:
def get_args(rc, d):
    """Return the simulated CLI arguments for one method/dataset pair.

    Args:
        rc: Risk-control method key spliced into the config and file names.
        d: Dataset name, used as the last component of the config path.

    Returns:
        An ``argparse.Namespace`` with the fixed option values below.
    """
    return argparse.Namespace(
        config_file=f"cfg_{rc}_rank",
        config_path=f"/ssd_4TB/divake/conformal-od/config/{d}",
        run_collect_pred=False,
        load_collect_pred=f"{rc}_conf_x101fpn_{rc}_rank_class",
        save_file_pred=False,
        risk_control=f"{rc}_conf",
        alpha=0.1,
        label_set="class_threshold",
        label_alpha=0.01,
        run_risk_control=True,
        load_risk_control=None,
        save_file_control=True,
        save_label_set=True,
        run_eval=True,
        save_file_eval=True,
        file_name_prefix=None,
        file_name_suffix=f"_{rc}_rank_class",
        log_wandb=False,
        device="cpu",
    )


def get_dirs(args, cfg):
    """Resolve the file-name prefix and result directory for one method.

    The result directory is created if it does not exist yet.

    Args:
        args: Namespace providing ``file_name_prefix``, ``risk_control`` and
            ``file_name_suffix``.
        cfg: Config providing ``MODEL.ID`` and ``PROJECT.OUTPUT_DIR``.

    Returns:
        Tuple ``(file_name_prefix, outdir, filedir)``.
    """
    prefix = args.file_name_prefix
    if prefix is None:
        # No explicit prefix: compose it from method, model id and suffix.
        prefix = f"{args.risk_control}_{cfg.MODEL.ID}{args.file_name_suffix}"

    outdir = cfg.PROJECT.OUTPUT_DIR  # type: ignore
    # NOTE(review): `data_name` is not a parameter; it is read from the
    # enclosing (notebook) namespace and must be defined before calling.
    filedir = os.path.join(outdir, data_name, prefix)
    Path(filedir).mkdir(parents=True, exist_ok=True)
    return prefix, outdir, filedir


def get_controller(args, cfg, nr_class, filedir, logger):
    """Instantiate the risk-control procedure named by ``args.risk_control``.

    Args:
        args: Namespace; ``risk_control`` selects the procedure
            ("std_conf", "ens_conf", "cqr_conf" or "base_conf").
        cfg: Config passed through to the controller constructor.
        nr_class: Number of classes, passed through to the constructor.
        filedir: Result directory, passed through to the constructor.
        logger: Logger used for the init message and handed to the controller.

    Returns:
        The constructed controller object.

    Raises:
        ValueError: If ``args.risk_control`` is not one of the known names
            (previously this surfaced as an UnboundLocalError at ``return``).
    """
    logger.info(f"Init risk control procedure with '{args.risk_control}'...")
    if args.risk_control == "std_conf":
        controller = std_conformal.StdConformal(
            cfg, args, nr_class, filedir, log=None, logger=logger
        )
    elif args.risk_control == "ens_conf":
        controller = ens_conformal.EnsConformal(
            cfg, args, nr_class, filedir, log=None, logger=logger
        )
    elif args.risk_control == "cqr_conf":
        controller = cqr_conformal.CQRConformal(
            cfg, args, nr_class, filedir, log=None, logger=logger
        )
    elif args.risk_control == "base_conf":
        controller = baseline_conformal.BaselineConformal(
            cfg, args, nr_class, filedir, log=None, logger=logger
        )
    else:
        raise ValueError(
            f"Unknown risk control procedure '{args.risk_control}'; expected one of "
            "'std_conf', 'ens_conf', 'cqr_conf', 'base_conf'."
        )
    return controller


def _install_loggy_handlers(loggy, log_file):
    """Replace all handlers of ``loggy`` with a file + console handler pair.

    The new file handler is opened first, so a bad path raises before the
    existing handlers are touched. Old handlers are closed after removal so
    their file descriptors are released.
    """
    file_handler = logging.FileHandler(log_file)
    file_handler.setLevel(logging.DEBUG)
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.INFO)

    # Iterate over a copy: removeHandler mutates loggy.handlers.
    for old_handler in list(loggy.handlers):
        loggy.removeHandler(old_handler)
        old_handler.close()

    formatter = logging.Formatter('%(name)s|%(message)s')
    for handler in (file_handler, console_handler):
        handler.setFormatter(formatter)
        loggy.addHandler(handler)


def get_loggy(plotdir_log, fname_log):
    """Return the shared 'loggy' logger writing to a file and the console.

    ``logging.getLogger('loggy')`` always returns the same object, so any
    handlers from an earlier call (e.g. a re-run notebook cell) are replaced
    instead of stacked; otherwise every message would be emitted repeatedly.

    Args:
        plotdir_log: Directory of the log file.
        fname_log: File name of the log file inside ``plotdir_log``.

    Returns:
        The configured logger (level DEBUG, not propagating to the root logger).
    """
    loggy = logging.getLogger('loggy')
    loggy.setLevel(logging.DEBUG)
    loggy.propagate = False
    _install_loggy_handlers(loggy, os.path.join(plotdir_log, fname_log))
    return loggy


def update_log_path(loggy, new_path):
    """Redirect ``loggy`` to the log file ``new_path``.

    All current handlers are removed and closed, then a fresh file handler
    (DEBUG) and console handler (INFO) are attached.
    """
    _install_loggy_handlers(loggy, new_path)

In [65]:
data_name = "coco_val"
args_std = get_args("std", data_name)
args_ens = get_args("ens", data_name)
args_cqr = get_args("cqr", data_name)

cfg_std = io_file.load_yaml(args_std.config_file, args_std.config_path, to_yacs=True)
cfg_ens = io_file.load_yaml(args_ens.config_file, args_ens.config_path, to_yacs=True)
cfg_cqr = io_file.load_yaml(args_cqr.config_file, args_cqr.config_path, to_yacs=True)

# Point the CQR config at its checkpoint, resolved against the current working directory.
cfg_cqr.MODEL.CHECKPOINT_PATH = os.path.join(os.getcwd(), "../checkpoints/x101fpn_train_qr_5k_postprocess.pth")

file_name_prefix_std, outdir_std, filedir_std = get_dirs(args_std, cfg_std)
file_name_prefix_ens, outdir_ens, filedir_ens = get_dirs(args_ens, cfg_ens)
file_name_prefix_cqr, outdir_cqr, filedir_cqr = get_dirs(args_cqr, cfg_cqr)

# Use this section's own objects (filedir_std / cfg_std) rather than the
# `filedir` / `cfg` left over from the single-method setup, so the section
# also runs on its own.
logger = setup_logger(output=filedir_std)
util.set_seed(cfg_std.PROJECT.SEED, logger=logger)

if data_name not in DatasetCatalog:
    data_loader.d2_register_dataset(cfg_std, logger=logger)

# Build and load one model per method.
cfg_model_std, model_std = model_loader.d2_build_model(cfg_std, logger=logger)
model_loader.d2_load_model(cfg_model_std, model_std, logger=logger)
cfg_model_ens, model_ens = model_loader.d2_build_model(cfg_ens, logger=logger)
model_loader.d2_load_model(cfg_model_ens, model_ens, logger=logger)
cfg_model_cqr, model_cqr = model_loader.d2_build_model(cfg_cqr, logger=logger)
model_loader.d2_load_model(cfg_model_cqr, model_cqr, logger=logger)

# The data is loaded once, using the std config, and shared by all methods.
data_list = get_detection_dataset_dicts(data_name, filter_empty=cfg_std.DATASETS.DATASET.FILTER_EMPTY)
dataloader = data_loader.d2_load_dataset_from_dict(data_list, cfg_std, cfg_model_std, logger=logger)
metadata = MetadataCatalog.get(data_name).as_dict()
nr_class = len(metadata["thing_classes"])

In [66]:
# One risk-control controller per method, each built from its own args,
# config and result directory; all share the same logger.
controller_std = get_controller(args_std, cfg_std, nr_class, filedir_std, logger)
controller_ens = get_controller(args_ens, cfg_ens, nr_class, filedir_ens, logger)
controller_cqr = get_controller(args_cqr, cfg_cqr, nr_class, filedir_cqr, logger)

In [67]:
# Saved result tensors, one triple (control / test indices / label) per method.
control_data_std = io_file.load_tensor(f"{file_name_prefix_std}_control", filedir_std)
test_indices_std = io_file.load_tensor(f"{file_name_prefix_std}_test_idx", filedir_std)
label_data_std = io_file.load_tensor(f"{file_name_prefix_std}_label", filedir_std)

control_data_ens = io_file.load_tensor(f"{file_name_prefix_ens}_control", filedir_ens)
test_indices_ens = io_file.load_tensor(f"{file_name_prefix_ens}_test_idx", filedir_ens)
label_data_ens = io_file.load_tensor(f"{file_name_prefix_ens}_label", filedir_ens)

control_data_cqr = io_file.load_tensor(f"{file_name_prefix_cqr}_control", filedir_cqr)
test_indices_cqr = io_file.load_tensor(f"{file_name_prefix_cqr}_test_idx", filedir_cqr)
label_data_cqr = io_file.load_tensor(f"{file_name_prefix_cqr}_label", filedir_cqr)

# plotting-specific details
fnames = [entry["file_name"].split("/")[-1][:-4] for entry in data_list]
# Read from cfg_std (the config the dataloader was built with) rather than
# the `cfg` left over from the single-method section.
channels = cfg_std.DATASETS.DATASET.CHANNELS # type: ignore

# One figure directory per method plus a shared directory for log files.
plotdir_std = os.path.join("plots", data_name, file_name_prefix_std)
Path(plotdir_std).mkdir(exist_ok=True, parents=True)
plotdir_ens = os.path.join("plots", data_name, file_name_prefix_ens)
Path(plotdir_ens).mkdir(exist_ok=True, parents=True)
plotdir_cqr = os.path.join("plots", data_name, file_name_prefix_cqr)
Path(plotdir_cqr).mkdir(exist_ok=True, parents=True)
plotdir_log = os.path.join("plots", data_name, "logs")
Path(plotdir_log).mkdir(exist_ok=True, parents=True)
loggy = get_loggy(plotdir_log, "log.txt")

# get metric indices for easy access in loaded files
from evaluation.results_table import _idx_metrics as metr
from evaluation.results_table import _idx_label_metrics as label_metr

In [68]:
def get_pred(args, controller, model, img, img_id, idx, filter_for_class, filter_for_set, class_name, set_name,
             set_idx, control_data, label_data, i, j, metr, label_metr, coco_classes, loggy):
    """Predict on one image with one method and collect what the interval plots need.

    Steps: raw prediction, optional ground-truth filtering, ground-truth /
    prediction matching, label-set construction and box-quantile selection.
    Progress is reported through ``loggy``.

    Args:
        args: Namespace; ``risk_control`` and ``label_set`` are read.
        controller: Risk-control object; supplies ``raw_prediction``, the
            matching settings and ``label_set_generator``.
        model: Passed to ``controller.raw_prediction``.
        img: Image dict; "annotations", "height" and "width" are read.
            "annotations" is replaced in place when a filter is active.
        img_id, idx: Only used in log messages.
        filter_for_class: Keep only annotations whose category equals ``class_idx``.
        filter_for_set: Checked only if ``filter_for_class`` is false; keep
            only annotations whose category is in ``set_idx``.
        class_name, set_name: Only used in log messages.
        set_idx: Category ids kept when ``filter_for_set`` is active.
        control_data, label_data: Result tensors; their "quant" entries are
            averaged over the first dimension.
        i, j: Slice bounds applied to the third dimension of ``control_data``.
        metr, label_metr: Lookups providing the "quant" index.
        coco_classes: Lookup from class index to class name.
        loggy: Logger for progress messages.

    Returns:
        Tuple ``(gt, pred_match, box_quant, box_quant_true, lab_gt, lab_pred, lab_set)``.
    """

    ### prediction
    loggy.info("+++ Prediction procedure +++")
    pred = controller.raw_prediction(model, img)
    loggy.info(f"Predicted for img {img_id} (idx {idx}) using {controller.__class__}")

    ### filtering
    if filter_for_class:
        # NOTE(review): `class_idx` is not a parameter of this function; it is
        # read from the enclosing (notebook) namespace -- confirm it is set
        # consistently with `class_name` before calling.
        img["annotations"] = [anno for anno in img["annotations"] if anno["category_id"] == class_idx]
        loggy.info(f"Filtered for class '{class_name}' only.")
        # gt = annotations_to_instances(img["annotations"], (img["height"], img["width"]))
    elif filter_for_set:
        img["annotations"] = [anno for anno in img["annotations"] if anno["category_id"] in set_idx]
        loggy.info(f"Filtered for classes {set_name} only.")

    ### matching
    gt = annotations_to_instances(img["annotations"], (img["height"], img["width"]))

    # pred_logits_all and matches are unpacked but not used below.
    (
        gt_box, pred_box, gt_class, pred_class, pred_score, # type:ignore
        pred_score_all, pred_logits_all, matches, _, pred_idx, _
    ) = matching.matching(
        gt.gt_boxes, pred.pred_boxes, gt.gt_classes, pred.pred_classes, pred.scores, 
        pred.scores_all, None,
        controller.box_matching, controller.class_matching, controller.iou_thresh,
        return_idx=True
    )
    loggy.info(f"Performed matching using {controller.box_matching=} and {controller.class_matching=}.")
    loggy.info(f"Missed ground truth objects: {len(gt.gt_classes) - len(pred_idx)}/{len(gt.gt_classes)}.\n")

    ### build matched prediction instance
    pred_match = Instances(pred.image_size)
    pred_match.set("pred_boxes", pred_box)
    pred_match.set("scores", pred_score)
    pred_match.set("pred_classes", pred_class)
    pred_match.set("pred_score_all", pred_score_all)

    # Method-specific extras, restricted to the matched predictions via pred_idx.
    if args.risk_control == "ens_conf":
        pred_match.set("unc", pred.unc[pred_idx])
    elif args.risk_control == "cqr_conf":
        pred_lower = pred.get(f"pred_boxes_{controller.q_str[controller.q_idx[0]]}")
        pred_upper = pred.get(f"pred_boxes_{controller.q_str[controller.q_idx[1]]}")
        pred_match.set("pred_lower", pred_lower[pred_idx])
        pred_match.set("pred_upper", pred_upper[pred_idx])

    ### get quantiles for all classes, mean quantile over trials
    box_quant_all = control_data[:, :, i:j, metr["quant"]].mean(dim=0)
    label_quant = label_data[:, :, label_metr["quant"]].mean(dim=0)
    # true box quantiles
    box_quant_true = box_quant_all[gt_class]

    ### get label set
    label_set = controller.label_set_generator.get_pred_set(pred_match.pred_score_all, label_quant)
    label_set = controller.label_set_generator.handle_null_set(pred_match.pred_score_all, label_set)

    loggy.info("+++ Label set procedure +++")
    loggy.info(f"Using method '{args.label_set}'.")
    lab_gt, lab_pred, lab_set = [], [], []
    # The loop variable reuses the name of parameter `i`; the parameter is not
    # needed any more at this point.
    for i, labels in enumerate(label_set):
        l_gt = coco_classes[gt_class[i]]
        l_pred = coco_classes[pred_class[i]]
        l_set = [coco_classes[l] for l in torch.nonzero(labels, as_tuple=True)[0]]
        loggy.info(f"True class: '{l_gt}' | Pred class: '{l_pred}' | Label set: {l_set}")
        lab_gt.append(l_gt); lab_pred.append(l_pred); lab_set.append(l_set)

    ### get box set quantiles
    loggy.info(f"Box quantile selection strategy: {controller.label_set_generator.box_set_strategy}.")
    box_quant, box_quant_idx = classifier_sets.box_set_strategy(label_set, box_quant_all, controller.label_set_generator.box_set_strategy)
    
    # Translate the selected indices into class names for the log output.
    b = box_quant_idx.tolist()
    l_box_quant = [["class" for _ in range(4)] for _ in range(len(b))]
    for bi, bv in enumerate(b):
        for bj, bv2 in enumerate(bv):
            l_box_quant[bi][bj] = lab_set[bi][bv2] 
    loggy.info(f"Selected quantiles: {l_box_quant}")

    return gt, pred_match, box_quant, box_quant_true, lab_gt, lab_pred, lab_set

In [69]:
# Show the class-name lookups again; the bare name on the last line is
# displayed as the cell's output.
print(coco_classes)
sel_coco_classes

In [73]:
### params
plot_name = "000000054593"  # The specific COCO image ID you want to use
class_name = "person"  # Filter for this class
set_name = []  # Not filtering for a set of classes
i, j = 0, 4  # Desired score indices
filter_for_class = True  # Filter for class_name
filter_for_set = False  # Not filtering for a set
device = "cpu"  # Force CPU usage

###
# Move all tensors to CPU
control_data_std = control_data_std.to(device)
control_data_ens = control_data_ens.to(device)
control_data_cqr = control_data_cqr.to(device)
test_indices_std = test_indices_std.to(device)

# Default so that `set_idx` exists in every filter mode (it is always passed
# on to get_pred, including when no filter is active).
set_idx = []
if filter_for_class:
    class_idx = metadata["thing_classes"].index(class_name)
    cn = class_name.replace(" ", "")  # Remove whitespace
    # Select best trial idx for nice viz
    trial_idx_std = torch.argmin(control_data_std[:, class_idx, i:j, metr["mpiw"]].mean(-1))
    trial_idx_ens = torch.argmin(control_data_ens[:, class_idx, i:j, metr["mpiw"]].mean(-1))
    trial_idx_cqr = torch.argmin(control_data_cqr[:, class_idx, i:j, metr["mpiw"]].mean(-1))
    # Test img indices for that trial idx where class exists
    indices = torch.nonzero(test_indices_std[trial_idx_std, class_idx], as_tuple=True)[0].to(torch.float32)
elif filter_for_set:
    set_idx = [metadata["thing_classes"].index(n) for n in set_name]
    cn = "set"
else:
    cn = "all"

### Find the index of the specific image
# Look the image up by name (as the single-method cells do) instead of a
# hard-coded dataset index, so `plot_name` above actually selects the image.
# Raises ValueError if the name is not part of the loaded dataset.
target_idx = fnames.index(plot_name)

# Set the index to the specific image
idx = torch.tensor([target_idx], device=device)

### Select img
img = dataloader.dataset.__getitem__(idx)
img_id = os.path.splitext(os.path.basename(img["file_name"]))[0]

# One log file per image / filter configuration.
fname_log = f"all_{args_std.label_set}_{cn}_idx{idx.numpy()[0]}_img{img_id}.log"
update_log_path(loggy, os.path.join(plotdir_log, fname_log))

# Make sure the models are on CPU
model_std.to(device)
model_ens.to(device)
model_cqr.to(device)
# Define a wrapper for get_pred to ensure everything stays on CPU
def get_pred_cpu(*args, **kwargs):
    """Call ``get_pred`` with its tensor inputs and outputs moved to ``device``.

    Every positional *and* keyword argument that is a ``torch.Tensor`` is
    moved to the module-level ``device`` before ``get_pred`` is called; all
    other arguments are forwarded unchanged. Only top-level tensors are
    handled: tensors nested inside containers or other objects are not touched.

    Returns:
        tuple: the items returned by ``get_pred``, in order, with every
        top-level tensor moved to ``device``.
    """
    def _to_device(obj):
        # Non-tensor objects (models, strings, flags, ...) pass through as-is.
        return obj.to(device) if isinstance(obj, torch.Tensor) else obj

    moved_args = [_to_device(arg) for arg in args]
    # Keyword tensors need the same treatment as positional ones.
    moved_kwargs = {key: _to_device(val) for key, val in kwargs.items()}

    # Call the original function
    results = get_pred(*moved_args, **moved_kwargs)

    # Ensure all tensor outputs are on the target device
    return tuple(_to_device(res) for res in results)

### Prediction
# Positional arguments shared by all three calls. They are split into the part
# that precedes and the part that follows the method-specific control/label
# data so that the original argument order is kept.
shared_head = (img, img_id, idx, filter_for_class, filter_for_set, class_name, set_name, set_idx)
shared_tail = (i, j, metr, label_metr, coco_classes, loggy)

loggy.info(f"------ Method: {args_std.risk_control} ------")
(gt_std, pred_match_std, box_quant_std, box_quant_true_std,
 lab_gt_std, lab_pred_std, lab_set_std) = get_pred_cpu(
    args_std, controller_std, model_std, *shared_head,
    control_data_std, label_data_std, *shared_tail,
)

loggy.info(f"\n------ Method: {args_ens.risk_control} ------")
(gt_ens, pred_match_ens, box_quant_ens, box_quant_true_ens,
 lab_gt_ens, lab_pred_ens, lab_set_ens) = get_pred_cpu(
    args_ens, controller_ens, model_ens, *shared_head,
    control_data_ens, label_data_ens, *shared_tail,
)

loggy.info(f"\n------ Method: {args_cqr.risk_control} ------")
(gt_cqr, pred_match_cqr, box_quant_cqr, box_quant_true_cqr,
 lab_gt_cqr, lab_pred_cqr, lab_set_cqr) = get_pred_cpu(
    args_cqr, controller_cqr, model_cqr, *shared_head,
    control_data_cqr, label_data_cqr, *shared_tail,
)

In [74]:
# to_file = True
to_file = False

# Output file names, one per conformal method
fname_std = f"{args_std.risk_control}_{args_std.label_set}_{cn}_idx{idx.numpy()[0]}_img{img_id}.jpg"
fname_ens = f"{args_ens.risk_control}_{args_ens.label_set}_{cn}_idx{idx.numpy()[0]}_img{img_id}.jpg"
fname_cqr = f"{args_cqr.risk_control}_{args_cqr.label_set}_{cn}_idx{idx.numpy()[0]}_img{img_id}.jpg"


def _label_set_plot_style():
    """Return a fresh copy of the styling arguments shared by the three figures."""
    return dict(
        draw_labels=[],
        colors=["red", "green", "palegreen"],
        alpha=[1.0, 0.6, 0.4],
        lw=1.5,
        notebook=True,
        to_file=to_file,
    )


print(f"FIG 1.1: Label set quant; {args_std.risk_control} - {args_std.label_set}\n")
plot_util.d2_plot_pi(
    args_std.risk_control, img, gt_std.gt_boxes, pred_match_std, box_quant_std, channels,
    filename=os.path.join(plotdir_std, fname_std),
    label_gt=lab_gt_std, label_set=lab_set_std,
    **_label_set_plot_style(),
)

print(f"FIG 1.2: Label set quant; {args_ens.risk_control} - {args_ens.label_set}\n")
plot_util.d2_plot_pi(
    args_ens.risk_control, img, gt_ens.gt_boxes, pred_match_ens, box_quant_ens, channels,
    filename=os.path.join(plotdir_ens, fname_ens),
    label_gt=lab_gt_ens, label_set=lab_set_ens,
    **_label_set_plot_style(),
)

print(f"FIG 1.3: Label set quant; {args_cqr.risk_control} - {args_cqr.label_set}\n")
plot_util.d2_plot_pi(
    args_cqr.risk_control, img, gt_cqr.gt_boxes, pred_match_cqr, box_quant_cqr, channels,
    filename=os.path.join(plotdir_cqr, fname_cqr),
    label_gt=lab_gt_cqr, label_set=lab_set_cqr,
    **_label_set_plot_style(),
)


In [None]:
# `to_file` is deliberately not set here; the value from the previous cell is used.
# to_file = True
# to_file = False

# Output file names, one per conformal method
fname_std = f"{args_std.risk_control}_oracle_{cn}_idx{idx.numpy()[0]}_img{img_id}.jpg"
fname_ens = f"{args_ens.risk_control}_oracle_{cn}_idx{idx.numpy()[0]}_img{img_id}.jpg"
fname_cqr = f"{args_cqr.risk_control}_oracle_{cn}_idx{idx.numpy()[0]}_img{img_id}.jpg"


def _oracle_plot_style():
    """Return a fresh copy of the styling arguments shared by the three figures."""
    return dict(
        draw_labels=[],
        colors=["red", "green", "palegreen"],
        alpha=[1.0, 0.6, 0.4],
        lw=1.5,
        notebook=True,
        to_file=to_file,
    )


print(f"FIG 2.1: Oracle; {args_std.risk_control}\n")
plot_util.d2_plot_pi(
    args_std.risk_control, img, gt_std.gt_boxes, pred_match_std, box_quant_true_std, channels,
    filename=os.path.join(plotdir_std, fname_std),
    label_gt=lab_gt_std, label_set=lab_set_std,
    **_oracle_plot_style(),
)

print(f"FIG 2.2: Oracle; {args_ens.risk_control}\n")
plot_util.d2_plot_pi(
    args_ens.risk_control, img, gt_ens.gt_boxes, pred_match_ens, box_quant_true_ens, channels,
    filename=os.path.join(plotdir_ens, fname_ens),
    label_gt=lab_gt_ens, label_set=lab_set_ens,
    **_oracle_plot_style(),
)

print(f"FIG 2.3: Oracle; {args_cqr.risk_control}\n")
plot_util.d2_plot_pi(
    args_cqr.risk_control, img, gt_cqr.gt_boxes, pred_match_cqr, box_quant_true_cqr, channels,
    filename=os.path.join(plotdir_cqr, fname_cqr),
    label_gt=lab_gt_cqr, label_set=lab_set_cqr,
    **_oracle_plot_style(),
)


 #### Plot: empirical coverage histogram over nr. of trials

In [39]:
### params
class_name = "car"
i = 0  # first desired score index
j = 4  # end of the desired score index range (used as i:j)
alpha = 0.1  # miscoverage
###

n = 1000  # calibration samples

# beta shape params
b = np.floor((n + 1) * alpha)
a = n + 1 - b

# evaluation grid between the 0- and 1-quantiles of Beta(a, b)
x_lo, x_hi = beta.ppf(0, a, b), beta.ppf(1, a, b)
x = np.linspace(x_lo, x_hi, 1000)

cn = class_name.replace(" ", "")  # remove whitespace
class_idx = metadata["thing_classes"].index(class_name)

In [None]:
# coordinate coverage for all trials (score indices i:j, one column per coordinate)
cover = control_data[:, class_idx, i:j, metr["cov_coord"]]

fig, axes = plt.subplots(2, 2, figsize=(5, 3))
labs = ["x0", "y0", "x1", "y1"]

# A dedicated loop index is used on purpose: `i` is the notebook-level score
# index that is read above and again by the box-coverage cell, so it must not
# be overwritten by this loop.
for coord, (ax, lab) in enumerate(zip(axes.flat, labs)):
    cov = cover[:, coord]
    # empirical coverage over trials
    ax.hist(cov.numpy(), bins=30, alpha=0.5, range=(0.85, 1.0),
            color="blue", density=True,
            label=r"Emp. coverage, $\bar{x}$ = " + f"{cov.mean():.3f}")
    # nominal Beta(a, b) density for the chosen calibration set size
    ax.plot(x, beta.pdf(x, a, b), color="red", alpha=0.8,
            label = f"Nom. Beta fit, {n} samp")
            # label=f"Beta({int(a)},{int(b)})")
    ax.axvline(x=1-alpha, color="black", ls=":", label=r"Nom. coverage 1-$\alpha$")
    ax.set_xlim(0.85, 1.0)
    ax.legend(loc="upper left", fontsize="small") # large
    ax.set_ylabel("Density", fontsize="small")
    ax.set_xlabel("Coverage level", fontsize="small")
    ax.set_title(f"Coord. {lab}", fontsize="small")
fig.suptitle(f"Class: {class_name}, Coverage histogram over nr. of trials", 
             y=.97, x=0.5, fontsize="medium")
fig.tight_layout()

# save_fig takes a single figure name (path without extension)
# save_fig(os.path.join(plotdir, f"{class_name}_emp_coord_cov_hist"))

In [None]:
# box coverage for all trials
cov = control_data[:, class_idx, i, metr["cov_box"]]

fig, ax = plt.subplots(1, 1, figsize=(5, 3))

# empirical coverage over trials
emp_label = r"Emp. coverage, $\bar{x}$ = " + f"{cov.mean():.3f}"
ax.hist(cov.numpy(), bins=30, alpha=0.5, range=(0.8, 1.0),
        color="blue", density=True, label=emp_label)

# nominal Beta(a, b) density and target coverage level
nominal_pdf = beta.pdf(x, a, b)
ax.plot(x, nominal_pdf, color="red", alpha=0.8, label=f"Nom. Beta fit, {n} samp")
# alternative label: f"Beta({int(a)},{int(b)})"
ax.axvline(x=1-alpha, color="black", ls=":", label=r"Nom. coverage 1-$\alpha$")

# axis cosmetics
small = dict(fontsize="small")
ax.set_xlim(0.84, 0.96)
ax.legend(loc="upper left", **small)  # large
ax.set_ylabel("Density", **small)
ax.set_xlabel("Coverage level", **small)
ax.set_title(f"Class: {class_name}, Box coverage histogram over nr. of trials", **small)
fig.tight_layout()

# save_fig(plotdir, f"{class_name}_emp_box_cov_hist")

#### Plot: Beta distribution for given calibration set size

In [None]:
alpha = 0.1
eps = 0.03
ql, qh = 0.01, 0.99
qs = []

calib_sizes = [930, 3100, 56000]
dataset = ["COCO", "Cityscapes", "BDD100k"]
colors = ["#a7c957", "#219EBC", "#E63946"]

fig, ax = plt.subplots(figsize=(4.2, 2.2))

for i, n in enumerate(calib_sizes):
    line_color = colors[i]

    # compute cov beta distr
    l = np.floor((n+1)*alpha)
    a, b = n + 1 - l, l
    x = np.linspace(1-alpha-eps, 1-alpha+eps, 10000)
    rv = beta(a, b)
    density = rv.pdf(x)

    # compute beta quantiles and mark them up to the density curve
    q_low, q_high = rv.ppf(ql), rv.ppf(qh)
    for q in (q_low, q_high):
        ax.vlines(q, ymin=0, ymax=rv.pdf(q), lw=1.5, color=line_color)

    # plot with two-line legend (second entry is an invisible line carrying the quantiles)
    ax.plot(x, density, lw=1.5, label=f'{dataset[i]} (n = {n})', color=line_color)
    ax.plot([], [], alpha=0, label=rf'$Q_{{{ql}}}$ = {q_low:.3f}, $Q_{{{qh}}}$ = {q_high:.3f}')
    between_quantiles = (x >= q_low) & (x <= q_high)
    ax.fill_between(x, density, where=between_quantiles, alpha=0.2, color=line_color, interpolate=True)

    qs.append((f"{q_low:.3f}", f"{q_high:.3f}"))

# target coverage line, drawn up to the current (auto-scaled) upper y-limit
ax.vlines(1-alpha, ymin=0, ymax=ax.get_ylim()[1], lw=1.5, ls='--', label=r'Target coverage ($1 - \alpha_B$)', color="black")

ax.set_xlabel(r'Coverage', fontsize=12)
ax.set_ylabel(r'Density', fontsize=12)
# ax.set_title(r'Nominal coverage distribution', fontsize=14)
ax.set_ylim(0, 320)
ax.set_xlim(1-alpha-eps, 1-alpha+eps)

# opaque white legend box
leg = ax.legend(loc="upper left", fontsize=7)
leg_frame = leg.get_frame()
leg_frame.set_facecolor("white")
# leg_frame.set_edgecolor("black")
leg_frame.set_alpha(1.0)

plt.xticks(fontsize=7)
plt.yticks(fontsize=7)
plt.tight_layout()
# save_fig(f"plots/calib_size_cov_distr_box")
plt.show()

print(f"Quantiles for target coverage {1 - alpha}: \nq_low={ql}, q_high={qh} \n{qs}")

In [None]:
alpha = 0.01
eps = 0.01
ql, qh = 0.01, 0.99
qs = []

calib_sizes = [930, 3100, 56000]
dataset = ["COCO", "Cityscapes", "BDD100k"]
colors = ["#a7c957", "#219EBC", "#E63946"]

fig, ax = plt.subplots(figsize=(4.2, 2.2))

for i, n in enumerate(calib_sizes):
    line_color = colors[i]

    # compute cov beta distr
    l = np.floor((n+1)*alpha)
    a, b = n + 1 - l, l
    x = np.linspace(1-alpha-eps, 1-alpha+eps, 10000)
    rv = beta(a, b)
    density = rv.pdf(x)

    # compute beta quantiles and mark them up to the density curve
    q_low, q_high = rv.ppf(ql), rv.ppf(qh)
    for q in (q_low, q_high):
        ax.vlines(q, ymin=0, ymax=rv.pdf(q), lw=1.5, color=line_color)

    # plot with two-line legend (second entry is an invisible line carrying the quantiles)
    ax.plot(x, density, lw=1.5, label=f'{dataset[i]} (n = {n})', color=line_color)
    ax.plot([], [], alpha=0, label=rf'$Q_{{{ql}}}$ = {q_low:.3f}, $Q_{{{qh}}}$ = {q_high:.3f}')
    between_quantiles = (x >= q_low) & (x <= q_high)
    ax.fill_between(x, density, where=between_quantiles, alpha=0.2, color=line_color, interpolate=True)

    qs.append((f"{q_low:.3f}", f"{q_high:.3f}"))

# target coverage line, drawn up to the current (auto-scaled) upper y-limit
ax.vlines(1-alpha, ymin=0, ymax=ax.get_ylim()[1], lw=1.5, ls='--', label=r'Target coverage ($1 - \alpha_L$)', color="black")

ax.set_xlabel(r'Coverage', fontsize=12)
ax.set_ylabel(r'Density', fontsize=12)
# ax.set_title(r'Nominal coverage distribution', fontsize=14)
ax.set_ylim(0, 960)
ax.set_xlim(1-alpha-eps, 1-alpha+eps)

# opaque white legend box
leg = ax.legend(loc="upper left", fontsize=7)
leg_frame = leg.get_frame()
leg_frame.set_facecolor("white")
# leg_frame.set_edgecolor("black")
leg_frame.set_alpha(1.0)

plt.xticks(fontsize=7)
plt.yticks(fontsize=7)
plt.tight_layout()
# save_fig(f"plots/calib_size_cov_distr_labels")
plt.show()

print(f"Quantiles for target coverage {1 - alpha}: \nq_low={ql}, q_high={qh} \n{qs}")

--- Compute the smallest calibration set size n for which the 5% and 95% quantiles of the Beta coverage distribution lie within ±epsilon of the target coverage 1 - alpha

In [None]:
# see https://github.com/aangelopoulos/conformal-prediction/blob/main/notebooks/correctness_checks.ipynb

alpha = 0.1
epsilons = [0.0165, 0.0089, 0.00209]

for epsilon in epsilons:

    def _condition(n):
        """Return 1 if the Beta 5%/95% quantiles stay within 1-alpha +/- epsilon, else -1."""
        l = np.floor((n + 1) * alpha)
        a, b = n + 1 - l, l
        too_wide = (beta.ppf(0.05, a, b) < 1-alpha-epsilon) or (beta.ppf(0.95, a, b) > 1-alpha+epsilon)
        return -1 if too_wide else 1

    # root of the sign function = calibration set size at which the condition flips
    n_min = brentq(_condition, np.ceil(1/alpha), 100000000000)
    print(int(np.ceil(n_min)))

 #### Plot: Main results, efficiency vs. coverage

Label set names
- match: box metrics for calibration samples with class matching and correct quantile selection (original), no label set procedure
- plain: box metrics for calibration samples without class matching but with correct quantile selection, no label set procedure
- full: box metrics for calibration samples without class matching and for full label sets
- top: box metrics for calibration samples without class matching and for top label sets
- oracle: box metrics for calibration samples without class matching and for density level label sets 
- class: box metrics for calibration samples without class matching and for class label sets (per-class thresholding)

Renaming (in line with the paper):
- Match --> OracleMatch (not reported in the paper)
- Plain --> Oracle
- Oracle --> Naive
- Class --> ClassThr

--- For box set metrics

In [None]:
# only for single type at a time
t = "rank"  # rank, bonf

# datasets
datasets = ["coco_val", "cityscapes", "bdd100k_train"]

res_folder = f"results/results_selected_{t}"
plot_folder = "plots/results_selected"
Path(plot_folder).mkdir(parents=True, exist_ok=True)

# one label-set and one box-set result table per dataset
label_paths = [f"{res_folder}/{d}_res_{t}_label_table.csv" for d in datasets]
box_paths = [f"{res_folder}/{d}_res_{t}_box_set_table.csv" for d in datasets]

print("Loading results from:", res_folder, "\n", "Plotting figures to:", plot_folder)
print("Label result files:", label_paths, "\n", "Box result files:", box_paths)

In [43]:
# Names
conf_str = ["Box-Std", "Box-Ens", "Box-CQR"]
label_str = ["Full", "Top", "Naive", "ClassThr"]
label_plain = "Oracle"
label_match = "OracleMatch"

# for latex rendering
dataset_str = dict(coco_val="cocoval", cityscapes="cityscapes", bdd100k_train="bdd100ktrain")

# Emp coverage
alpha = 0.9
# 1 and 99 percent quantiles of empirical beta distr based on calibration set size (see above)
emp_cov_lim = dict(
    coco_val=(0.876, 0.922),
    cityscapes=(0.887, 0.912),
    bdd100k_train=(0.897, 0.903),
)

# Plotting: one marker per label set strategy, one color per conformal method
markers = {label_plain: "o", **dict(zip(label_str, ("s", "^", "P", "*")))}

# alternative palettes:
# colors = {conf_str[0]: "red", conf_str[1]: "green", conf_str[2]: "cornflowerblue"}
# colors = {conf_str[0]: "#BC4749", conf_str[1]: "#A7C957", conf_str[2]: "#386641"}
colors = dict(zip(conf_str, ("#E63946", "#219EBC", "#023047")))

# per-dataset axis limits, y-ticks (steps of 25) and figure heights
xaxis_lim = dict(coco_val=(0.87, 1.003), cityscapes=(0.878, 1.003), bdd100k_train=(0.878, 1.003))
yaxis_lim = dict(coco_val=(40, 260), cityscapes=(45, 220), bdd100k_train=(30, 230))
yaxis_ticks = dict(
    coco_val=list(range(50, 251, 25)),
    cityscapes=list(range(50, 201, 25)),
    bdd100k_train=list(range(50, 226, 25)),
)
figheight = dict(coco_val=1.85, cityscapes=1.85, bdd100k_train=2.0)

In [44]:
# position in `datasets` / `box_paths` of the dataset to plot
idx = 1
i = idx
path = box_paths[idx]

In [None]:
# for i, path in enumerate(box_paths):

# load the box set results and drop the rows of the label_match strategy
raw = pd.read_csv(box_paths[i])
df = raw[raw["label"] != label_match]

conf, label, mpiw, cov = (
    df[col_name].to_list() for col_name in ("conf", "label", "mpiw", "cov box")
)

dset = datasets[i]
fig, ax = plt.subplots(figsize=(4.5, figheight[dset]))
leg_mark, leg_name = [], []

# one point per (conformal method, label set strategy) result row
for j, (conf_name, label_name) in enumerate(zip(conf, label)):
    a = ax.scatter(cov[j], mpiw[j], marker=markers[label_name], color=colors[conf_name], linewidth=1, s=48)
    leg_mark.append([a])
    leg_name.append([f"{conf_name}, {label_name}"])

leg_mark = np.array(leg_mark).flatten()
leg_name = np.array(leg_name).flatten()

# first legend outside plot
# leg = ax.legend(leg_mark, leg_name, ncol=3, loc="center", bbox_to_anchor=(0.5, 1.35), fontsize=8) # legend below plot
# # leg = ax.legend(leg_mark, leg_name, ncol=3, loc="upper left") # legend inside plot
# leg.get_frame().set_facecolor("white")
# leg.get_frame().set_edgecolor("black")
# leg.get_frame().set_alpha(1.0)
# ax.add_artist(leg)

# target cov and shaded band between the empirical coverage limits
ax.axvline(x=alpha, color="black", linestyle="--", label=r'Target coverage')
liml, limh = emp_cov_lim[dset]
ax.axvspan(liml, limh, alpha=0.2, color="grey")
# ax.axvline(x=liml, color="grey", linestyle="--", label=r'Coverage distr. $Q_{0.01}$')
# ax.axvline(x=limh, color="grey", linestyle="--", label=r'Coverage distr. $Q_{0.99}$')

# axis labels, ticks and limits
ax.set_ylabel(r'MPIW', fontsize=14)
ax.set_xlabel(r'Box coverage', fontsize=14)
ax.yaxis.set_major_locator(FixedLocator(yaxis_ticks[dset]))
ax.xaxis.grid(False, which="major")
ax.set_xlim(xaxis_lim[dset])
ax.set_ylim(yaxis_lim[dset])
plt.xticks(fontsize=8)
plt.yticks(fontsize=8)
plt.tight_layout()

# save_fig(f"{plot_folder}/{datasets[i]}_{t}_box_set")

plt.show()
plt.close()


--- For label set metrics

In [46]:
# Coverage
alpha_lab = 0.99
# 1 and 99 percent quantiles of empirical beta distr based on calibration set size (see above)
emp_cov_lim_lab = dict(
    coco_val=(0.981, 0.996),
    cityscapes=(0.985, 0.994),
    bdd100k_train=(0.989, 0.991),
)

# Plotting: one marker per label set strategy
markers_lab = dict(zip(label_str, ("s", "^", "P", "*")))

# per-dataset x-axis limits / ticks and figure heights
xaxis_lim = dict(coco_val=(0.913, 1.003), cityscapes=(0.915, 1.003), bdd100k_train=(0.819, 1.006))
xaxis_ticks = dict(
    coco_val=[0.92, 0.94, 0.96, 0.98, 1.00],
    cityscapes=[0.92, 0.94, 0.96, 0.98, 1.00],
    bdd100k_train=[0.82, 0.84, 0.86, 0.88, 0.90, 0.92, 0.94, 0.96, 0.98, 1.00],
)
figheight = dict.fromkeys(("coco_val", "cityscapes", "bdd100k_train"), 1.7)

In [47]:
# position in `datasets` / `label_paths` of the dataset to plot
idx = 2
i = idx
path = label_paths[idx]

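Plot with a broken y-axis: the "Full" label sets are drawn in a small top panel and all other label sets in the bottom panel.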

In [None]:
# Label set results for dataset `datasets[i]`: mean set size vs. label coverage,
# drawn on two stacked axes that together form a broken y-axis.
# for i, path in enumerate(label_paths):

df = pd.read_csv(label_paths[i])

conf = df["conf"].to_list()
label = df["label"].to_list()
mss = df["mean set size"].to_list()
cov = df["cov set"].to_list()

# jitter coverage to visualize in plot:
# Box-CQR points are shifted by -0.001, the Box-Std/Full point by +0.001,
# everything else is left untouched
cov_jit = []
for ji, v in enumerate(cov):
    if conf[ji] == "Box-CQR":
        jv = v-0.001
    elif conf[ji] == "Box-Std" and label[ji] == "Full":
        jv = v+0.001
    else:
        jv = v
    cov_jit.append(jv)
cov = cov_jit

# two rows with height ratio 1:4 (small top panel, large bottom panel)
fig = plt.figure(figsize=(4.5, figheight[datasets[i]]))
gs = matplotlib.gridspec.GridSpec(2, 1, height_ratios=[1, 4], hspace=0.15)
ax1 = fig.add_subplot(gs[0]) # Top plot
ax2 = fig.add_subplot(gs[1]) # Bottom plot

leg_mark, leg_name = [], []

# "Full" label sets go to the top panel, all other label sets to the bottom one
for j, c in enumerate(conf):
    if label[j] == "Full": 
        # Top plot
        a = ax1.scatter(cov[j], mss[j], marker=markers_lab[label[j]], color=colors[conf[j]], linewidth=1, s=48)
    else: 
        # Bottom plot
        a = ax2.scatter(cov[j], mss[j], marker=markers_lab[label[j]], color=colors[conf[j]], linewidth=1, s=48)
    
    leg_mark.append([a])
    leg_name.append([f"{conf[j]}, {label[j]}"])

# Draw break: short diagonal marks in axes coordinates at the lower corners of
# the top panel and the upper corners of the bottom panel
d = .012
kwargs = dict(transform=ax1.transAxes, color='k', clip_on=False)
ax1.plot((-d, +d), (-d, +d), **kwargs)        # top panel, left mark
ax1.plot((1 - d, 1 + d), (-d, +d), **kwargs)  # top panel, right mark
kwargs.update(transform=ax2.transAxes)  # switch to the bottom subplot's coordinate system
ax2.plot((-d, +d), (1 - d, 1 + d), **kwargs)  # bottom panel, left mark
ax2.plot((1 - d, 1 + d), (1 - d, 1 + d), **kwargs)  # bottom panel, right mark

# target cov (drawn in both panels)
ax1.axvline(x=alpha_lab, color="black", linestyle="--", label=r'Target coverage')
ax2.axvline(x=alpha_lab, color="black", linestyle="--", label=r'Target coverage')     
# emp cov limits (shaded band in both panels)
liml, limh = emp_cov_lim_lab[datasets[i]]
ax1.axvspan(liml, limh, alpha=0.2, color="grey")
ax2.axvspan(liml, limh, alpha=0.2, color="grey")
# ax1.axvline(x=liml, color="grey", linestyle="--", label=r'Coverage distr. $Q_{0.01}$')
# ax1.axvline(x=limh, color="grey", linestyle="--", label=r'Coverage distr. $Q_{0.99}$')

# Legend for cov inside plot
# leg2 = ax2.legend(fontsize=8)
# leg2.get_frame().set_facecolor("white")
# leg2.get_frame().set_alpha(1.0)
# ax2.add_artist(leg2)

# Legend
# leg_mark = np.array(leg_mark).flatten()
# leg_name = np.array(leg_name).flatten()
# leg = ax2.legend(leg_mark, leg_name, ncol=3, loc="center", bbox_to_anchor=(0.5, -1.15), fontsize=8) # legend below plot
# # leg = ax1.legend(leg_mark, leg_name, ncol=3, loc="upper left") # legend inside plot
# leg.get_frame().set_facecolor("white")
# leg.get_frame().set_edgecolor("black")
# leg.get_frame().set_alpha(1.0)

# fade the two spines that face the axis break
ax1.spines['bottom'].set_color("lightgray")
ax1.spines['bottom'].set_alpha(0.5)
ax2.spines['top'].set_color("lightgray")
ax2.spines['top'].set_alpha(0.5)

# only the bottom panel shows x ticks; both panels share the same x range
ax1.xaxis.set_ticks_position('none')
ax1.set_xticklabels([])
ax2.set_xlim(xaxis_lim[datasets[i]])
ax2.set_xticks(xaxis_ticks[datasets[i]])
ax1.set_xlim(ax2.get_xlim()) # align top x-axis with bottom one

# fixed y ticks: a single tick at 80 on top, 1-4 on the bottom panel
ax1.set_yticks([80])
ax1.set_yticklabels([80], fontsize=8)
ax2.set_yticks([1, 2, 3, 4])
ax2.set_yticklabels([1, 2, 3, 4], fontsize=8)
ax2.set_ylim(0.7, 4)

# common y label placed in figure coordinates so that it spans both panels
fig.text(0.03, 0.5, r'Mean set size', ha='center', va='center', rotation='vertical', fontsize=14)
ax2.set_xlabel(r'Label coverage', fontsize=14)
ax1.xaxis.grid(False, which="major")
ax2.xaxis.grid(False, which="major")
plt.xticks(fontsize=8)
plt.tight_layout()

# save_fig(f"{plot_folder}/{datasets[i]}_{t}_label")

plt.show()
plt.close()


#### Plot: ClassThr and Naive vs. calibration

In [2]:
# Naive label set
filedir = "output/coco_val/std_conf_x101fpn_std_rank_oracle_temp"
box_csv = f'{filedir}/box_metrics_per_temp.csv'
label_csv = f'{filedir}/label_metrics_per_temp.csv'
df_box_set = pd.read_csv(box_csv)
df_label_set = pd.read_csv(label_csv)

# Class threshold label set
filedir_cl = "output/coco_val/std_conf_x101fpn_std_rank_class_temp"
box_csv_cl = f'{filedir_cl}/box_metrics_per_temp.csv'
label_csv_cl = f'{filedir_cl}/label_metrics_per_temp.csv'
df_box_set_cl = pd.read_csv(box_csv_cl)
df_label_set_cl = pd.read_csv(label_csv_cl)

# ECE vs. temperature (same for naive and class)
filedir_ece = "output/coco_val/std_conf_x101fpn_std_rank_class_temp"
ece_csv = f'{filedir_ece}/ece_per_temp.csv'
df_ece_per_temp = pd.read_csv(ece_csv)

In [3]:
# result table columns for box sets and label sets
box_cols = ["mpiw", "box stretch", "cov box"]
label_cols = ["mean set size", "cov set"]

# display name of every metric column, in the order box columns -> label columns
metr_str = dict(zip(
    box_cols + label_cols,
    ["MPIW", "Box stretch", "Box coverage", "Mean set size", "Label coverage"],
))

In [164]:
# LABEL SET
df1, df2 = df_label_set, df_label_set_cl
metric, metric2 = "cov set", "mean set size"
save_str = "label_metrics"
cover = 0.99

# BOX SET (swap with the block above to plot box metrics instead)
# df1, df2 = df_box_set, df_box_set_cl
# metric, metric2 = "cov box", "mpiw"
# save_str = "box_metrics"
# cover = 0.9

# line colors for `metric` and `metric2`
metric_col = "#E63946"
metric2_col = "#219EBC"  # alternatives: "#023047", "#3D9970", "#219EBC"

In [None]:
# Figure: ECE vs. temperature (top panel) and the two selected metrics vs. ECE,
# split at the reference temperature into an over-confident (left) and an
# under-confident (right) panel. df1 is plotted as "Naive", df2 as "ClassThr".

# Values
# NOTE(review): `metr` is rebound to an array here and shadows the `metr`
# mapping used by earlier cells — re-run the setup before going back to them.
ece = df1["ece"].to_numpy()
metr = df1[metric].to_numpy()
metr2 = df1[metric2].to_numpy()
metr_cl = df2[metric].to_numpy()
metr2_cl = df2[metric2].to_numpy()

temp = df1["temperature"].to_numpy()
temp_val = 1.3 # 1.0
# row of the reference temperature; used below as a positional split point
idx_temp = df1.loc[df1['temperature'] == temp_val].index[0]
ece_at_temp = df_ece_per_temp[df_ece_per_temp["temperature"] == temp_val]["ece"].item()

# rows up to and including the reference temperature, reversed so that the
# reference row comes first
ece_overconf = ece[:idx_temp+1][::-1]
metr_overconf = metr[:idx_temp+1][::-1]
metr2_overconf = metr2[:idx_temp+1][::-1]
metr_overconf_cl = metr_cl[:idx_temp+1][::-1]
metr2_overconf_cl = metr2_cl[:idx_temp+1][::-1]

# rows from the reference temperature onwards
ece_underconf = ece[idx_temp:]
metr_underconf = metr[idx_temp:]
metr2_underconf = metr2[idx_temp:]
metr_underconf_cl = metr_cl[idx_temp:]
metr2_underconf_cl = metr2_cl[idx_temp:]

# Figure: one wide panel on top, two panels below
fig = plt.figure(figsize=(5, 4))
gs = gridspec.GridSpec(2, 2, height_ratios=[1, 4])
ax0 = fig.add_subplot(gs[0, :])
ax1 = fig.add_subplot(gs[1, 0])
ax2 = fig.add_subplot(gs[1, 1])

# Plot 0: ECE vs. temperature (log x-axis)
ax0.plot(df_ece_per_temp["temperature"], df_ece_per_temp["ece"], color="#E63946", ls="-", marker='o', alpha=0.8)
ax0.set_xlabel('Temperature', fontsize=12, labelpad=-4)
ax0.set_ylabel('ECE', fontsize=12)
ax0.set_ylim(-0.1, 1.1)
ax0.set_xscale('log')
ax0.axvline(x=temp_val, color="black", ls=":", lw=2, label=rf'Opt. confidence ($T^*={temp_val}$, $ECE={round(ece_at_temp*100, 2)}\%$)')
ax0.legend()
ax0.text(0.17, 0.5, r'$\leftarrow$ +Overconfidence', fontsize=8, va='center', color="black", weight="extra bold")
ax0.text(1.47, 0.5, r'+Underconfidence $\rightarrow$', fontsize=8, va='center', color="black", weight="extra bold")

# Plot 1: `metric` vs. ECE, over-confident side
ax1.plot(ece_overconf, metr_overconf, ls="-", marker='o', color=metric_col, alpha=0.8, label="Naive")
ax1.plot(ece_overconf, metr_overconf_cl, ls="--", marker='s', color=metric_col, alpha=0.8, label="ClassThr")
ax1.axvline(x=ece[idx_temp], color="black", ls="-.", lw=1.5, label=rf'ECE at $T^*={temp_val}$')
ax1.axhline(y=cover, color="black", ls=":", lw=1.5, label=rf'Target cov. ($1-\alpha$)')
ax1.set_xlabel(r'+Overconfidence $\rightarrow$', fontsize=12)
ax1.set_ylabel(f'{metr_str[metric]}', fontsize=12, color=metric_col)

# Plot 1 - twin axis: `metric2` on a second y-axis
ax1_x2 = ax1.twinx()
ax1_x2.plot(ece_overconf, metr2_overconf, ls="-", marker='o', color=metric2_col, alpha=0.8)
ax1_x2.plot(ece_overconf, metr2_overconf_cl, ls="--", marker='s', color=metric2_col, alpha=0.8)
ax1_x2.grid(False)
ax1_x2.set_ylim(-1.5, 81.5)

# Plot 2: `metric` vs. ECE, under-confident side (log x-axis)
ax2.plot(ece_underconf, metr_underconf, ls="-", marker='o', color=metric_col, alpha=0.8, label="Naive")
# the dashed curve shows the class-threshold results, so it is labelled ClassThr
# (it previously carried a copy-pasted "Naive" label)
ax2.plot(ece_underconf, metr_underconf_cl, ls="--", marker='s', color=metric_col, alpha=0.8, label="ClassThr")
ax2.axvline(x=ece[idx_temp], color="black", ls="-.", lw=1.5, label=rf'ECE at $T^*={temp_val}$')
ax2.axhline(y=cover, color="black", ls=":", lw=1.5, label=rf'Target cov. ($1-\alpha$)')
ax2.set_xlabel(r'+Underconfidence $\rightarrow$', fontsize=12)
ax2.set_xscale('log')

# Plot 2 - twin axis
ax2_x2 = ax2.twinx() # create a second y-axis on the right side of the plot
ax2_x2.plot(ece_underconf, metr2_underconf, ls="-", marker='o', color=metric2_col, alpha=0.8)
ax2_x2.plot(ece_underconf, metr2_underconf_cl, ls="--", marker='s', color=metric2_col, alpha=0.8)
ax2_x2.set_ylabel(f'{metr_str[metric2]}', fontsize=12, color=metric2_col)
ax2_x2.grid(False)
ax2_x2.set_ylim(-1.5, 81.5)

# Plot 1 - custom legend built from grey/black proxy lines, so that it
# describes line styles independently of the two metric colors
ax1naive = mlines.Line2D([], [], color='grey', ls="-", marker='o', label='Naive')
ax1class = mlines.Line2D([], [], color='grey', ls="--", marker='s', label='ClassThr')
ax1vline = mlines.Line2D([], [], color='black', ls="-.", lw=1, label='Opt. confidence')
ax1hline = mlines.Line2D([], [], color='black', ls=":", lw=1, label='Target coverage')
leg1 = ax1_x2.legend(handles=[ax1naive, ax1class, ax1hline, ax1vline], loc="center")
leg1.get_frame().set_facecolor("white")
leg1.get_frame().set_alpha(1.0)

# Plot 2 - custom legend
# leg2 = ax2.legend(fontsize=7)
# ax2naive = mlines.Line2D([], [], color='grey', marker='o', ls="-", label='Naive')
# ax2class = mlines.Line2D([], [], color='grey', marker='o', ls="--", label='ClassThr')
# ax2vline = mlines.Line2D([], [], color='black', ls=":", label=rf'ECE at $T^*={temp_val}$')
# leg2 = ax2.legend(handles=[ax2naive, ax2class, ax2vline])
# leg2.get_frame().set_facecolor("white")
# leg2.get_frame().set_alpha(1.0)

# Figure
# fig.suptitle('Label set strategy vs. calibration', fontsize=14)
plt.xticks(fontsize=7)
plt.yticks(fontsize=7)
plt.tight_layout()
# save_fig(f"plots/{save_str}_vs_ece")
plt.show()


#### Plot: Coverage (stratified) across methods, MPIW across methods; violin plots

In [176]:
dataset = "cityscapes"
selected_classes = util.get_selected_coco_classes()
classes = list(selected_classes.values())
i = 0  # first score index
j = 4  # end of the score index range (used as i:j)

# output folder for the violin plots
plot_dir = "plots/violins"
Path(plot_dir).mkdir(parents=True, exist_ok=True)

# empirical coverage limits per dataset
emp_cov_lim = dict(
    coco_val=(0.876, 0.922),
    cityscapes=(0.887, 0.912),
    bdd100k_train=(0.897, 0.903),
)

In [None]:
# control tensors of the three conformal methods for the selected dataset
out_root = f"output/{dataset}"
data_std = io_file.load_tensor("std_conf_x101fpn_std_rank_control", f"{out_root}/std_conf_x101fpn_std_rank")
data_ens = io_file.load_tensor("ens_conf_x101fpn_ens_rank_control", f"{out_root}/ens_conf_x101fpn_ens_rank")
data_cqr = io_file.load_tensor("cqr_conf_x101fpn_cqr_rank_control", f"{out_root}/cqr_conf_x101fpn_cqr_rank")

In [None]:
# Display results_table._idx_metrics in the notebook output.
# NOTE(review): presumably the metric-name -> index mapping behind the
# hard-coded metric indices (2, 5:9) used in the cells below — confirm.
results_table._idx_metrics

In [None]:
# sanity check: per-trial means over the selected classes and score indices i:j,
# then averaged over trials (plain mean, i.e. NaNs propagate)
class_means = [
    tensor[:, classes, i:j].mean(dim=(1, 2))
    for tensor in (data_std, data_ens, data_cqr)
]

print("Cov | Cov_S | Cov_M | Cov_L")
for per_trial in class_means:
    print(per_trial[:, 5:9].mean(dim=0))

print("\nMPIW")
for per_trial in class_means:
    print(per_trial[:, 2].mean(dim=0))

In [178]:
def _class_score_nanmean(data):
    """NaN-ignoring mean over the selected classes and the score indices i:j."""
    return data[:, classes, i:j].nanmean(dim=(1, 2))


def _nan_to_median(values):
    """Replace NaN entries by the NaN-ignoring median of the array."""
    return np.nan_to_num(values, nan=np.nanmedian(values))


# metric columns: 5-8 are the four coverage metrics, 2 is MPIW
agg_std = _class_score_nanmean(data_std)
cov_std, cov_s_std, cov_m_std, cov_l_std = (agg_std[:, k].numpy() for k in (5, 6, 7, 8))
mpiw_std = agg_std[:, 2].numpy()

agg_ens = _class_score_nanmean(data_ens)
cov_ens, cov_s_ens, cov_m_ens, cov_l_ens = (agg_ens[:, k].numpy() for k in (5, 6, 7, 8))
mpiw_ens = agg_ens[:, 2].numpy()

agg_cqr = _class_score_nanmean(data_cqr)
cov_cqr, cov_s_cqr, cov_m_cqr, cov_l_cqr = (agg_cqr[:, k].numpy() for k in (5, 6, 7, 8))
mpiw_cqr = agg_cqr[:, 2].numpy()

# replace nan values: Box-Ens for every known dataset, Box-Std and Box-CQR
# only for cityscapes
if dataset in ("coco_val", "cityscapes", "bdd100k_train"):
    cov_s_ens = _nan_to_median(cov_s_ens)
if dataset == "cityscapes":
    cov_s_std = _nan_to_median(cov_s_std)
    cov_s_cqr = _nan_to_median(cov_s_cqr)

In [None]:
fig, ax = plt.subplots(figsize=(5, 3))

ax.axhline(y=0.9, color="black", linestyle="--", label=r'Target coverage (1 - $\alpha_B$)')

col = ["#E63946", "#219EBC", "#023047", "#BC4749"]
# colors = {conf_str[0]: "#BC4749", conf_str[1]: "#A7C957", conf_str[2]: "#386641"}
xt = [1, 2, 3, 4]

# the four coverage arrays of the Box-Std method
data = [cov_std, cov_s_std, cov_m_std, cov_l_std]
means = [d.mean() for d in data]
violin = ax.violinplot(data, showextrema=False, widths=0.3, points=1000)

for body, face_color, mean_val in zip(violin["bodies"], col, means):
    body.set_facecolor(face_color)
    body.set_edgecolor("black")
    body.set_alpha(0.8)
    body.set_linewidth(1)

    # horizontal mean line, inset by 0.01 from the widest extent of the violin
    outline_x = body.get_paths()[0].to_polygons()[0][:, 0]
    ax.plot([min(outline_x)+0.01, max(outline_x)-0.01], [mean_val, mean_val],
            color="black", linestyle="-", linewidth=1)

ax.set_title("Violin plot")

# y-axis
ax.set_ylabel("Coverage")
ax.set_ylim(0.64, 1.01)
ax.set_yticks(np.arange(0.65, 1.01, 0.05))

# x-axis
ax.set_xlabel("Method")
ax.set_xticks(xt)
ax.set_xticklabels(["$Cov$", "$Cov_S$", "$Cov_M$", "$Cov_L$"])
ax.set_xlim(0.5, 4.5)

ax.legend(loc="upper left")

plt.show()

In [179]:
# violin face colors, cycled over the four coverage types
col = ["#E63946", "#219EBC", "#023047", "#A7C957"]
# colors = {conf_str[0]: "#BC4749", conf_str[1]: "#A7C957", conf_str[2]: "#386641"}

# per-dataset y-axis limits and ticks
y_lims = dict(coco_val=(0.64, 1.02), cityscapes=(0.64, 1.02), bdd100k_train=(0.68, 1.02))
y_lims_ticks = {name: [0.7, 0.8, 0.9, 1.0] for name in y_lims}
# leg_loc = {"coco_val": (0.55, 0.04)}

In [None]:
fig, ax = plt.subplots(figsize=(5.5, 1.45))

# target coverage and shaded band between the empirical coverage limits
ax.axhline(y=0.9, color="black", linestyle="--", label=r'Target coverage (1 - $\alpha_B$)')
liml, limh = emp_cov_lim[dataset]
# ax.axhline(y=liml, color="grey", linestyle="-", label=r'Coverage distr. $Q_{0.01}$')
# ax.axhline(y=limh, color="grey", linestyle="-", label=r'Coverage distr. $Q_{0.99}$')
ax.axhspan(liml, limh, alpha=0.3, color="grey", label=r'Coverage distribution')

# four coverage arrays per method, grouped as Box-Std | Box-Ens | Box-CQR
data = [
    cov_std, cov_s_std, cov_m_std, cov_l_std,
    cov_ens, cov_s_ens, cov_m_ens, cov_l_ens,
    cov_cqr, cov_s_cqr, cov_m_cqr, cov_l_cqr,
]
means = [d.mean() for d in data]
offsets = [1, 2, 3, 4, 7, 8, 9, 10, 13, 14, 15, 16]
violin = ax.violinplot(data, showextrema=False, widths=0.5, points=1000, positions=offsets)

# the color list repeats for every method group
for body, face_color, mean_val in zip(violin["bodies"], itertools.cycle(col), means):
    body.set_facecolor(face_color)
    body.set_edgecolor("black")
    body.set_alpha(0.8)
    body.set_linewidth(1)

    # horizontal mean line, inset by 0.01 from the widest extent of the violin
    outline_x = body.get_paths()[0].to_polygons()[0][:, 0]
    ax.plot([min(outline_x)+0.01, max(outline_x)-0.01], [mean_val, mean_val],
            color="black", linestyle="-", linewidth=1)

ax.set_ylabel("Coverage", fontsize=12)
ax.set_ylim(y_lims[dataset])
ax.set_yticks(y_lims_ticks[dataset])

# major ticks name the method (centered under each group),
# minor ticks name the coverage type of every violin
major_ticks = [2.5, 8.5, 14.5]
major_labels = ["Box-Std", "Box-Ens", "Box-CQR"]

minor_ticks = [1, 2, 3, 4, 7, 8, 9, 10, 13, 14, 15, 16]
# minor_labels = ["$Cov$", "$Cov_S$", "$Cov_M$", "$Cov_L$"] * 3
minor_labels = ["All", "Small", "Med.", "Large"] * 3

x_axis = ax.xaxis
x_axis.set_major_locator(FixedLocator(major_ticks))
x_axis.set_major_formatter(FixedFormatter(major_labels))
x_axis.set_minor_locator(FixedLocator(minor_ticks))
x_axis.set_minor_formatter(FixedFormatter(minor_labels))
x_axis.grid(False, which="major")
# x_axis.grid(True, which='minor')
ax.set_xlim(0.5, 16.5)
ax.tick_params(axis="x", which="major", length=0, pad=20, labelsize=12)
ax.tick_params(axis="x", which="minor", labelsize=8)
ax.tick_params(axis="y", which="major", labelsize=8)

# ax.legend(loc=(0.55,0.04), fontsize=8)

# save_fig(f"{plot_dir}/{dataset}_cov_violin")
plt.show()

MPIW plot

In [None]:
# MPIW violin plot: one violin per method (Std / Ens / CQR).
col = ["#E63946", "#219EBC", "#023047"]

fig, ax = plt.subplots(figsize=(2.5, 1.3))

data = [mpiw_std, mpiw_ens, mpiw_cqr]
means = [arr.mean() for arr in data]
violin = ax.violinplot(data, widths=0.5, points=1000, showextrema=False)

# NOTE(review): every violin is filled with col[0]; the other two palette entries
# are not used in this cell - confirm this is intended.
for body, mean_val in zip(violin["bodies"], means):
    body.set_facecolor(col[0])
    body.set_edgecolor("black")
    body.set_alpha(0.8)
    body.set_linewidth(1)

    # horizontal mean line, inset by 0.01 from the min/max x of the body outline
    outline_x = body.get_paths()[0].to_polygons()[0][:, 0]
    ax.plot(
        [min(outline_x) + 0.01, max(outline_x) - 0.01],
        [mean_val, mean_val],
        color="black", linestyle="-", linewidth=1,
    )

ax.set_ylabel("MPIW", fontsize=12)
# ax.set_ylim(y_lims[dataset])
# ax.set_yticks(y_lims_ticks[dataset])

# violins sit at the default positions 1, 2, 3; label them with the method names
major_ticks = [1, 2, 3]
major_labels = ["Box-Std", "Box-Ens", "Box-CQR"]
ax.xaxis.set_major_locator(FixedLocator(major_ticks))
ax.xaxis.set_major_formatter(FixedFormatter(major_labels))

ax.xaxis.grid(False)
ax.tick_params(axis="x", which="major", labelsize=8)
ax.tick_params(axis="y", which="major", labelsize=8)

# save_fig(f"{plot_dir}/{dataset}_mpiw_violin")
plt.show()

Coverage across classes

In [114]:
# Slice bounds (used as i:j by the next cell) and the keys returned by
# util.get_selected_coco_classes().
i = 0
j = 4
class_names = [*util.get_selected_coco_classes().keys()]

In [115]:
# For each method tensor: keep `classes` on dim 1, slice i:j on dim 2 and nan-average
# that dim away, then take index 5 of the last remaining dim and convert to NumPy.
# NOTE(review): index 5 is presumably the coverage metric column - confirm.
cov_std, cov_ens, cov_cqr = (
    method_data[:, classes, i:j].nanmean(dim=2)[:, :, 5].numpy()
    for method_data in (data_std, data_ens, data_cqr)
)

In [122]:
# Seven-entry palette (one colour per violin in a method group); the last two
# entries repeat the first two colours.
_palette = ["#E63946", "#219EBC", "#023047", "#A7C957", "#386641"]
col = _palette + _palette[:2]
# colors = {conf_str[0]: "#BC4749", conf_str[1]: "#A7C957", conf_str[2]: "#386641"}

# Per-dataset y-axis range and tick positions, looked up by dataset key.
y_lims = {
    "coco_val": (0.64, 1.02),
    "cityscapes": (0.64, 1.02),
    "bdd100k_train": (0.78, 1.02),
}
y_lims_ticks = {
    "coco_val": [0.7, 0.8, 0.9, 1.0],
    "cityscapes": [0.7, 0.8, 0.9, 1.0],
    "bdd100k_train": [0.8, 0.9, 1.0],
}
# leg_loc = {"coco_val": (0.55, 0.04)}

In [None]:
# Per-class coverage violin plot: for each method an "All" violin (mean over the
# class axis) followed by one violin per class column.
fig, ax = plt.subplots(figsize=(8, 1.7))

# Reference markers: dashed target line at 0.9 and a grey band spanning the pair
# stored in emp_cov_lim for the current dataset.
ax.axhline(y=0.9, color="black", linestyle="--", label=r'Target coverage (1 - $\alpha_B$)')
liml, limh = emp_cov_lim[dataset]
# ax.axhline(y=liml, color="grey", linestyle="-", label=r'Coverage distr. $Q_{0.01}$')
# ax.axhline(y=limh, color="grey", linestyle="-", label=r'Coverage distr. $Q_{0.99}$')
ax.axhspan(liml, limh, alpha=0.3, color="grey", label=r'Coverage distribution')

# Class columns are plotted in the order 0, 1, 3, 2, 4, 5 to line up with the minor
# tick labels below (Person, Bicycle, Motorcycle, Car, Bus, Truck).
# NOTE(review): presumably columns 2 and 3 are car and motorcycle - confirm against `classes`.
data = [cov_std.mean(axis=1), cov_std[:,0], cov_std[:,1], cov_std[:,3], cov_std[:,2], cov_std[:,4], cov_std[:,5],
        cov_ens.mean(axis=1), cov_ens[:,0], cov_ens[:,1], cov_ens[:,3], cov_ens[:,2], cov_ens[:,4], cov_ens[:,5],
        cov_cqr.mean(axis=1), cov_cqr[:,0], cov_cqr[:,1], cov_cqr[:,3], cov_cqr[:,2], cov_cqr[:,4], cov_cqr[:,5]]
means = [d.mean() for d in data]
offsets = [1, 2, 3, 4, 5, 6, 7, 10, 11, 12, 13, 14, 15, 16, 19, 20, 21, 22, 23, 24, 25]
violin = ax.violinplot(data, showextrema=False, widths=0.5, points=1000, positions=offsets)

# The loop index is deliberately NOT called `i`: at notebook top level a loop variable
# is a global, and `i` is the slice start that the cov_std/cov_ens/cov_cqr cell reads.
# Reusing it here would leave i == 20 and break a re-run of that cell.
for idx, body in enumerate(violin["bodies"]):
    body.set_facecolor(col[idx % len(col)])
    body.set_edgecolor("black")
    body.set_alpha(0.8)
    body.set_linewidth(1)

    # horizontal mean line, inset by 0.01 from the min/max x of the body outline
    path = body.get_paths()[0].to_polygons()[0]
    ax.plot([min(path[:,0])+0.01, max(path[:,0])-0.01], [means[idx], means[idx]], color="black", linestyle="-", linewidth=1)

ax.set_ylabel("Coverage", fontsize=12)
ax.set_ylim(y_lims[dataset])
ax.set_yticks(y_lims_ticks[dataset])

# Two-level x-axis: minor ticks label each violin, major ticks name the method group.
major_ticks = [4.1, 13.1, 22.1]
major_labels = ["Box-Std", "Box-Ens", "Box-CQR"]

minor_ticks = [1, 2, 3, 4, 5, 6, 7, 10, 11, 12, 13, 14, 15, 16, 19, 20, 21, 22, 23, 24, 25]
# minor_labels = ["$Cov$", "$Cov_S$", "$Cov_M$", "$Cov_L$"] * 3
minor_labels = ["All", "Person", "Bicycle", "Motorcycle", "Car", "Bus", "Truck"] * 3

ax.xaxis.set_major_locator(FixedLocator(major_ticks))
ax.xaxis.set_major_formatter(FixedFormatter(major_labels))
ax.xaxis.set_minor_locator(FixedLocator(minor_ticks))
ax.xaxis.set_minor_formatter(FixedFormatter(minor_labels))
ax.xaxis.grid(False, which="major")
# ax.xaxis.grid(True, which='minor')
ax.set_xlim(0.5, 25.5)
# large pad pushes the method names below the rotated class labels
ax.tick_params(axis="x", which="major", length=0, pad=38, labelsize=12)
ax.tick_params(axis="x", which="minor", labelsize=8)
ax.tick_params(axis="y", which="major", labelsize=8)

for label in ax.xaxis.get_minorticklabels():
    label.set_rotation(40)
    label.set_horizontalalignment('right')

# ax.legend(loc=(0.55,0.04), fontsize=8)

# save_fig(f"{plot_dir}/{dataset}_cov_class_violin")
plt.show()

#### Plot: caption lines

In [None]:
# Tiny axis-free figure holding one dashed black line, for use as a caption swatch.
fig, ax = plt.subplots(figsize=(0.3, 0.1))
ax.plot([0, 1], [0.5, 0.5], color="black", linestyle="--", linewidth=1)
ax.axis("off")
# save_fig(f"{plot_dir}/caption_line1")

In [None]:
# Tiny axis-free figure holding one thick, semi-transparent grey line, for use as a caption swatch.
fig, ax = plt.subplots(figsize=(0.3, 0.1))
ax.plot([0, 1], [0.5, 0.5], color="grey", linestyle="-", linewidth=5, alpha=0.7)
ax.axis("off")
# save_fig(f"{plot_dir}/caption_line2")

#### Plot: Ours against baselines (one-sided)

In [67]:
# Settings for the baseline comparison: which label rows to keep, which metric
# column to plot, and where the baseline rows end.
label_method, box_metric = "Oracle", "mpiw"  # other label option: ClassThr
baseline_idx = 4  # rows 0-3 are the baseline methods

In [68]:
# Load the baseline box-set result table of each dataset.
coco_base, city_base, bdd_base = (
    pd.read_csv(f"results/results_selected_base/{name}_res_base_box_set_table.csv")
    for name in ("coco_val", "cityscapes", "bdd100k_train")
)

In [69]:
# Keep only the rows of the chosen label method and the three columns needed below.
wanted_cols = ["conf", "label", box_metric]
coco_base = coco_base[coco_base["label"].isin([label_method])][wanted_cols]
city_base = city_base[city_base["label"].isin([label_method])][wanted_cols]
bdd_base = bdd_base[bdd_base["label"].isin([label_method])][wanted_cols]

# Metric column of each filtered table as a NumPy array (row order preserved).
coco_dat, city_dat, bdd_dat = (
    table[box_metric].to_numpy() for table in (coco_base, city_base, bdd_base)
)

In [None]:
# Display the COCO table and its extracted metric array as the cell output.
coco_base, coco_dat

In [None]:
# Grouped bar chart: per dataset, the best (lowest) baseline value next to the
# best (lowest) value among our methods.
col = ["#E63946", "#023047"]
datasets = ["COCO", "Cityscapes", "BDD100k"]

fig, ax = plt.subplots(figsize=(4, 1))
bar_width = 0.35
index = np.arange(len(datasets))

# Loop index is not named `i` so that the notebook-global `i` used by other cells stays untouched.
for group_idx, dat in enumerate([coco_dat, city_dat, bdd_dat]):
    # rows before baseline_idx are baselines, the remaining rows are ours
    best_base = dat[:baseline_idx].min()
    best_ours = dat[baseline_idx:].min()

    ax.bar(group_idx, best_base, bar_width, label='And\u00E9ol et al.', alpha=0.8, color=col[0])
    ax.bar(group_idx + bar_width, best_ours, bar_width, label='Ours', alpha=0.8, color=col[1])

# Every dataset adds the same two labels; keying by label keeps one handle per label.
handles, labels = ax.get_legend_handles_labels()
by_label = dict(zip(labels, handles))
leg = ax.legend(by_label.values(), by_label.keys(), loc=(1.03, 0.15), fontsize=8)
leg.get_frame().set_facecolor("white")
leg.get_frame().set_edgecolor("black")
leg.get_frame().set_alpha(1.0)

# raw string: "\d" is not a valid escape sequence in a normal string literal
ax.set_ylabel(r"MPIW ($\downarrow$)", fontsize=12)
ax.set_yticks([25, 50, 75, 100])
ax.set_ylim(60, 110)
# centre each dataset label between its two bars
ax.set_xticks(index + bar_width/2)
ax.set_xticklabels(datasets)
ax.tick_params(axis='x', which='both', bottom=False, top=False)

ax.tick_params(axis="x", which="major", labelsize=8)
ax.tick_params(axis="y", which="major", labelsize=8)
ax.xaxis.grid(False)
ax.yaxis.grid(False)
plt.tight_layout()

# save_fig("plots/base_comp_small")

plt.show()

The same comparison, showing every method individually (appendix figures)

In [None]:
# Bar chart with one bar per (dataset, method): baselines in red, our methods in dark blue.
datasets = ["COCO", "Cityscapes", "BDD100k"]
methods = ["AddBonf", "MultBonf", "AddMax", "MultMax", "Box-Std", "Box-Ens", "Box-Mult"]
num_methods = len(methods)
col = ["#E63946", "#E63946", "#E63946", "#E63946", "#023047", "#023047", "#023047"]

fig, ax = plt.subplots(figsize=(7.5, 1.8))
bar_width = 0.2  # Adjusted bar width
dataset_spacing = 0.5  # Space between datasets
start_pos = 0.5  # Starting position for the first bar

# x position of every bar, in plotting order; reused as minor tick positions
minor_ticks = []
# one method label per bar of a single dataset; repeated per dataset when applied below
minor_labels = list(methods)

# Loop indices are not named `i`/`j` so the notebook-global `i`, `j` used by other cells stay untouched.
for d_idx, dat in enumerate([coco_dat, city_dat, bdd_dat]):
    for m_idx in range(num_methods):
        # bars of one dataset are contiguous; datasets are separated by dataset_spacing
        pos = start_pos + d_idx * dataset_spacing + m_idx * bar_width
        ax.bar(pos, dat[m_idx], bar_width, alpha=0.8, color=col[m_idx], edgecolor="black", linewidth=0.5)
        minor_ticks.append(pos)
    start_pos += bar_width * num_methods  # Update start position for the next dataset

# raw string: "\d" is not a valid escape sequence in a normal string literal
ax.set_ylabel(r"MPIW ($\downarrow$)", fontsize=12)
ax.set_yticks([25, 50, 75, 100, 125])
ax.set_ylim(60, 123)

# Major ticks carry the dataset names; positions are hand-placed under each group of bars
major_ticks = np.array([1.15, 3.05, 4.95])
ax.set_xticks(major_ticks)
ax.set_xticklabels(datasets)

# Minor ticks carry the method names, one under every bar
ax.set_xticks(minor_ticks, minor=True)
ax.set_xticklabels(minor_labels * len(datasets), minor=True, fontsize=8, rotation=45)

# large pad pushes the dataset names below the rotated method labels
ax.tick_params(axis="x", which="major", labelsize=12, pad=38)
ax.tick_params(axis="y", which="major", labelsize=8)
ax.tick_params(axis='x', which='major', bottom=False, top=False)
ax.xaxis.grid(False)
ax.yaxis.grid(True)

plt.tight_layout()

# Uncomment the following line to save the figure
# plt.savefig("plots/base_comp_mpiw.png")

plt.show()


In [None]:
# Bar chart with one bar per (dataset, method), labelled as the stretch metric.
# NOTE(review): the bar heights are read from coco_dat / city_dat / bdd_dat, which the
# cells above fill from the `box_metric` column (set to "mpiw" there). Presumably
# `box_metric` must be switched to the stretch column and those cells re-run before
# producing this figure - confirm.
datasets = ["COCO", "Cityscapes", "BDD100k"]
methods = ["AddBonf", "MultBonf", "AddMax", "MultMax", "Box-Std", "Box-Ens", "Box-Mult"]
num_methods = len(methods)
col = ["#E63946", "#E63946", "#E63946", "#E63946", "#023047", "#023047", "#023047"]

fig, ax = plt.subplots(figsize=(7.5, 1.8))
bar_width = 0.2  # Adjusted bar width
dataset_spacing = 0.5  # Space between datasets
start_pos = 0.5  # Starting position for the first bar

# x position of every bar, in plotting order; reused as minor tick positions
minor_ticks = []
# one method label per bar of a single dataset; repeated per dataset when applied below
minor_labels = list(methods)

# Loop indices are not named `i`/`j` so the notebook-global `i`, `j` used by other cells stay untouched.
for d_idx, dat in enumerate([coco_dat, city_dat, bdd_dat]):
    for m_idx in range(num_methods):
        # bars of one dataset are contiguous; datasets are separated by dataset_spacing
        pos = start_pos + d_idx * dataset_spacing + m_idx * bar_width
        ax.bar(pos, dat[m_idx], bar_width, alpha=0.8, color=col[m_idx], edgecolor="black", linewidth=0.5)
        minor_ticks.append(pos)
    start_pos += bar_width * num_methods  # Update start position for the next dataset

# raw string: "\d" is not a valid escape sequence in a normal string literal
ax.set_ylabel(r"Stretch ($\downarrow$)", fontsize=12)
ax.set_yticks([1.5, 2])
ax.set_ylim(1.2, 2.25)

# Major ticks carry the dataset names; positions are hand-placed under each group of bars
major_ticks = np.array([1.15, 3.05, 4.95])
ax.set_xticks(major_ticks)
ax.set_xticklabels(datasets)

# Minor ticks carry the method names, one under every bar
ax.set_xticks(minor_ticks, minor=True)
ax.set_xticklabels(minor_labels * len(datasets), minor=True, fontsize=8, rotation=45)

# large pad pushes the dataset names below the rotated method labels
ax.tick_params(axis="x", which="major", labelsize=12, pad=38)
ax.tick_params(axis="y", which="major", labelsize=8)
ax.tick_params(axis='x', which='major', bottom=False, top=False)
ax.xaxis.grid(False)
ax.yaxis.grid(True)

plt.tight_layout()

# Uncomment the following line to save the figure
# plt.savefig("plots/base_comp_stretch.png")

plt.show()


#### Plot: Ablation coverage levels 

In [213]:
# datasets
dataset = "bdd100k_train" #["coco_val", "cityscapes", "bdd100k_train"]
model = "std_conf_x101fpn_std_rank_class"
score = "abs_res"

res_folder = f"/media/atimans/hdd/output_abl/{dataset}"
plot_folder = "plots/results_ablation"
Path(plot_folder).mkdir(exist_ok=True, parents=True)

In [214]:
# Nominal coverage levels of the ablation grid.
box_cov = [0.85, 0.90, 0.95]
label_cov = [0.8, 0.9, 0.99, 1.0]

# Percent strings used in the result folder names. round() is used instead of int()
# because int() truncates: a level whose product lands just below an integer
# (e.g. 0.57 * 100 == 56.99999999999999) would otherwise get the wrong suffix.
box_cov_str = [f"{round(cov * 100)}" for cov in box_cov]
label_cov_str = [f"{round(cov * 100)}" for cov in label_cov]

# All (box level, label level) pairs, box level varying slowest; both lists share the same order.
cov_combos = list(itertools.product(box_cov, label_cov))
cov_combos_str = list(itertools.product(box_cov_str, label_cov_str))

In [None]:
# One label table and one box-set table per coverage combo, in cov_combos_str order.
label_paths = [
    f"{res_folder}/{model}_{bc}_{lc}/{model}_{bc}_{lc}_label_table.csv"
    for bc, lc in cov_combos_str
]
box_paths = [
    f"{res_folder}/{model}_{bc}_{lc}/{model}_{bc}_{lc}_box_set_table_{score}.csv"
    for bc, lc in cov_combos_str
]

print("Loading results from:", res_folder, "\n", "Plotting figures to:", plot_folder)
print("Label result files:", label_paths, "\n", "Box result files:", box_paths)

# Tables of the run folder that carries no coverage suffix in its name.
label_path_def = f"{res_folder}/{model}/{model}_label_table.csv"
box_path_def = f"{res_folder}/{model}/{model}_box_set_table_{score}.csv"

In [None]:
# Display both path lists as the cell output.
box_paths, label_paths

In [217]:
# Accumulators filled by the loading cells below: label coverage / set size
# and box coverage / MPIW, one entry per coverage combo.
lcov, leff = [], []
bcov, beff = [], []

row = 4  # class_selected

In [218]:
# Read one label table per coverage combo and record coverage and mean set size of `row`.
for combo_idx, csv_path in enumerate(label_paths):
    # combo 6 is (90, 99): its table is read from the default path instead
    source = label_path_def if combo_idx == 6 else csv_path
    table = pd.read_csv(source)

    for column, sink in (("cov set", lcov), ("mean set size", leff)):
        sink.append(table[column].iloc[row].item())

In [219]:
# Read one box-set table per coverage combo and record box coverage and MPIW of `row`.
for combo_idx, csv_path in enumerate(box_paths):
    # combo 6 is (90, 99): its table is read from the default path instead
    source = box_path_def if combo_idx == 6 else csv_path
    table = pd.read_csv(source)

    for column, sink in (("cov box", bcov), ("mpiw", beff)):
        sink.append(table[column].iloc[row].item())

In [None]:
# Print the four collected lists (label coverage, label set size, box coverage, MPIW).
print(lcov, leff, bcov, beff)

In [221]:
# Colour per label coverage level and marker per box coverage level,
# keyed by str() of the level values.
colors = dict(zip(("0.8", "0.9", "0.99", "1.0"), ("#E63946", "#219EBC", "#023047", "#A7C957")))
markers = dict(zip(("0.85", "0.9", "0.95"), ("o", "*", "^")))

In [None]:
# Scatter of empirical label coverage (x) against empirical box coverage (y),
# one point per coverage combo: colour encodes the label level, marker the box level.
fig, ax = plt.subplots(figsize=(2, 2))

for combo_idx, (bc, lc) in enumerate(cov_combos):
    ax.scatter(
        lcov[combo_idx], bcov[combo_idx],
        color=colors[str(lc)], marker=markers[str(bc)],
        label=fr"$(1-\alpha_B)=${bc}, $(1-\alpha_L)$={lc}",
        alpha=0.8, linewidth=1, s=48,
    )

# dotted guide lines at every nominal label level (vertical) and box level (horizontal)
guide_style = dict(color='black', linestyle=':', linewidth=1, alpha=0.6)
for level in label_cov:
    ax.axvline(x=level, **guide_style)
for level in box_cov:
    ax.axhline(y=level, **guide_style)

ax.set_xlabel(r'Label cov.', fontsize=10)
ax.set_ylabel(r'Box cov.', fontsize=10)

ax.set_xticks([0.8, 0.9, 1.0])
ax.set_yticks(box_cov)

# the guide lines replace the regular grid
ax.xaxis.grid(False, which="major")
ax.yaxis.grid(False, which="major")

ax.set_xlim(0.75, 1.05)
ax.set_ylim(0.8, 1.0)

# leg = ax.legend(ncol=3, loc=(1.03, 0.15), fontsize=8)
# leg.get_frame().set_facecolor("white")
# leg.get_frame().set_edgecolor("black")
# leg.get_frame().set_alpha(1.0)

plt.xticks(fontsize=8)
plt.yticks(fontsize=8)
plt.tight_layout()

# plt.savefig(f"{plot_folder}/{dataset}_{score}_covs.png")


In [None]:
# Scatter of mean set size (x) against MPIW (y), one point per coverage combo:
# colour encodes the label level, marker the box level.
fig, ax = plt.subplots(figsize=(2, 2))

for combo_idx, (bc, lc) in enumerate(cov_combos):
    ax.scatter(
        leff[combo_idx], beff[combo_idx],
        color=colors[str(lc)], marker=markers[str(bc)],
        label=fr"$(1-\alpha_B)=${bc}, $(1-\alpha_L)$={lc}",
        alpha=0.8, linewidth=1, s=48,
    )

# for xtick in label_cov:
#     ax.axvline(x=xtick, color='black', linestyle=':', linewidth=1, alpha=0.6)

# for ytick in box_cov:
#     ax.axhline(y=ytick, color='black', linestyle=':', linewidth=1, alpha=0.6)

ax.set_xlabel(r'Mean set size', fontsize=10)
ax.set_ylabel(r'MPIW', fontsize=10)

# ax.set_xticks([1, 2, 3, 4])
# ax.set_yticks([])

ax.xaxis.grid(True, which="major")
ax.yaxis.grid(True, which="major")

ax.set_xlim(0.5, 7.5)
ax.set_ylim(65, 215)

# leg = ax.legend(ncol=3, loc=(1.03, 0.15), fontsize=8)
# leg.get_frame().set_facecolor("white")
# leg.get_frame().set_edgecolor("black")
# leg.get_frame().set_alpha(1.0)

plt.xticks(fontsize=8)
plt.yticks(fontsize=8)
plt.tight_layout()

# plt.savefig(f"{plot_folder}/{dataset}_{score}_eff.png")


#### Plot: Set size and MPIW vs. misclassification

In [44]:
# datasets
dataset = "coco_val" #["coco_val", "cityscapes", "bdd100k_train"]

res_folder = f"../../../../media/atimans/hdd/output/{dataset}"
plot_folder = "plots"
Path(plot_folder).mkdir(exist_ok=True, parents=True)

In [64]:
# (method prefix, score prefix) pairs used to build the result file names.
methods = list(zip(("std", "ens", "cqr"), ("abs", "norm", "quant")))

In [None]:
# Label table and box-set table path for every (method, score) pair.
# Built with comprehensions on purpose: a top-level `for method, score in methods` loop
# would leave `score` bound to the last pair's value and overwrite the global `score`
# that the ablation path cell reads.
label_paths = [
    f"{res_folder}/{m_name}_conf_x101fpn_{m_name}_rank_class/{m_name}_conf_x101fpn_{m_name}_rank_class_label_table.csv"
    for m_name, _ in methods
]
box_paths = [
    f"{res_folder}/{m_name}_conf_x101fpn_{m_name}_rank_class/{m_name}_conf_x101fpn_{m_name}_rank_class_box_set_table_{s_name}_res.csv"
    for m_name, s_name in methods
]

print(label_paths, "\n", box_paths)

In [66]:
# Accumulators filled by the loading cells below, one entry per method.
# The `_cl` / `_miscl` suffixes match the "... cl" / "... miscl" table columns.
lcov_cl = []
lcov_miscl = []
leff_cl = []
leff_miscl = []

bcov_cl = []
bcov_miscl = []
beff_cl = []
beff_miscl = []

row = 4  # class_selected

In [67]:
# Read each method's label table and record the `row` entry of the four cl/miscl columns.
label_columns = (
    ("cov set cl", lcov_cl),
    ("cov set miscl", lcov_miscl),
    ("mean set size cl", leff_cl),
    ("mean set size miscl", leff_miscl),
)
for csv_path in label_paths:
    table = pd.read_csv(csv_path)
    for column, sink in label_columns:
        sink.append(table[column].iloc[row].item())

In [68]:
# Read each method's box-set table and record the `row` entry of the four cl/miscl columns.
box_columns = (
    ("cov box cl", bcov_cl),
    ("cov box miscl", bcov_miscl),
    ("mpiw cl", beff_cl),
    ("mpiw miscl", beff_miscl),
)
for csv_path in box_paths:
    table = pd.read_csv(csv_path)
    for column, sink in box_columns:
        sink.append(table[column].iloc[row].item())

In [None]:
# Print the collected per-method values: first the label-side lists, then the box-side lists.
print("Label cov cl:", lcov_cl, "\n", "Label cov miscl:", lcov_miscl, "\n", "Label eff cl:", leff_cl, "\n", "Label eff miscl:", leff_miscl)
print("Box cov cl:", bcov_cl, "\n", "Box cov miscl:", bcov_miscl, "\n", "Box eff cl:", beff_cl, "\n", "Box eff miscl:", beff_miscl)

In [None]:
# Scatter of label coverage (x) against box coverage (y) per method, with the
# "cl" values in one colour and the "miscl" values in another.
colors = {"Classif.":"#023047", "Misclassif.":"#E63946"}
markers = {"Box-Std":"o", "Box-Ens":"*", "Box-CQR":"^"}

fig, ax = plt.subplots(figsize=(2, 2))

point_style = dict(alpha=0.8, linewidth=1, s=48)
for m_idx, (method_name, shape) in enumerate(markers.items()):
    # only the first point of each pair carries a legend label, so each method appears once
    ax.scatter(lcov_cl[m_idx], bcov_cl[m_idx], color=colors["Classif."], marker=shape, label=method_name, **point_style)
    ax.scatter(lcov_miscl[m_idx], bcov_miscl[m_idx], color=colors["Misclassif."], marker=shape, **point_style)

ax.set_xlabel(r'Label cov.', fontsize=10)
ax.set_ylabel(r'Box cov.', fontsize=10)

# ax.set_xticks([0.8, 0.9, 1.0])
# ax.set_yticks([0.8, 0.9, 1.0])

# ax.yaxis.grid(False, which="major")
# ax.xaxis.grid(False, which="major")
ax.set_xlim(0.98, 1.01)
ax.set_ylim(0.92, 0.96)

# leg = ax.legend(ncol=3, loc=(1.03, 0.15), fontsize=8)
# leg.get_frame().set_facecolor("white")
# leg.get_frame().set_edgecolor("black")
# leg.get_frame().set_alpha(1.0)
ax.legend()

plt.xticks(fontsize=8)
plt.yticks(fontsize=8)
plt.tight_layout()

# plt.savefig(f"{plot_folder}/{dataset}_{score}_covs.png")


In [None]:
import matplotlib.lines as mlines

# Scatter of mean set size (x) against MPIW (y) per method, with the "cl" values in
# one colour and the "miscl" values in another, plus two separate legends.
colors = {"Classif.":"#023047", "Misclassif.":"#E63946"}
markers = {"Box-Std":"o", "Box-Ens":"*", "Box-CQR":"^"}

fig, ax = plt.subplots(figsize=(1.8, 1.5))

# Data points: for every method first the "cl" point, then the "miscl" point
point_groups = (
    (leff_cl, beff_cl, "Classif."),
    (leff_miscl, beff_miscl, "Misclassif."),
)
for m_idx, shape in enumerate(markers.values()):
    for xs, ys, group in point_groups:
        ax.scatter(xs[m_idx], ys[m_idx], color=colors[group], marker=shape, alpha=0.8, linewidth=1, s=48)

# Axis labels and limits
ax.set_xlabel(r'Mean set size', fontsize=8, labelpad=0)
ax.set_ylabel(r'MPIW', fontsize=8, labelpad=-3)
ax.set_xlim(1.9, 3.3)
ax.set_ylim(78, 104)

# Legend 1 (upper right): marker shape -> method, drawn with grey proxy handles
marker_handles = [
    mlines.Line2D([], [], color='grey', marker=shape, linestyle='None', markersize=6, label=method_name)
    for method_name, shape in markers.items()
]
leg_markers = ax.legend(handles=marker_handles, loc='upper right', fontsize=6)

# Legend 2 (lower left): colour -> group, drawn with square proxy handles
classif_handle, misclassif_handle = (
    mlines.Line2D([], [], color=colors[group], marker='s', linestyle='None', markersize=5, label=group)
    for group in ("Classif.", "Misclassif.")
)
ax.legend(handles=[classif_handle, misclassif_handle], loc='lower left', fontsize=6)

# The second ax.legend() call replaced the first legend, so add it back by hand
ax.add_artist(leg_markers)

plt.xticks(fontsize=6)
plt.yticks(fontsize=6)
plt.tight_layout()

# plt.savefig(f"{plot_folder}/{dataset}_size_vs_misclassif.png")