Define Dataset Class

In [1]:
from torchvision import datasets, tv_tensors
from pathlib import Path
from PIL import Image
import json
import torch

class FMOWDataset(datasets.VisionDataset):
    """Object-detection dataset backed by a label DataFrame and per-image JSON files.

    Each row of ``label_df`` supplies an ``img_path`` (relative to ``root``,
    without extension). ``<img_path>.jpg`` is the image and ``<img_path>.json``
    holds a ``"bounding_boxes"`` list whose entries carry a ``"category"`` and
    a ``"box"``.

    Args:
        root: Directory that ``img_path`` entries are relative to.
        label_df: DataFrame with at least ``"category"`` and ``"img_path"``
            columns. Rows are looked up with ``.loc[idx]``, so the index must
            be the integer labels ``0..len-1`` (e.g. after ``reset_index``).
        transforms: Optional callable applied as ``transforms(image, target)``.
    """

    def __init__(self, root, label_df, transforms=None):
        root = Path(root)
        super().__init__(root=root, transforms=transforms)
        self.label_df = label_df
        # Sorted so that the category -> integer id mapping is deterministic.
        self.categories = sorted(self.label_df["category"].unique())
        self.class_ids = {cat: idx for idx, cat in enumerate(self.categories)}

    def __getitem__(self, idx):
        """Return ``(image, target)`` for row ``idx``.

        ``target`` is a dict with ``"boxes"`` (``tv_tensors.BoundingBoxes`` in
        XYXY format, shape ``(num_boxes, 4)``) and ``"labels"`` (int64 tensor
        of class ids, shape ``(num_boxes,)``).
        """
        path = self.label_df.loc[idx, "img_path"]

        # Close the underlying file as soon as the RGB copy has been made.
        with Image.open(f"{self.root}/{path}.jpg") as raw_image:
            image = raw_image.convert("RGB")

        with open(f"{self.root}/{path}.json", "r") as f:
            metadata = json.load(f)

        labels = []
        boxes = []
        for annotation in metadata["bounding_boxes"]:
            labels.append(self.class_ids[annotation["category"]])
            box = annotation["box"]
            # box is treated as [x, y, width, height]; the far corner is the
            # last pixel inside the box, hence the -1.
            boxes.append([box[0], box[1], box[0] + box[2] - 1, box[1] + box[3] - 1])

        target = {
            "boxes": tv_tensors.BoundingBoxes(
                # reshape keeps an image with no annotations at shape (0, 4).
                data=torch.as_tensor(boxes).reshape(-1, 4),
                format="XYXY",
                # canvas_size is (height, width); PIL's Image.size is
                # (width, height), so it must not be passed through directly.
                canvas_size=(image.height, image.width),
            ),  # type: ignore
            # Explicit dtype so an empty label list does not become float32.
            "labels": torch.tensor(labels, dtype=torch.int64),
        }
        if self.transforms is not None:
            image, target = self.transforms(image, target)
        return image, target

    def __len__(self):
        """Number of rows in ``label_df``."""
        return len(self.label_df)

Define Transforms

In [2]:
from torchvision.transforms import v2, InterpolationMode
import torch
# Validation pipeline: convert to a tensor image, drop degenerate boxes, and
# scale pixel values to float32.
validation_transform = v2.Compose([
    v2.ToImage(),
    v2.SanitizeBoundingBoxes(),
    v2.ToDtype(torch.float32, scale=True),
])

# Same steps as validation_transform, kept as a separate object.
stratified_transform = v2.Compose([
    v2.ToImage(),
    v2.SanitizeBoundingBoxes(),
    v2.ToDtype(torch.float32, scale=True),
])

# Resolution-shift pipeline: downsample to a random size, then upsample back to
# 448 with nearest-neighbour interpolation.
# The random size is drawn per sample by v2.RandomResize. Writing
# v2.Resize(size=int(torch.randint(56, 448, (1,)))) would draw the size once
# when this cell runs and reuse that single unseeded value for every image.
shifted_transform = v2.Compose([
    v2.ToImage(),
    v2.SanitizeBoundingBoxes(),
    v2.RandomResize(min_size=56, max_size=448),
    v2.Resize(size=448, interpolation=InterpolationMode.NEAREST),
    v2.ToDtype(torch.float32, scale=True),
])

Define helper classes to compute model metrics

In [3]:
from sklearn.metrics import roc_auc_score, accuracy_score
from torchvision.ops import box_iou
import numpy as np

class ClassificationROCAUC:
    """Stateless callable that forwards to ``roc_auc_score``."""

    def __init__(self):
        """No configuration is needed."""

    def __call__(self, pred_confs, targets):
        """Return the ROC AUC of ``pred_confs`` scored against ``targets``."""
        return roc_auc_score(y_true=targets, y_score=pred_confs)

    
class ObjectDetectorROCAUC:
    """ROC AUC over every (prediction, target) box pair of a detector.

    For each image, every predicted box is paired with every target box. A
    pair is labelled correct when the class labels match and the IoU exceeds
    ``iou_threshold``; the pair's score is the prediction's confidence.

    Args:
        iou_threshold: IoU a pair must exceed to count as a box match.
    """

    def __init__(self, iou_threshold):
        self.iou_threshold = iou_threshold

    def __call__(self, pred_batches, target_batches):
        """Compute ROC AUC across all batches.

        Args:
            pred_batches: Iterable of batches; each batch is an iterable of
                dicts with ``"boxes"``, ``"labels"`` and ``"scores"`` tensors.
            target_batches: Iterable of batches; each batch is an iterable of
                dicts with ``"boxes"`` and ``"labels"`` tensors.

        Returns:
            The value of ``roc_auc_score`` over all prediction/target pairs.

        Raises:
            ValueError: If no images were supplied.
        """
        scores = []
        correct = []
        for pred_batch, target_batch in zip(pred_batches, target_batches):
            for pred, target in zip(pred_batch, target_batch):
                # Work on local CPU copies; the caller's dicts are left as-is.
                pred_boxes = pred["boxes"].detach().cpu()
                pred_labels = pred["labels"].detach().cpu()
                pred_scores = pred["scores"].detach().cpu()
                tgt_boxes = target["boxes"].detach().cpu()
                tgt_labels = target["labels"].detach().cpu()

                num_targets = tgt_boxes.shape[0]
                # Both masks are (num_preds, num_targets).
                class_correct = pred_labels[:, None] == tgt_labels[None]
                box_correct = box_iou(pred_boxes, tgt_boxes) > self.iou_threshold

                # Repeat each prediction's score once per target so the
                # flattened scores line up with the flattened pair mask.
                pair_scores = pred_scores[:, None].expand(-1, num_targets)
                scores.append(pair_scores.reshape(-1).numpy())
                correct.append((class_correct & box_correct).reshape(-1).float().numpy())

        if not scores:
            raise ValueError("ObjectDetectorROCAUC received no predictions/targets to score")
        return roc_auc_score(np.concatenate(correct), np.concatenate(scores))


class ObjectDetectorAccuracy:
    """Multiclass accuracy for a detector, with dummy entries for misses.

    The label lists handed to ``accuracy_score`` are assembled per image from:

    * one dummy prediction (``-1``) per missed detection, i.e. a target that
      no prediction overlaps with IoU above the threshold;
    * one (prediction label, target label) entry per prediction/target pair
      whose IoU is above the threshold;
    * one dummy target (``-1``) per false positive, i.e. a prediction that
      overlaps no target with IoU above the threshold.

    NOTE(review): every above-threshold pair is counted, not only the
    highest-confidence prediction per target -- confirm this is intended.
    Only the multiclass variant (one class per box) is implemented.

    Args:
        iou_threshold: IoU a pair must exceed to count as a box match.
    """

    def __init__(self, iou_threshold):
        self.iou_threshold = iou_threshold

    def __call__(self, pred_batches, target_batches):
        """Compute accuracy across all batches.

        Args:
            pred_batches: Iterable of batches; each batch is an iterable of
                dicts with ``"boxes"`` and ``"labels"`` tensors.
            target_batches: Iterable of batches; each batch is an iterable of
                dicts with ``"boxes"`` and ``"labels"`` tensors.

        Returns:
            The value of ``accuracy_score`` over the assembled label lists.

        Raises:
            ValueError: If no images were supplied.
        """
        target_labels = []
        pred_labels = []
        for pred_batch, target_batch in zip(pred_batches, target_batches):
            for pred, target in zip(pred_batch, target_batch):
                # Work on local CPU copies; the caller's dicts are left as-is.
                p_boxes = pred["boxes"].detach().cpu()
                p_labels = pred["labels"].detach().cpu()
                t_boxes = target["boxes"].detach().cpu()
                t_labels = target["labels"].detach().cpu()

                abv_th = box_iou(p_boxes, t_boxes) > self.iou_threshold  # (M, N)

                # Missed detections: targets with no prediction above threshold.
                missed_detections = ~abv_th.any(dim=0)  # (N,)
                md_targets = t_labels[missed_detections]
                md_preds = torch.full_like(md_targets, -1)  # dummy predictions

                # False positives: predictions with no target above threshold.
                false_positives = ~abv_th.any(dim=1)  # (M,)
                fp_preds = p_labels[false_positives]
                fp_targets = torch.full_like(fp_preds, -1)  # dummy targets

                # Matched pairs, in the same row-major order as abv_th.
                pred_idx, tgt_idx = abv_th.nonzero(as_tuple=True)
                matched_preds = p_labels[pred_idx]
                matched_tgts = t_labels[tgt_idx]

                pred_class = torch.cat([md_preds, matched_preds, fp_preds])
                tgt_class = torch.cat([md_targets, matched_tgts, fp_targets])
                pred_labels.append(pred_class.numpy())
                target_labels.append(tgt_class.numpy())

        if not pred_labels:
            raise ValueError("ObjectDetectorAccuracy received no predictions/targets to score")
        return accuracy_score(np.concatenate(target_labels), np.concatenate(pred_labels))

Instantiate Model Class and find the weight files 

In [4]:
from torchvision import models
from pathlib import Path
# Value passed as num_classes when building the detector.
nclasses = 10

# Despite its name, model_class is the model instance itself; later cells call
# .eval() on it and invoke it directly.
# NOTE(review): torchvision's FasterRCNN documents `min_size` / `max_size` for
# input resizing -- confirm that `img_size_min` / `img_size_max` are actually
# honored here (and match whatever was used when the checkpoints were trained).
model_class = models.get_model(
    name="fasterrcnn_resnet50_fpn",
    num_classes=nclasses,
    img_size_min=448,
    img_size_max=768,
)

# Every *.ckpt file anywhere under the run directory is treated as one model.
rundir = Path("/home/jmcmillan/mlqp-phase-ii-base/stress_tests/runs/rcnn_v1")
weight_files = list(rundir.rglob("*.ckpt"))

Make Dataloaders

In [5]:
from torch.utils.data import DataLoader
import pandas as pd
import numpy as np

# Fixed seed so the 50% per-category subsample is the same on every run;
# otherwise the evaluated subsets (and the saved metrics) change per execution.
SAMPLE_SEED = 0


def collate_detection_batch(batch):
    """Turn [(image, target), ...] into (images, targets) tuples.

    A named function (rather than a lambda) so it can be pickled if
    num_workers is raised above 0.
    """
    return tuple(zip(*batch))


nw = 0
pers = nw > 0  # persistent_workers is only passed as True when workers exist

val_df: pd.DataFrame = pd.read_csv("/data/fmow-rgb-chipped/shifted_dfs/val_df.csv")
val_df = val_df.groupby("category").sample(frac=0.5, random_state=SAMPLE_SEED).reset_index()
val_dataset = FMOWDataset(root="/data/fmow-rgb-chipped", label_df=val_df, transforms=validation_transform)
val_loader = DataLoader(
    val_dataset,
    batch_size=32,
    pin_memory=True,
    num_workers=nw,
    collate_fn=collate_detection_batch,
    persistent_workers=pers,
)

# Smoke test: push a single validation batch through the model in eval mode.
# no_grad avoids building an autograd graph for a throwaway forward pass.
model_class.eval()
with torch.no_grad():
    for vimg, vtgt in val_loader:
        o = model_class(vimg)
        break

test_df: pd.DataFrame = pd.read_csv("/data/fmow-rgb-chipped/shifted_dfs/test_df.csv")
test_df = test_df.groupby("category").sample(frac=0.5, random_state=SAMPLE_SEED).reset_index()
test_dataset = FMOWDataset(root="/data/fmow-rgb-chipped", label_df=test_df, transforms=shifted_transform)
test_loader = DataLoader(
    test_dataset,
    batch_size=32,
    pin_memory=True,
    num_workers=nw,
    collate_fn=collate_detection_batch,
    persistent_workers=pers,
)

Instantiate Metric Class and Metric Helper

In [6]:
import underspecification
from torchvision import models
from pathlib import Path
# IoU threshold shared by both detector metrics.
iou_threshold = 0.3

# Both metrics are constructed, but only roc_metric is handed to the evaluator
# below; acc_metric is not used in this cell.
acc_metric = ObjectDetectorAccuracy(iou_threshold=iou_threshold)
roc_metric = ObjectDetectorROCAUC(iou_threshold=iou_threshold)

metric_save = underspecification.MetricEvaluator(
    model_class,
    weight_files,
    val_loader,
    test_loader,
    roc_metric,
    device=0,
)
metric_save.calculate_metrics(metric_filename="shifted_rocauc.npy")

Evaluating Models: 100%|██████████| 20/20 [1:11:17<00:00, 213.88s/it]


Finally, instantiate the MetricStressTester class and perform the stress test.

In [8]:
import underspecification

# Metric files for the validation and test splits, read by the stress tester.
val_metric_path = "/home/jmcmillan/mlqp-phase-ii-base/stress_tests/runs/rcnn_v1/val_shifted_rocauc.npy"
test_metric_path = "/home/jmcmillan/mlqp-phase-ii-base/stress_tests/runs/rcnn_v1/test_shifted_rocauc.npy"

# The same file name is given to the constructor and to the plotting call.
figname = "spearman_shifted_rocauc_interval.png"

roc_stress_tester = underspecification.MetricStressTester(
    val_metric_path,
    test_metric_path,
    figure_savename=figname,
)
roc_stress_tester.calculate_underspecification()
roc_stress_tester.plot_confidence_interval(figname)