In [1]:
import itertools
import os

from sklearn.model_selection import KFold
from torch.utils.data import DataLoader
import torch

from detr_config import Config
from detr_dataset import collate_fn, get_train_dataset, get_test_dataset, detr_processor
from detr_model import DETRModel

from coco_eval import CocoEvaluator

The `max_size` parameter is deprecated and will be removed in v4.26. Please specify in `size['longest_edge'] instead`.


In [2]:
# Dataset

train_valid_dataset = get_train_dataset()

loading annotations into memory...
Done (t=0.01s)
creating index...
index created!


In [3]:
def convert_to_xywh(boxes):
    xmin, ymin, xmax, ymax = boxes.unbind(1)
    boxes = torch.stack((xmin, ymin, xmax - xmin, ymax - ymin), dim=1)
    boxes = boxes.tolist()
    return boxes

def prepare_for_coco_detection(predictions):
    coco_results = []
    for image_id, prediction in predictions.items():
        if len(prediction) == 0:
            continue

        scores = prediction["scores"].tolist()
        labels = prediction["labels"].tolist()
        boxes = prediction["boxes"]
        boxes =  convert_to_xywh(boxes)

        coco_results.extend(
            [
                {
                    "image_id": image_id,
                    "category_id": labels[k],
                    "bbox": box,
                    "score": scores[k],
                }
                for k, box in enumerate(boxes)
            ]
        )
    return coco_results

In [4]:
# HyperParameters

hyperparameters = itertools.product(*[
    Config.BACKBONES,
    Config.NUM_QUERIES,
    Config.D_MODEL,
    Config.TRANSFORMER_LAYERS,
])

hyperparameters = itertools.product(*[
    ['resnet50.a1_in1k'],
    [10],
    [64],
    [2],
])


# Hyperparameter Search

for backbone, num_queries, d_model, transformer_layers in hyperparameters:
    print('(Num Queries, Dim model, Enc-Dec Layers): ', 
            f'({num_queries}, {d_model}, {transformer_layers})' )

    # Training with K-fold Cross Validation 

    k_fold = KFold(n_splits=10, shuffle=True, random_state=123456)

    for fold, (train_idx, valid_idx) in enumerate(k_fold.split(train_valid_dataset)):        
        print(f"Fold {fold + 1}")
        
        checkpoints_dir = os.path.join(
            'lightning_logs',
            f'backbone={backbone.split(".")[0]}_queries={num_queries}_dmodel={d_model}_layers={transformer_layers}',
            f'fold_{fold+1}',
            'checkpoints'
        )
        checkpoint = [f for f in os.listdir(checkpoints_dir) if 'last' not in f][0]
        checkpoint_path = os.path.join(checkpoints_dir, checkpoint)
        model = DETRModel.load_from_checkpoint(checkpoint_path)
        
        test_dataset = get_test_dataset()
        evaluator = CocoEvaluator(coco_gt=test_dataset.coco, iou_types=["bbox"])
        
        test_loader = DataLoader(
            dataset = test_dataset,
            batch_size = 8,
            collate_fn=collate_fn
        )
        
        for batch in test_loader:
            labels = batch['labels']
            pixel_values = batch['pixel_values']
            outputs = model(pixel_values)
            predictions = detr_processor.post_process_object_detection(outputs, threshold=0.1)
            image_ids = [ label['image_id'].item() for label in labels]
            predictions = {image_id:output for image_id, output in zip(image_ids, predictions)}
            predictions = prepare_for_coco_detection(predictions)
            evaluator.update(predictions)
            
        evaluator.synchronize_between_processes()
        evaluator.accumulate()
        evaluator.summarize()


        break # Fold
    break # Hyperparameter

(Num Queries, Dim model, Enc-Dec Layers):  (10, 64, 2)
Fold 1


  rank_zero_warn(


loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
Accumulating evaluation results...
DONE (t=0.01s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=