AP, mAP

In [1]:
import datasets

# Load the previously saved dataset from the local "data/" directory.
# Later cells read its "train" and "test" splits.
full_data = datasets.load_from_disk("data/")



Loading dataset from disk:   0%|          | 0/48 [00:00<?, ?it/s]

In [208]:
# Show the first training record to inspect which fields each example carries.
full_data["train"][0]

{'pixel_values': tensor([[[ 0.5878,  0.6049,  0.6392,  ...,  0.5193,  0.3138,  0.1939],
          [ 0.5536,  0.5878,  0.6392,  ...,  0.4679,  0.3481,  0.3309],
          [ 0.6563,  0.6906,  0.7419,  ...,  0.4508,  0.3994,  0.3823],
          ...,
          [-1.4158, -1.1418, -1.0048,  ...,  0.8447,  0.8104,  0.7933],
          [-0.9020, -0.9020, -1.1075,  ...,  0.7762,  0.7591,  0.7419],
          [-0.8335, -0.9363, -1.0904,  ...,  0.5536,  0.4851,  0.4851]],
 
         [[-0.6176, -0.6001, -0.5651,  ..., -0.8803, -1.0903, -1.2129],
          [-0.6527, -0.6176, -0.5651,  ..., -0.8978, -1.0203, -1.0553],
          [-0.5476, -0.5126, -0.4601,  ..., -0.8627, -0.9328, -0.9503],
          ...,
          [-1.3004, -1.0203, -0.8803,  ...,  1.7458,  1.7108,  1.6933],
          [-0.7752, -0.7752, -0.9853,  ...,  1.6758,  1.6583,  1.6408],
          [-0.7052, -0.8102, -0.9678,  ...,  1.4482,  1.3782,  1.3782]],
 
         [[ 0.3742,  0.3916,  0.4265,  ...,  0.2173,  0.0082, -0.1138],
          [ 

In [53]:
import torch
import transformers

# Detection model restored from a local checkpoint directory; local_files_only=True
# keeps transformers from trying to download anything for it.
MODEL = transformers.YolosForObjectDetection.from_pretrained('checkpoint-44000', local_files_only=True) # from checkpoint
# Pre-/post-processor. YolosImageProcessor is the supported replacement for the
# deprecated YolosFeatureExtractor alias and exposes the same call and
# post_process_object_detection API used below.
# NOTE(review): the processor config comes from the base 'hustvl/yolos-tiny' repo, not
# from the checkpoint - confirm the checkpoint was trained with the same preprocessing.
PROCESSOR = transformers.YolosImageProcessor.from_pretrained('hustvl/yolos-tiny')


def detect_boxes(image, threshold=0.9):
    """Run the detector on a single image and return its post-processed detections.

    Args:
        image: Input image. It must be accepted by ``PROCESSOR`` and expose ``.size``
            as ``(width, height)`` (as PIL images do), because the reversed size is
            passed on as the ``(height, width)`` target size.
        threshold: Minimum confidence score for a detection to be kept.

    Returns:
        The first (and only) element of
        ``PROCESSOR.post_process_object_detection(...)``; callers read its
        ``"scores"``, ``"labels"`` and ``"boxes"`` tensors.
    """
    inputs = PROCESSOR(image, return_tensors="pt", size={"height": 800, "width": 800})

    # Inference only: disabling autograd avoids building a graph on every call and
    # returns tensors that do not require grad.
    with torch.no_grad():
        outputs = MODEL(**inputs)

        # image.size is (width, height); the post-processor expects (height, width).
        target_sizes = torch.tensor([image.size[::-1]])
        results = PROCESSOR.post_process_object_detection(
            outputs, threshold=threshold, target_sizes=target_sizes
        )[0]
    return results



In [207]:
import os
import numpy as np
from tqdm import tqdm

# Split that is exported and evaluated.
DATASET = full_data["test"]

IMAGE_PATH='metrics_data/images' # path to which we save images from the dataset
ANNOTATIONS_PATH='metrics_data/annots' # path to which we save ground truths from the dataset
DETECTION_PATH='metrics_data/dcs' # path to which we save predictions

# os.makedirs also creates the shared parent directory "metrics_data" (os.mkdir would
# raise FileNotFoundError when it is missing), and exist_ok=True makes re-running this
# cell a no-op.
for path in [IMAGE_PATH, ANNOTATIONS_PATH, DETECTION_PATH]:
    os.makedirs(path, exist_ok=True)

# Export every sample of DATASET as three files sharing the sample index <i>:
#   IMAGE_PATH/<i>.jpg        the image
#   ANNOTATIONS_PATH/<i>.txt  one ground-truth box per line: "<label> <b0> <b1> <b2> <b3>"
#   DETECTION_PATH/<i>.txt    one prediction per line:       "<label> <score> <b0> <b1> <b2> <b3>"
# NOTE(review): ground-truth boxes are written exactly as stored in the dataset, while
# predicted boxes are truncated to integers after being rescaled to the image size by
# the post-processor - confirm the coordinate formats selected in the metrics tool
# match both files.
for i, data in enumerate(tqdm(DATASET)):
    image = data['image']
    image.save(os.path.join(IMAGE_PATH, f"{i}.jpg")) # saving image
    results = detect_boxes(image) # model inference
    annotations = data['label_ids']

    with open(os.path.join(ANNOTATIONS_PATH, f"{i}.txt"), "w") as f:
        for box, label in zip(annotations['boxes'], annotations['class_labels']): # saving ground truths
            f.write(f"{label} {box[0]} {box[1]} {box[2]} {box[3]}\n")

    # Convert each result tensor to NumPy once; .detach().cpu() keeps this valid even
    # if the tensors still track gradients or live on another device.
    scores = results["scores"].detach().cpu().numpy()
    labels = results["labels"].detach().cpu().numpy()
    boxes = results["boxes"].detach().cpu().numpy()

    idxes = np.argsort(scores)[::-1] # indexes for sorting by prediction score (descending)

    with open(os.path.join(DETECTION_PATH, f"{i}.txt"), "w") as f:
        # No backslash continuations: the arguments already sit inside parentheses.
        for score, label, box in zip(scores[idxes], labels[idxes], boxes[idxes]):
            f.write(f"{label} {score} {int(box[0])} {int(box[1])} {int(box[2])} {int(box[3])}\n") # saving predictions

100%|████████████████████████████████████████████████████████████████████████████████| 334/334 [16:46<00:00,  3.01s/it]


Software used for computing metrics: [Object detection metrics](https://github.com/rafaelpadilla/review_object_detection_metrics)