### Inference on a larger data set ###

In [1]:
import sys
import os
import numpy as np
import json
import pandas as pd
import logging
from pathlib import Path
from matplotlib import pyplot as plt
import albumentations as alb
import itertools

logger = logging.getLogger(__name__)

# PyTorch
import torch
from torch.utils.data import DataLoader
from torchvision import ops

# Hugging Face Library
from transformers import RTDetrV2ForObjectDetection, RTDetrImageProcessor

%load_ext autoreload
%autoreload 2
import computervision
from computervision.imageproc import is_image, ImageData, clipxywh, xyxy2xywh, xywh2xyxy
from computervision.imageproc import plot_boxes
from computervision.datasets import DETRdataset, get_gpu_info
from computervision.transformations import AugmentationTransform
from computervision.performance import DetectionMetrics
from computervision.inference import DETR

# Record which package and interpreter versions produced the outputs below
version_report = (
    f'Project version: {computervision.__version__}',
    f'Python version:  {sys.version}',
)
print(*version_report, sep='\n')

Project version: v0.0.2
Python version:  3.12.3 (main, Jun 18 2025, 17:59:45) [GCC 13.3.0]


In [2]:
# Select the compute device used for the model and the batches.
# NOTE(review): the hardware summary in this cell's output seems to be printed by get_gpu_info().
gpu_info = get_gpu_info()
device, device_str = gpu_info
print(f'Current device {device}')

CUDA available: True
Number of GPUs found:  1
Current device ID: 0
GPU device name:   NVIDIA GeForce RTX 3060 Laptop GPU
PyTorch version:   2.8.0a0+34c6371d24.nv25.08
CUDA version:      13.0
CUDNN version:     91200
Device for model training/inference: cuda:0
Current device cuda:0


In [3]:
# Directories and files
# Data and model artefacts are located below the directory named by the DATA
# environment variable. Fail early with a clear message when it is not set,
# instead of letting os.path.join() raise a TypeError on None.
data_root = os.environ.get('DATA')
if data_root is None:
    raise EnvironmentError('The DATA environment variable is not set.')

# NOTE: `dataset` holds the data set directory name here; a later cell rebinds
# the same name to the DETRdataset object.
dataset = 'dataset_object_roboflow_240930'
annotations_file_name = 'roboflow_240930_dset.parquet'
image_dir = os.path.join(data_root, dataset)
annotations_file = os.path.join(image_dir, annotations_file_name)

# Column names of the annotation table
file_col = 'multi_file'   # image file name
bbox_col = 'bbox'         # bounding box
pos_col = 'pos'           # tooth position

# Model checkpoint used for inference
model_name = 'rtdetr_roboflow_251005_01'
model_dir = os.path.join(data_root, 'model', model_name)
checkpoint = 'checkpoint-3800'
checkpoint_dir = os.path.join(model_dir, checkpoint)

# Output directory for the results (created when missing)
results_dir = os.path.join(model_dir, 'results')
Path(results_dir).mkdir(parents=True, exist_ok=True)

In [4]:
# Load the model configuration file
config_file = os.path.join(model_dir, f'{model_name}.json')
with open(config_file, mode='r') as fl:
    model_config = json.load(fl)
display(sorted(model_config))

# Annotations for the validation and test splits
df = pd.read_parquet(annotations_file).astype({pos_col: int})
df = df.loc[df['dset'].isin(['val', 'test'])].reset_index(drop=True)

# Labels: id2label converts the model's label ids into tooth positions,
# label2id is the inverse mapping used to label the annotations.
# JSON object keys are strings, so both sides are cast to int.
id2label = {int(k): int(v) for k, v in model_config['id2label'].items()}
label2id = {v: k for k, v in id2label.items()}
df = df.assign(label=df[pos_col].map(label2id.get))

# Positions that the model does not know end up without a label; make that visible
unmapped_positions = sorted(set(df[pos_col]) - set(label2id))
if unmapped_positions:
    logger.warning('Tooth positions without a model label: %s', unmapped_positions)
display(df.head(2))

['bbox_format', 'id2label', 'model_info', 'processor_params', 'training_args']

Unnamed: 0,id,license,file_name,height,width,date_captured,file_name_hash,dset,multi_file,bbox,category,disease,pos,box_id,label
0,2,1,pbws-super-set-1-completed__PBWs_Super_Set_3-0...,480,640,2024-09-17T23:44:33+00:00,eb79ef10bf,val,eb79ef10bf.jpg,"[0.0, 1.0, 128.205, 159.0]",tooth 24,teeth,12,eb79ef10bf_16,11
1,2,1,pbws-super-set-1-completed__PBWs_Super_Set_3-0...,480,640,2024-09-17T23:44:33+00:00,eb79ef10bf,val,eb79ef10bf.jpg,"[121.0, 4.0, 230.744, 167.0]",tooth 25,teeth,13,eb79ef10bf_17,12


### Load the model ###

In [5]:
# Restore the image processor and the detector weights from the same checkpoint
processor = RTDetrImageProcessor.from_pretrained(checkpoint_dir)
detector = RTDetrV2ForObjectDetection.from_pretrained(checkpoint_dir)
model = detector.to(device)

### Create the data set and the data loader for running predictions ###

In [6]:
def collate_fn(batch):
    """
    Merge a list of data set samples into one model-ready dictionary.

    The per-sample 'pixel_values' are stacked into a single tensor, the
    per-sample 'labels' are kept as a list. Both are moved to the
    notebook-level `device`.
    """
    stacked_images = torch.stack([sample["pixel_values"] for sample in batch])
    labels_on_device = [sample["labels"].to(device) for sample in batch]
    return {"pixel_values": stacked_images.to(device), "labels": labels_on_device}

# Data set without augmentation: alb.NoOp() leaves the images unchanged
dataset = DETRdataset(data=df.copy(),
                      image_processor=processor,
                      image_dir=image_dir,
                      file_name_col=file_col,
                      label_id_col='label',
                      bbox_col=bbox_col,
                      transforms=[alb.NoOp()])

# Use the same batch_size variable for the loader and for the report below,
# so that the two cannot drift apart.
batch_size = 4
dl = DataLoader(dataset=dataset, batch_size=batch_size, collate_fn=collate_fn)

# Number of batches in the data set. The loader keeps the last, possibly
# incomplete batch (drop_last defaults to False), so len(dl) already counts it.
n_images = len(dataset)
n_batches = len(dl)
rest = n_images % batch_size  # images in the final partial batch (0 if none)
print(f'Number of images in data set:  {n_images}')
print(f'Batch size:                    {batch_size}')
print(f'Number of batches in data set: {n_batches}')

Number of images in data set:  96
Batch size:                    4
Number of batches in data set: 24


### Predict on the data set ###

In [7]:
def predict_batch(batch, threshold):
    """
    Run the detector on one collated batch and return the detections as a data frame.

    Uses the notebook-level `model`, `processor`, `id2label`, `pos_col` and `bbox_col`.

    Parameters
    ----------
    batch : dict
        Batch as produced by `collate_fn`: 'pixel_values' holds the stacked images and
        'labels' holds one entry per image with at least 'image_id' and 'orig_size'.
        'orig_size' is read as (height, width).
    threshold : float
        Score threshold passed to the processor's post-processing step.

    Returns
    -------
    pandas.DataFrame
        One row per detection with the columns 'image_id', `pos_col`, `bbox_col`,
        'score' and 'area'. Boxes are converted with xyxy2xywh and clipped to the
        original image size; 'area' is the product of the last two box values.
        Images without any detection contribute no rows.
    """
    labels = batch['labels']
    image_id_list = [int(label.get('image_id').cpu()) for label in labels]
    target_size_list = [label.get('orig_size') for label in labels]
    image_size_list = [tuple(s.cpu().numpy()) for s in target_size_list]
    target_sizes = torch.stack(target_size_list)

    # Forward only the model inputs. Passing the ground-truth labels as well would
    # make the model compute the training loss, which is not needed for inference.
    model_inputs = {key: value for key, value in batch.items() if key != 'labels'}

    with torch.no_grad():
        outputs = model(**model_inputs)
        output_batch = processor.post_process_object_detection(outputs=outputs,
                                                               target_sizes=target_sizes,
                                                               threshold=threshold)
    pred_df_image_list = []
    for i, image_id in enumerate(image_id_list):
        # Clipping limits: x runs over the image width, y over the image height
        height, width = image_size_list[i][0], image_size_list[i][1]
        x_lim, y_lim = (0, width), (0, height)
        score_list = output_batch[i].get('scores').cpu().numpy()
        label_list = output_batch[i].get('labels').cpu().numpy()
        # Convert the model's label ids into tooth positions
        pos_list = [id2label.get(cl) for cl in label_list]
        box_list = output_batch[i].get('boxes').cpu().numpy()
        box_list = [clipxywh(xyxy2xywh(list(box)), xlim=x_lim, ylim=y_lim, decimals=0) for box in box_list]
        box_area_list = [box[2] * box[3] for box in box_list]
        pred_dict = {pos_col: pos_list, bbox_col: box_list, 'score': score_list, 'area': box_area_list}
        pred_df = pd.DataFrame(pred_dict)
        pred_df.insert(loc=0, column='image_id', value=image_id)
        pred_df_image_list.append(pred_df)
    pred_batch = pd.concat(pred_df_image_list, axis=0, ignore_index=True)
    return pred_batch

In [8]:
# Run the model over every batch and gather the per-batch detection tables
threshold = 0.05
pred_df_list = []
for b, batch in enumerate(dl):
    # Report progress on every fifth batch only, to keep the output short
    is_report_step = (b + 1) % 5 == 0
    if is_report_step:
        print(f'Running batch {b + 1} / {n_batches}')
    batch_predictions = predict_batch(batch, threshold=threshold)
    pred_df_list.append(batch_predictions)
# One table for the whole data set with a fresh running index
pred_df = pd.concat(pred_df_list, axis=0, ignore_index=True)

Running batch 5 / 24
Running batch 10 / 24
Running batch 15 / 24
Running batch 20 / 24


In [10]:
# Preview the first rows of the combined prediction table
pred_df.head(n=5)

Unnamed: 0,image_id,pos,bbox,score,area
0,0,18,"[385, 155, 254, 204]",0.984009,51816
1,0,19,"[212, 161, 393, 198]",0.973566,77814
2,0,20,"[96, 153, 204, 206]",0.973493,42024
3,0,15,"[391, 1, 249, 145]",0.069184,36105
4,0,14,"[225, 3, 399, 139]",0.067809,55461


In [11]:
# Number of distinct images that have at least one row in the prediction table
print(pred_df['image_id'].nunique(dropna=False))

96
