## How to train RT-DETR on dental radiographs ##

### Create object detection data set from panoramic radiographs ###
The goal is to train an RT-DETR model to detect teeth with their positions on dental radiographs.
We will crop the large panoramic x-rays into smaller images to create our object detection data set.
There are two reasons why we are doing this:
1. Provide smaller high-resolution images to the detection model for the fixed input size of 640 x 640 pixels
2. Use the model for predicting tooth positions on smaller periapical or bitewing x-rays

### Imports ###

In [11]:
import os
import glob
import numpy as np
import pandas as pd
from pathlib import Path
from matplotlib import pyplot as plt
from matplotlib import patches

# Imports from this package
# If you get an error, install the package in your python environment
# Do not forget to activate the environment before running this command
# python -m pip install -e .

%load_ext autoreload
%autoreload 2
import computervision
from computervision.dentexdata import DentexData
from computervision.imageproc import ImageData

# Print package version
# Reaching this line also confirms that the computervision import above succeeded
print(f'Computervision package version {computervision.__version__}')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Computervision package version 0.0.0


In [12]:
# Data directory (modify for local storage)
# Resolves to <home directory>/data. This is inside the repository root only
# when the home directory is the repository root (presumably the case in the
# project container -- verify for a local setup).
# os.path.expanduser('~') is used rather than os.environ.get('HOME'): the
# latter returns None when HOME is not set, and os.path.join(None, ...) then
# fails with an unhelpful TypeError.
data_dir = os.path.join(os.path.expanduser('~'), 'data')

# Directory to store the data set
dataset_dir = os.path.join(data_dir, 'dentex_detection')
# Directory that holds the panoramic x-ray images (*.png)
image_dir = os.path.join(dataset_dir, 'quadrant_enumeration', 'xrays')

# Create an output directory to store examples and results
# (parents=True also creates data_dir and dataset_dir when they are missing)
output_dir = os.path.join(dataset_dir, 'output')
Path(output_dir).mkdir(parents=True, exist_ok=True)

### Load the annotations ###
We use the annotations data frame created in the first notebook.

In [13]:
# The parquet file is named after the archive: the part of the archive file
# name before the first '.', with a .parquet extension.
url = 'https://dsets.s3.amazonaws.com/dentex/dentex-quadrant-enumeration.tar.gz'
# The stem is computed outside the f-string: reusing single quotes inside the
# replacement field is a SyntaxError on Python versions before 3.12.
archive_stem = os.path.basename(url).split('.')[0]
annotations_data_file_name = f'{archive_stem}.parquet'
annotations_data_file = os.path.join(dataset_dir, annotations_data_file_name)
df = pd.read_parquet(annotations_data_file)

# Let's make sure that we have the expected number of images available
expected_images = 634
# glob returns files in arbitrary order; sorting keeps slices such as
# file_list[:2] reproducible between runs and machines.
file_list = sorted(glob.glob(os.path.join(image_dir, '*.png')))
# Warn, but do not stop, when the number of images is not what we expect.
# A plain comparison is used because assert statements are removed when
# Python runs with -O, which would silently drop the warning.
if len(file_list) != expected_images:
    print(f'Warning. Expected {expected_images} images in {image_dir}')
print(f'Found {len(file_list)} images in {image_dir}')
display(df.head(3))

Found 634 images in /app/data/dentex_detection/quadrant_enumeration/xrays


Unnamed: 0,file_name,bbox,segmentation,quadrant,position
0,train_0.png,"[1376.5882352941176, 288.0, 102.94117647058829...","[[1467, 664, 1479, 288, 1388, 299, 1376, 655]]",1,1
1,train_0.png,"[1303.0588235294117, 293.88235294117646, 85.29...","[[1376, 670, 1388, 296, 1317, 293, 1314, 485, ...",1,2
2,train_0.png,"[1241.2941176470588, 255.6470588235294, 85.294...","[[1297, 623, 1326, 543, 1317, 258, 1244, 255, ...",1,3


### Helper functions for bounding boxes and segmentations ###

In [15]:
# Build one (quadrant) bounding box that encloses a list of bounding boxes
def compass_box(bbox_list_xywh: list, offset:int) -> list:
    """Return a box that encloses all boxes in a list, grown by a margin.

    Each input box is passed through xywh2xyxy; elements 0 and 2 of the
    result are collected as x values and elements 1 and 3 as y values.
    The extremes of those values, moved outwards by ``offset``, are passed
    through xyxy2xywh and returned.

    NOTE(review): xywh2xyxy, flatten and xyxy2xywh are not defined in this
    cell -- presumably they come from the computervision package; confirm
    that they are imported before this function is called.

    Parameters:
        bbox_list_xywh: list of bounding boxes (an AssertionError is raised
            for any other type).
        offset: margin added on every side of the enclosing box.

    Returns:
        The result of xyxy2xywh for the enlarged enclosing box.
    """
    assert isinstance(bbox_list_xywh, list)
    corner_boxes = [xywh2xyxy(box) for box in bbox_list_xywh]
    x_values = flatten([[box[0], box[2]] for box in corner_boxes])
    y_values = flatten([[box[1], box[3]] for box in corner_boxes])
    # Smallest and largest coordinates, pushed outwards by the margin
    enclosing_xyxy = [
        min(x_values) - offset,
        min(y_values) - offset,
        max(x_values) + offset,
        max(y_values) + offset,
    ]
    return xyxy2xywh(enclosing_xyxy)

# Create a function to build (x, y) tuples from segmentation lists
def segmentation_edges(segmentation: list) -> np.ndarray:
    output = None
    try:
        assert isinstance(segmentation, list)
        assert isinstance(segmentation[0], (int, np.int64))
        assert len(segmentation) % 2 == 0
    except AssertionError as e:
        logger.error('segmentation must be a flat even-lenth list of integer values')
    else:
        seg_x = [segmentation[i] for i in range(len(segmentation)) if i % 2 == 0]
        seg_y = [segmentation[i] for i in range(len(segmentation)) if i % 2 == 1]
        output = np.array(list(zip(seg_x, seg_y)))
    return output

### Crop the images and save them in a new directory ###

In [24]:
# Walk through the panoramic radiographs (currently only the first two files)
for f, file in enumerate(file_list[:2]):
    # Progress message after every 100 files
    if (f + 1) % 100 == 0:
        print(f'File {f+1} / {len(file_list)}')
    file_name = os.path.basename(file)
    im = ImageData().load_image(file)
    # Select the annotation rows of this image and collect the quadrant
    # labels that occur in them, in ascending order
    image_rows = df['file_name'] == file_name
    quadrant_list = sorted(df.loc[image_rows, 'quadrant'].unique())
    for quadrant in quadrant_list:
        print(quadrant)

1
2
3
4
1
2
3
4


In [23]:
# Show the quadrant labels collected for the most recently processed image
quadrant_list

[np.int64(1), np.int64(2), np.int64(3), np.int64(4)]