### Download and process the Dentex training data ###
Full data set: https://zenodo.org/records/7812323/files/training_data.zip

Object detection data: https://dsets.s3.amazonaws.com/dentex/dentex-quadrant-enumeration.tar.gz

In [27]:
import os
import numpy as np
import pandas as pd
import glob
import json
from pathlib import Path

from matplotlib import pyplot as plt
from matplotlib import patches

# Appearance of the Notebook
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
np.set_printoptions(linewidth=110)
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 100)
pd.set_option('display.width', 1000)

# Import this module with autoreload
%load_ext autoreload
%autoreload 2
import detection as dt
from detection.dentexdata import DentexData
from detection.fileutils import FileOP

print(f'Project module version: {dt.__version__}')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Project module version: 0.0.1.post1.dev6+g8d94cd2.d20250220


### Define data locations for this notebook ###

In [16]:
# Data directory
data_root = os.path.join(os.environ.get('HOME'), 'data')
data_dir = os.path.join(data_root, 'dentex_detection')
model_dir = os.path.join(data_dir, 'model')

Path(data_dir).mkdir(exist_ok=True, parents=True)
Path(model_dir).mkdir(exist_ok=True, parents=True)

# This is where the xrays are in the archive, so should be left as-is
image_dir = os.path.join(data_dir, 'quadrant_enumeration', 'xrays')
print(f'image_dir: {image_dir}')

# Directory for the output
output_dir = os.path.join(data_dir, 'output')
Path(output_dir).mkdir(exist_ok=True, parents=True)

image_dir: /app/data/dentex_detection/quadrant_enumeration/xrays


### Download Dentex object detection data set from AWS ###

In [28]:
# Download the package from AWS
dtx = DentexData(data_dir=data_dir)
url = dtx.detection_url
print(f'Downloading dentex object detection data from: {url}')

sz = FileOP().file_size_from_url(url)
sz_GB = sz/1.0e9

Downloading dentex object detection data from: https://dsets.s3.amazonaws.com/dentex/dentex-quadrant-enumeration.tar.gz


In [29]:
# Download and extract the data
print(f'Downloading {sz_GB:.2f} GB from:\n{url}')
data_tar_file = dtx.download_image_data(url=url)

# Check the images on disk
file_list = glob.glob(os.path.join(image_dir, '*.png'))
expected_n_images = 634
if not len(file_list) == expected_n_images:
    print(f'WARNING: expected number of images ({expected_n_images}) does not match the number of images on disk.')
    print(f'Delete files and start over.')
else:
    print(f'Extracted {len(file_list)} images.')

Downloading 1.72 GB from:
https://dsets.s3.amazonaws.com/dentex/dentex-quadrant-enumeration.tar.gz
Extracting from .gz archive.
Uncompressed output file exists: /app/data/dentex_detection/dentex-quadrant-enumeration.tar. Skipping.
Extracted 634 images.


### Functions ###

In [103]:
# Create a function to show an image with bounding boxes
def show_image_with_boxes(image, box_list, label_list, edgecolor=None, ax=None):
    color = plt.cm.rainbow(np.linspace(0, 1, len(box_list)))
    color_list = [color[c] for c in range(len(color))]
    if ax is None:
        fig, ax = plt.subplots(figsize=(4, 4))
    for b, box in enumerate(box_list):
        if edgecolor is None:
            boxcolor = color_list[b]
        else:
            boxcolor = edgecolor
        anchor = (box[0], box[1])
        width = box[2]
        height = box[3]
        #anchor, width, height = rectangle(box)
        ax.imshow(image)
        rect = patches.Rectangle(xy=anchor, 
                                 width=width, 
                                 height=height, 
                                 linewidth=1.5, 
                                 edgecolor=boxcolor, 
                                 facecolor='none', 
                                 alpha=0.7)
        ax.text(x=anchor[0], y=anchor[1], s=label_list[b], color=boxcolor)
        ax.add_patch(rect)
    ax.set(xticks=[], yticks=[])
    return ax

### Load the annotations into a data frame ###

In [102]:
annotations_file_name = 'train_quadrant_enumeration.json'
annotations_file = os.path.join(data_dir, 
                                'quadrant_enumeration', 
                                annotations_file_name)
annotations = dtx.load_annotations(annotations_file)

# Convert the annotations into a data frame
df = dtx.annotations_to_df()

# The categories describe the quadrant and tooth positions
df = df.rename(columns={'category_id_1_name': 'quadrant',
                        'category_id_2_name': 'pos'})

display(df.head(10))

Unnamed: 0,iscrowd,image_id,bbox,segmentation,id,area,category_id_1,category_id_2,height,width,file_name,quadrant,pos
0,0,1,"[1283.3333333333333, 459.25925925925924, 100.0...","[[1366, 459, 1383, 662, 1380, 716, 1295, 721, ...",1,22904,0,0,1316,2710,train_283.png,1,1
1,0,1,"[1200.0, 460.18518518518516, 90.74074074074065...","[[1290, 463, 1290, 619, 1276, 719, 1224, 725, ...",2,18597,0,1,1316,2710,train_283.png,1,2
2,0,1,"[1100.9259259259259, 451.85185185185185, 119.4...","[[1162, 451, 1212, 637, 1218, 663, 1220, 679, ...",3,24808,0,2,1316,2710,train_283.png,1,3
3,0,1,"[914.8148148148148, 466.66666666666663, 174.07...","[[965, 466, 1050, 562, 1088, 709, 1088, 760, 1...",4,28017,0,4,1316,2710,train_283.png,1,5
4,0,1,"[822.2222222222222, 484.25925925925924, 182.40...","[[915, 484, 981, 671, 997, 700, 1002, 721, 100...",5,35576,0,5,1316,2710,train_283.png,1,6
5,0,1,"[729.6296296296296, 494.4444444444444, 143.518...","[[790, 494, 822, 556, 845, 592, 860, 637, 870,...",6,29269,0,6,1316,2710,train_283.png,1,7
6,0,1,"[609.2592592592592, 500.92592592592587, 142.59...","[[672, 503, 609, 724, 703, 755, 735, 755, 751,...",7,25191,0,7,1316,2710,train_283.png,1,8
7,0,1,"[1377.7777777777776, 450.9259259259259, 100.0,...","[[1387, 450, 1377, 714, 1418, 719, 1461, 717, ...",8,24466,1,0,1316,2710,train_283.png,2,1
8,0,1,"[1476.8518518518517, 460.18518518518516, 100.0...","[[1486, 460, 1490, 556, 1476, 672, 1488, 697, ...",9,19440,1,1,1316,2710,train_283.png,2,2
9,0,1,"[1555.5555555555554, 428.7037037037037, 137.96...","[[1629, 449, 1578, 568, 1565, 621, 1560, 661, ...",10,22296,1,2,1316,2710,train_283.png,2,3


In [105]:
# Let's draw an x-ray image with the bounding boxes in one quadrant
image_idx = 123
quadrant = 2

file_name = sorted(list(df['file_name'].unique()))[image_idx]
print(file_name)
df_file = df.loc[(df['file_name'] == file_name) & ]



train_209.png


Unnamed: 0,iscrowd,image_id,bbox,segmentation,id,area,category_id_1,category_id_2,height,width,file_name,quadrant,pos
973,0,35,"[1386.0, 504.0, 100.0, 330.0]","[[1484, 824, 1486, 762, 1476, 712, 1472, 562, ...",974,23332,0,0,1316,2882,train_209.png,1,1
974,0,35,"[1318.0, 472.0, 94.0, 350.0]","[[1390, 780, 1384, 822, 1328, 810, 1318, 762, ...",975,20466,0,1,1316,2882,train_209.png,1,2
975,0,35,"[1246.0, 428.0, 100.0, 384.0]","[[1248, 740, 1246, 782, 1262, 812, 1290, 802, ...",976,28530,0,2,1316,2882,train_209.png,1,3
976,0,35,"[1168.0, 488.0, 114.0, 324.0]","[[1170, 724, 1168, 764, 1170, 796, 1210, 812, ...",977,25900,0,3,1316,2882,train_209.png,1,4
977,0,35,"[1074.0, 476.0, 118.0, 332.0]","[[1090, 736, 1074, 754, 1078, 786, 1114, 808, ...",978,24314,0,4,1316,2882,train_209.png,1,5
978,0,35,"[958.0, 494.0, 158.0, 314.0]","[[978, 700, 958, 724, 958, 754, 958, 772, 992,...",979,32834,0,5,1316,2882,train_209.png,1,6
979,0,35,"[796.0, 478.0, 174.0, 300.0]","[[820, 526, 806, 586, 828, 644, 806, 698, 796,...",980,40672,0,6,1316,2882,train_209.png,1,7
980,0,35,"[1483.3333333333333, 504.5454545454545, 96.969...","[[1484, 760, 1483, 819, 1528, 834, 1568, 825, ...",981,23864,1,0,1316,2882,train_209.png,2,1
981,0,35,"[1569.6969696969695, 524.2424242424242, 80.303...","[[1571, 737, 1569, 796, 1587, 822, 1619, 818, ...",982,19134,1,1,1316,2882,train_209.png,2,2
982,0,35,"[1715.151515151515, 484.8484848484848, 89.3939...","[[1715, 683, 1727, 753, 1746, 787, 1771, 798, ...",983,21318,1,3,1316,2882,train_209.png,2,4
