### Dentex toothmodel image augmentations ###

In [1]:
import os
import numpy as np
import pandas as pd
import glob
import json
import copy
import random
from pathlib import Path
import logging
import itertools
from PIL import Image
import datetime
import seaborn as sns
import albumentations as alb

from matplotlib import pyplot as plt
from matplotlib import patches

# Logger for this notebook; when run as a notebook cell, __name__ is '__main__'
logger = logging.getLogger(name=__name__)

# PyTorch framework
import torch

# Hugging Face Library
from transformers import RTDetrV2ForObjectDetection, RTDetrImageProcessor

# Appearance of the Notebook
from IPython.display import display, HTML
# Inject a CSS rule that sets the `.container` element to the full page width
display(HTML("<style>.container { width:100% !important; }</style>"))

# Wider numpy line wrapping for printed arrays
np.set_printoptions(linewidth=110)

# Pandas display limits: rows, columns and total character width of the repr
pd.options.display.max_rows = 500
pd.options.display.max_columns = 100
pd.options.display.width = 1000

# Import the project package with autoreload enabled.
# `%autoreload 2` makes IPython reload modules before executing a cell, so edits to the
# `detection` package source are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
import detection as dt
from detection.dentexdata import DentexData, fdi_ada_mapping, val_test_split
from detection.detrdataset import get_gpu_info, DetectionDatasetFromDF
from detection.detrdataset import AugTransform
from detection.fileutils import FileOP
from detection.imageproc import ImageData, xywh2xyxy, xyxy2xywh, clipxywh

# Print the installed project package version so the recorded outputs can be traced to a build
print(f'Project module version: {dt.__version__}')

Project module version: 0.0.1.post1.dev16+g367a5b6


In [3]:
# Set device
# get_gpu_info() is imported from detection.detrdataset. Its return value is unpacked into
# `device` and `device_str`; the recorded output shows that it also prints a summary of
# CUDA availability, GPU name and PyTorch/CUDA/cuDNN versions.
# NOTE(review): presumably `device` is a torch.device and `device_str` its string form - confirm.
device, device_str = get_gpu_info()

CUDA available: True
Number of GPUs found:  1
Current device ID: 0
GPU device name:   NVIDIA GeForce GTX 1080 with Max-Q Design
PyTorch version:   2.6.0a0+ecf3bae40a.nv25.01
CUDA version:      12.8
CUDNN version:     90700


### Define data locations for this notebook ###

In [4]:
# Data directory
# The home directory is resolved with os.path.expanduser('~') rather than
# os.environ.get('HOME'): the latter returns None when HOME is not set (e.g. on
# Windows or in a stripped-down service environment), which makes os.path.join
# raise a TypeError. expanduser still honors HOME when it is set.
data_root = os.path.join(os.path.expanduser('~'), 'data')
data_dir = os.path.join(data_root, 'dentex_detection')
model_dir = os.path.join(data_dir, 'model')

# Image directory (after cropping the data)
image_dir = os.path.join(data_dir, 'quadrants')

### Make sure that the data is available ###

In [5]:
# Compare the number of PNG files in the image directory with the expected count
file_list = glob.glob(os.path.join(image_dir, '*.png'))
expected_n_images = 2531
if len(file_list) == expected_n_images:
    print(f'Found {len(file_list)} images.')
else:
    # A mismatch only produces a warning; nothing is deleted or raised here
    print(f'WARNING: expected number of images ({expected_n_images}) does not match the number of images on disk.')
    print('Delete files and start over.')

Found 2531 images.


### Configure the training functions ###

In [9]:
# Annotation file (read with pd.read_parquet further down), located in the image directory.
# NOTE(review): the file name suggests this is the training split created on 2025-02-23 - confirm.
annotation_file_name = 'train_split_250223.parquet'
annotation_file = os.path.join(image_dir, annotation_file_name)


Unnamed: 0,file_name,quadrant,pos,bbox,segmentation,fdi,ada,dset
0,train_0_1.png,1,1,"[666, 102, 103, 376]","[[757, 478, 769, 102, 678, 113, 666, 469]]",11,8,train
1,train_0_1.png,1,2,"[593, 107, 85, 377]","[[666, 484, 678, 110, 607, 107, 604, 299, 619,...",12,7,train


In [21]:
df = pd.read_parquet(annotation_file)

# Create the label column.
# The distinct values of the 'ada' column are the class names. Series.tolist() returns
# native Python scalars instead of numpy scalars, so id2label / label2id remain
# JSON-serializable if they are later stored in a model configuration.
label_name_list = sorted(df['ada'].drop_duplicates().tolist())

# Zero-based class index <-> class name lookup tables
id2label = dict(enumerate(label_name_list))
label2id = {name: label for label, name in id2label.items()}

# 'file' is the image path inside image_dir, 'label' is the class index of the 'ada' value
df = df.assign(file=df['file_name'].apply(lambda f: os.path.join(image_dir, f)),
               label=df['ada'].map(label2id))

display(df.head())

Unnamed: 0,file_name,quadrant,pos,bbox,segmentation,fdi,ada,dset,file,label
0,train_0_1.png,1,1,"[666, 102, 103, 376]","[[757, 478, 769, 102, 678, 113, 666, 469]]",11,8,train,/app/data/dentex_detection/quadrants/train_0_1...,7
1,train_0_1.png,1,2,"[593, 107, 85, 377]","[[666, 484, 678, 110, 607, 107, 604, 299, 619,...",12,7,train,/app/data/dentex_detection/quadrants/train_0_1...,6
2,train_0_1.png,1,3,"[531, 69, 85, 368]","[[587, 437, 616, 357, 607, 72, 534, 69, 531, 4...",13,6,train,/app/data/dentex_detection/quadrants/train_0_1...,5
3,train_0_1.png,1,4,"[457, 31, 115, 403]","[[522, 434, 572, 378, 543, 31, 463, 40, 457, 3...",14,5,train,/app/data/dentex_detection/quadrants/train_0_1...,4
4,train_0_1.png,1,5,"[369, 10, 100, 406]","[[437, 416, 469, 378, 466, 10, 381, 31, 378, 2...",15,4,train,/app/data/dentex_detection/quadrants/train_0_1...,3
