### Build the training script ###

In [45]:
import os
import numpy as np
import pandas as pd
import logging
import glob
import json
import datetime
from pathlib import Path
from matplotlib import pyplot as plt
from matplotlib import patches

# PyTorch framework
import torch

# Hugging Face Library
from transformers import RTDetrV2ForObjectDetection, RTDetrImageProcessor
from transformers import TrainingArguments, Trainer

# Appearance of the Notebook
from IPython.display import display, HTML
# Use the full browser width for the notebook container
display(HTML("<style>.container { width:100% !important; }</style>"))

# Wider numpy rows and more generous pandas display limits
np.set_printoptions(linewidth=110)
for option_name, option_value in (('display.max_rows', 500),
                                  ('display.max_columns', 100),
                                  ('display.width', 1000)):
    pd.set_option(option_name, option_value)

# Import this module with autoreload
%load_ext autoreload
%autoreload 2
import detection as dt
from detection.detrdataset import get_gpu_info, DetectionDatasetFromDF
from detection.detransform import DetrTransform
from detection.imageproc import clipxywh, ImageData
from detection.mapeval import MAPEvaluator

print(f'Project module version: {dt.__version__}')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Project module version: 0.0.1.post1.dev26+g0781d45


In [46]:
# Select the compute device (get_gpu_info is the project helper imported above)
device, device_str = get_gpu_info()

# Today's date as a compact YYMMDD string; used below in model and log file names
date_str = format(datetime.date.today(), '%y%m%d')
print(f'Date: {date_str}')

CUDA available: True
Number of GPUs found:  1
Current device ID: 0
GPU device name:   NVIDIA GeForce RTX 3060 Laptop GPU
PyTorch version:   2.6.0a0+ecf3bae40a.nv25.01
CUDA version:      12.8
CUDNN version:     90700
Date: 250224


### Locations for the data ###

In [32]:
# Data directory.
# os.path.expanduser('~') resolves the home directory even when the HOME
# environment variable is not set. os.environ.get('HOME') returns None in that
# case, which makes os.path.join fail with a TypeError.
data_root = os.path.join(os.path.expanduser('~'), 'data')
data_dir = os.path.join(data_root, 'dentex_detection')

# Save model checkpoints
model_dir = os.path.join(data_dir, 'model')
Path(model_dir).mkdir(exist_ok=True, parents=True)

# Model name: rtdetr_<date>_<zero-padded version>
model_version = 1
model_name = f'rtdetr_{date_str}_{str(model_version).zfill(2)}'
print(f'Training model: {model_name}')

# Log files
log_dir = os.path.join(model_dir, 'log')
Path(log_dir).mkdir(exist_ok=True, parents=True)

# Image directory and annotation data
image_dir = os.path.join(data_dir, 'quadrants')
annotation_file_name = 'train_split_250224.parquet'
annotation_file = os.path.join(image_dir, annotation_file_name)

# Check the images on disk against the expected size of the data set
file_list = glob.glob(os.path.join(image_dir, '*.png'))
expected_n_images = 2531
if len(file_list) != expected_n_images:
    # Report the actual count as well, so the mismatch can be diagnosed
    print(f'WARNING: expected number of images ({expected_n_images}) does not match '
          f'the number of images on disk ({len(file_list)}).')
    print('Delete files and start over.')
else:
    print(f'Found {len(file_list)} images.')

Training model: rtdetr_250224_01
Found 2531 images.


### Load the annotations ###

In [23]:
df = pd.read_parquet(annotation_file)
# Label maps: the sorted unique values of the 'ada' column are numbered
# consecutively from 0. Names are stored as strings, ids as plain ints.
label_name_list = sorted(df['ada'].unique())
id2label = {label_id: str(name) for label_id, name in enumerate(label_name_list)}
label2id = {name: label_id for label_id, name in id2label.items()}

In [27]:
# Add two columns: 'file' (full image path) and 'label' (integer class id
# looked up from the string form of the 'ada' value)
df = df.assign(
    file=lambda frame: frame['file_name'].map(lambda file_name: os.path.join(image_dir, file_name)),
    label=lambda frame: frame['ada'].map(lambda ada: label2id.get(str(ada))),
)
display(df.head())
print(sorted(df['label'].unique()))

Unnamed: 0,file_name,quadrant,pos,bbox,segmentation,fdi,ada,dset,file,label
0,train_0_1.png,1,1,"[666, 102, 103, 376]","[[757, 478, 769, 102, 678, 113, 666, 469]]",11,8,train,/app/data/dentex_detection/quadrants/train_0_1...,7
1,train_0_1.png,1,2,"[593, 107, 85, 377]","[[666, 484, 678, 110, 607, 107, 604, 299, 619,...",12,7,train,/app/data/dentex_detection/quadrants/train_0_1...,6
2,train_0_1.png,1,3,"[531, 69, 85, 368]","[[587, 437, 616, 357, 607, 72, 534, 69, 531, 4...",13,6,train,/app/data/dentex_detection/quadrants/train_0_1...,5
3,train_0_1.png,1,4,"[457, 31, 115, 403]","[[522, 434, 572, 378, 543, 31, 463, 40, 457, 3...",14,5,train,/app/data/dentex_detection/quadrants/train_0_1...,4
4,train_0_1.png,1,5,"[369, 10, 100, 406]","[[437, 416, 469, 378, 466, 10, 381, 31, 378, 2...",15,4,train,/app/data/dentex_detection/quadrants/train_0_1...,3


[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]


In [28]:
# Name of the augmentation setup (passed to DetrTransform when the data sets are created)
use_transform = 'transform_1'

# Hugging Face model checkpoint and its image processor
hf_checkpoint = 'PekingU/rtdetr_v2_r101vd'
image_processor = RTDetrImageProcessor.from_pretrained(hf_checkpoint)

# The label maps define the classes of this model. Weights whose shapes do not
# match the checkpoint are newly initialized (ignore_mismatched_sizes=True)
# instead of raising an error.
model_kwargs = {'id2label': id2label,
                'label2id': label2id,
                'anchor_image_size': None,
                'ignore_mismatched_sizes': True}
model = RTDetrV2ForObjectDetection.from_pretrained(hf_checkpoint, **model_kwargs)

# Custom collate_fn to batch the images
def collate_fn(batch):
    """Combine a list of samples into one batch dictionary.

    Parameters
    ----------
    batch : list of dict
        Each sample provides a "pixel_values" tensor and a "labels" entry.

    Returns
    -------
    dict
        "pixel_values": the sample tensors stacked along a new first dimension.
        "labels": the per-sample labels, kept as a list in sample order.
    """
    pixel_values = torch.stack([sample["pixel_values"] for sample in batch])
    labels = [sample["labels"] for sample in batch]
    return {"pixel_values": pixel_values, "labels": labels}

Some weights of RTDetrV2ForObjectDetection were not initialized from the model checkpoint at PekingU/rtdetr_v2_r101vd and are newly initialized because the shapes did not match:
- model.decoder.class_embed.0.bias: found shape torch.Size([80]) in the checkpoint and torch.Size([32]) in the model instantiated
- model.decoder.class_embed.0.weight: found shape torch.Size([80, 256]) in the checkpoint and torch.Size([32, 256]) in the model instantiated
- model.decoder.class_embed.1.bias: found shape torch.Size([80]) in the checkpoint and torch.Size([32]) in the model instantiated
- model.decoder.class_embed.1.weight: found shape torch.Size([80, 256]) in the checkpoint and torch.Size([32, 256]) in the model instantiated
- model.decoder.class_embed.2.bias: found shape torch.Size([80]) in the checkpoint and torch.Size([32]) in the model instantiated
- model.decoder.class_embed.2.weight: found shape torch.Size([80, 256]) in the checkpoint and torch.Size([32, 256]) in the model instantiated
- mode

### Set up logging ###

In [38]:
# Log file for this run. The name only carries the date and the file is opened
# with filemode='w', so a later run on the same day starts the file afresh.
log_file_name = f'train_log_{date_str}.log'
log_file = os.path.join(log_dir, log_file_name)
dtfmt = '%y%m%d-%H:%M'
logfmt = '%(asctime)s-%(name)s-%(levelname)s-%(message)s'
# force=True: without it, basicConfig() silently does nothing when the root
# logger already has handlers (for example after this cell was run once, or
# when an imported library configured logging), and no message would reach
# log_file. Forcing replaces the existing root handlers with this file handler.
logging.basicConfig(filename=log_file,
                    filemode='w',
                    level=logging.INFO,
                    format=logfmt,
                    datefmt=dtfmt,
                    force=True)
logger = logging.getLogger(name=__name__)

### Training arguments ###

In [40]:
# Keyword arguments for transformers.TrainingArguments, kept in a plain dict so
# that the exact configuration can be written to the log as JSON.
training_args_dict = dict(
    # Checkpoints are written to <model_dir>/<model_name>
    output_dir=os.path.join(model_dir, model_name),
    # Optimization
    num_train_epochs=20,
    max_grad_norm=0.1,
    learning_rate=5e-5,
    warmup_steps=300,
    per_device_train_batch_size=4,
    dataloader_num_workers=2,
    # Model selection: a higher 'eval_map' is better
    metric_for_best_model='eval_map',
    greater_is_better=True,
    load_best_model_at_end=True,
    # Evaluate and save once per epoch, keeping at most two checkpoints
    eval_strategy='epoch',
    save_strategy='epoch',
    save_total_limit=2,
    remove_unused_columns=False,
    eval_do_concat_batches=False,
)

logger.info(json.dumps(training_args_dict))

### Create the data sets ###

In [30]:
# Image transformations for the data sets
train_transform = DetrTransform(use_transform).train_transform()
val_transform = DetrTransform(use_transform).val_transform()

# Build one data set per value of the 'dset' column
dset_list = sorted(df['dset'].unique())
print(dset_list)
dataset_dict = {}
for dset in dset_list:
    # The 'train' split uses train_transform; every other split uses val_transform
    transform = train_transform if dset == 'train' else val_transform
    df_dset = df.loc[df['dset'] == dset]
    dataset = DetectionDatasetFromDF(
        data=df_dset,
        processor=image_processor,
        file_col='file',
        label_col='label',
        bbox_col='bbox',
        transform=transform,
        bbox_format='xywh',
        validate=True,
    )
    dataset_dict[dset] = dataset
    print(f'Number of images in {dset.upper()}: {len(dataset)}')

['test', 'train', 'val']
Number of images in TEST: 32
Number of images in TRAIN: 2479
Number of images in VAL: 20


In [49]:
# Set the evaluation metrics
eval_compute_metrics_fn = MAPEvaluator(image_processor=image_processor, threshold=0.01, id2label=id2label)
training_args = TrainingArguments(**training_args_dict)

# Set up the training.
# The data sets are looked up with [] rather than .get(): a missing split then
# fails here with a KeyError instead of passing None on to the Trainer.
trainer = Trainer(model=model,
                  args=training_args,
                  train_dataset=dataset_dict['train'],
                  eval_dataset=dataset_dict['val'],
                  processing_class=image_processor,
                  data_collator=collate_fn,
                  compute_metrics=eval_compute_metrics_fn)

# The training itself is started in the next cell. Calling trainer.train() here
# as well would run the complete training twice on "Run All".

In [50]:
# Run the training. With the arguments in training_args_dict, the model is
# evaluated and a checkpoint is saved after every epoch.
trainer.train()

Epoch,Training Loss,Validation Loss,Map,Map 50,Map 75,Map Small,Map Medium,Map Large,Mar 1,Mar 10,Mar 100,Mar Small,Mar Medium,Mar Large,Map 1,Mar 100 1,Map 2,Mar 100 2,Map 3,Mar 100 3,Map 4,Mar 100 4,Map 5,Mar 100 5,Map 6,Mar 100 6,Map 7,Mar 100 7,Map 8,Mar 100 8,Map 9,Mar 100 9,Map 10,Mar 100 10,Map 11,Mar 100 11,Map 12,Mar 100 12,Map 13,Mar 100 13,Map 14,Mar 100 14,Map 15,Mar 100 15,Map 16,Mar 100 16,Map 17,Mar 100 17,Map 18,Mar 100 18,Map 19,Mar 100 19,Map 20,Mar 100 20,Map 21,Mar 100 21,Map 22,Mar 100 22,Map 23,Mar 100 23,Map 24,Mar 100 24,Map 25,Mar 100 25,Map 26,Mar 100 26,Map 27,Mar 100 27,Map 28,Mar 100 28,Map 29,Mar 100 29,Map 30,Mar 100 30,Map 31,Mar 100 31,Map 32,Mar 100 32
1,38.1224,14.336624,0.3861,0.6687,0.4105,-1.0,0.0,0.3899,0.505,0.6878,0.7261,-1.0,0.0,0.7329,0.3033,0.8,0.4848,0.8,0.2364,0.8333,0.4533,0.85,0.0634,0.675,0.4781,0.76,0.3948,0.72,0.653,0.84,0.4494,0.76,0.5318,0.68,0.4775,0.8,0.2021,0.66,0.3673,0.65,0.3726,0.8333,0.5192,0.78,0.2502,0.8,0.25,0.7333,0.1104,0.7,0.3273,0.5,0.0497,0.7667,0.3351,0.8,0.505,0.7,0.4182,0.56,0.2233,0.74,0.2149,0.72,0.4504,0.54,0.3554,0.56,0.4884,0.7,0.5306,0.7,0.6624,0.76,0.5428,0.74,0.6532,0.775
2,23.28,13.048721,0.495,0.78,0.5233,-1.0,0.0,0.5006,0.5811,0.6898,0.7224,-1.0,0.0,0.7277,0.3222,0.8,0.4023,0.8,0.6003,0.8667,0.215,0.85,0.7029,0.725,0.632,0.74,0.4626,0.76,0.6431,0.84,0.4613,0.76,0.4742,0.6,0.5156,0.78,0.4371,0.78,0.4101,0.5,0.5994,0.7,0.4283,0.74,0.7505,0.75,0.2738,0.6333,0.1851,0.6333,0.2004,0.6,0.4031,0.7,0.7283,0.86,0.6979,0.78,0.4655,0.56,0.0949,0.56,0.5469,0.8,0.4868,0.64,0.4347,0.62,0.5577,0.625,0.6761,0.82,0.7031,0.8,0.6155,0.72,0.7127,0.775


NOTE! Installing ujson may make loading annotations faster.


KeyboardInterrupt: 