# **Notebook 2a**: RF-DETR

<span style="font-size: 1.5rem;">By **Michael Farrugia**</span>

## Package Installation

In [1]:
INSTALL_PACKAGES = False

In [2]:
if INSTALL_PACKAGES:
    !pip install -r requirements.txt

## Model Setup

In [3]:
from rfdetr import RFDETRBase

rf_detr_model = RFDETRBase()

Loading pretrain weights


## Model Training

In [4]:
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


The images used in this notebook are split according to the `.json` files. To use the images in training, the `images` folder must exist - if this folder does not exist, the cell below is used to create the necessary folders according to how the dataset is split.

In [5]:
import json

with open("./dataset/COCO-based_COCO/annotations/train.json", "r") as f:
    train_annotations = json.load(f)

with open("./dataset/COCO-based_COCO/annotations/val.json", "r") as f:
    val_annotations = json.load(f)

with open("./dataset/COCO-based_COCO/annotations/test.json", "r") as f:
    test_annotations = json.load(f)

print(f"Number of images in the train set: {len(train_annotations ['images'])}")
print(f"Number of images in the validation set: {len(val_annotations ['images'])}")
print(f"Number of images in the test set: {len(test_annotations ['images'])}")

Number of images in the train set: 483
Number of images in the validation set: 88
Number of images in the test set: 86


In [6]:
import os
import shutil

if not os.path.exists("./dataset/COCO-based_COCO/images/"):
    os.makedirs("./dataset/COCO-based_COCO/images/")

    for dataset, dataset_type in zip([train_annotations, val_annotations, test_annotations], ["train", "val", "test"]):
        os.makedirs(f"./dataset/COCO-based_COCO/images/{dataset_type}/")
        img_filenames = [img ["file_name"] for img in dataset ["images"]]
        
        for img_filename in img_filenames:
            member_name = img_filename.split("-") [1].split("_") [0]

            img_src_path = f"./{member_name}/images/{img_filename}"
            img_dst_path = f"./dataset/COCO-based_COCO/images/{dataset_type}/{img_filename}"
            
            shutil.copyfile(img_src_path, img_dst_path)

Furthermore, RF-DETR expects a `_annotations.coco.json` file to exist in each of the `train`, `val` and `test` folders. The cell below creates these files by splitting the original annotations file according to the image splits. Other requirements for RF-DETR compatibility, such as 0-indexed category ids and the presence of the `supercategory` field, are also handled in the cell below.

Finally, rather than using `val`, RF-DETR uses `valid`. The `val` folder is simply copied into the `valid` folder.

In [7]:
fixed_train_annotations = train_annotations.copy()
fixed_val_annotations = val_annotations.copy()
fixed_test_annotations = test_annotations.copy()

for dataset in [fixed_train_annotations, fixed_val_annotations, fixed_test_annotations]:
    min_category_id = min(category ["id"] for category in dataset ["categories"])
    
    if "supercategory" not in dataset ["categories"] [0]:
        for category in dataset ["categories"]:
            category ["supercategory"] = "Maltese Traffic Sign"
            
            if min_category_id == 1:
                category ["id"] -= 1

    if min_category_id == 1:
        for annotation in dataset ["annotations"]:
            annotation ["category_id"] -= 1

In [8]:
# check if _annotations.coco.json files exist in train, val, test folders
for dataset_type in ["train", "val", "test"]:
    annotation_file_path = f"./dataset/COCO-based_COCO/images/{dataset_type}/_annotations.coco.json"
    
    if not os.path.exists(annotation_file_path):
        print(f"_annotations.coco.json file not found in {dataset_type} folder. Creating the file...")
        if dataset_type == "train":
            with open(annotation_file_path, "w") as f:
                json.dump(fixed_train_annotations, f)
        elif dataset_type == "val":
            with open(annotation_file_path, "w") as f:
                json.dump(fixed_val_annotations, f)
        else: 
            with open(annotation_file_path, "w") as f:
                json.dump(fixed_test_annotations, f)
    else:
        print(f"_annotations.coco.json file already exists in {dataset_type} folder")

_annotations.coco.json file already exists in train folder
_annotations.coco.json file already exists in val folder
_annotations.coco.json file already exists in test folder


In [9]:
# copy val folder to valid folder
if not os.path.exists("./dataset/COCO-based_COCO/images/valid/"):
    shutil.copytree("./dataset/COCO-based_COCO/images/val/", "./dataset/COCO-based_COCO/images/valid/")
    print("Copied val folder to valid folder")

In [10]:
print("NUmber of classes in train set:")
print(len(fixed_train_annotations ["categories"]))

NUmber of classes in train set:
6


In [None]:
rf_detr_model.train(
    dataset_dir = "./dataset/COCO-based_COCO/images/", 
    tensorboard = True, 
    device = device, 
    num_epochs = 10,
    early_stopping = True,
    early_stopping_patience = 3,
    resolution = 448, # required to be a 56-divisible number
    batch_size = 4,
    output_dir = "params/rf-detr",
    lr = 1e-4,
    grad_accum_steps = 4,
)

Reinitializing your detection head with 7 classes.


TensorBoard logging initialized. To monitor logs, use 'tensorboard --logdir C:\Users\micha\Documents\School\University\3rd Year\Advanced Computer Vision for Artificial Intelligence\Assignment\params\rf-detr' and open http://localhost:6006/ in browser.
Not using distributed mode
git:
  sha: N/A, status: clean, branch: N/A

Namespace(num_classes=7, grad_accum_steps=4, print_freq=10, amp=True, lr=0.0001, lr_encoder=0.00015, batch_size=4, weight_decay=0.0001, epochs=100, lr_drop=100, clip_max_norm=0.1, lr_vit_layer_decay=0.8, lr_component_decay=0.7, do_benchmark=False, dropout=0, drop_path=0.0, drop_mode='standard', drop_schedule='constant', cutoff_epoch=0, pretrained_encoder=None, pretrain_weights='rf-detr-base.pth', pretrain_exclude_keys=None, pretrain_keys_modify_to_load=None, pretrained_distiller=None, encoder='dinov2_windowed_small', vit_encoder_num_layers=12, window_block_indexes=None, position_embedding='sine', out_feature_indexes=[2, 5, 8, 11], freeze_encoder=False, layer_norm=True