In [None]:
# Use the %pip magic rather than `!pip`: the magic installs into the environment
# of the running kernel, whereas `!pip` invokes whichever pip is first on the
# shell PATH. Installing both packages in one call lets pip resolve them together.
# TODO: pin versions (e.g. rfdetr==X.Y.Z) once a known-good combination is confirmed.
%pip install rfdetr supervision

In [None]:
import kagglehub

# Fetch the latest published version of the dataset and report where the files
# were placed locally.
dataset_handle = "anulayakhare/crackathon-data"
path = kagglehub.dataset_download(dataset_handle)

print("Path to dataset files:", path)

In [None]:
# Convert YOLO to COCO for RDD2022 randomized_dataset structure
import json
import os
from PIL import Image
import shutil

def yolo_to_coco(images_dir, labels_dir, output_json, categories):
    """Convert a directory of YOLO label files into one COCO JSON file.

    Every ``.jpg``/``.jpeg``/``.png`` file in ``images_dir`` becomes one COCO
    image record. Image ids start at 0 and follow sorted file-name order, so
    repeated runs over the same directory produce the same ids. The label file
    with the same stem in ``labels_dir`` (if it exists) is read line by line as
    ``class x_center y_center width height`` with coordinates relative to the
    image size; each line becomes one annotation whose ``bbox`` is
    ``[x_min, y_min, width, height]`` in pixels.

    Images without a label file are kept with no annotations. Label lines with
    fewer than five whitespace-separated fields are skipped; fields beyond the
    fifth are ignored.

    Args:
        images_dir: Directory containing the image files.
        labels_dir: Directory containing the YOLO ``.txt`` label files.
        output_json: Path of the COCO JSON file to write.
        categories: List of COCO category dicts, stored unchanged in the output.

    Raises:
        ValueError: If a label line with at least five fields contains a class
            id that is not an integer or a coordinate that is not a number.
    """
    image_extensions = (".jpg", ".jpeg", ".png")
    coco = {
        "info": {},
        "licenses": [],
        "categories": categories,
        "images": [],
        "annotations": []
    }

    # os.listdir returns entries in arbitrary order; sorting makes the image ids
    # (and therefore the whole JSON file) reproducible between runs.
    image_names = sorted(
        name for name in os.listdir(images_dir)
        if name.lower().endswith(image_extensions)
    )

    annotation_id = 0
    for image_id, image_name in enumerate(image_names):
        image_path = os.path.join(images_dir, image_name)
        label_path = os.path.join(labels_dir, os.path.splitext(image_name)[0] + ".txt")

        # Only the dimensions are needed; the context manager releases the file
        # handle immediately instead of leaving one open per image.
        with Image.open(image_path) as image:
            width, height = image.size

        coco["images"].append({
            "id": image_id,
            "width": width,
            "height": height,
            "file_name": image_name
        })

        if not os.path.exists(label_path):
            continue

        with open(label_path) as f:
            for line in f:
                parts = line.strip().split()
                if len(parts) < 5:
                    continue
                class_id = int(parts[0])
                x, y, w, h = map(float, parts[1:5])
                # YOLO stores the box centre; COCO wants the top-left corner.
                x_min = (x - w / 2) * width
                y_min = (y - h / 2) * height
                bbox_width = w * width
                bbox_height = h * height
                coco["annotations"].append({
                    "id": annotation_id,
                    "image_id": image_id,
                    "category_id": class_id,
                    "bbox": [x_min, y_min, bbox_width, bbox_height],
                    "area": bbox_width * bbox_height,
                    "iscrowd": 0
                })
                annotation_id += 1

    # Write to a writable directory (e.g., /kaggle/working)
    with open(output_json, "w") as f:
        json.dump(coco, f, indent=2)
    print(f"Saved COCO annotations to {output_json}")

# RDD2022 damage classes. The position of each name is used as its category id,
# and every entry carries the same placeholder 'supercategory' value.
categories = [
    {"id": class_index, "name": class_name, "supercategory": "none"}
    for class_index, class_name in enumerate((
        "Longitudinal Crack",
        "Transverse Crack",
        "Alligator Crack",
        "Other Corruption",
        "Pothole",
    ))
]

# Use /kaggle/working for output
input_base_dir = "/kaggle/input/crackathon-data/randomized_dataset"
output_base_dir = "/kaggle/working/randomized_dataset"
os.makedirs(output_base_dir, exist_ok=True)

# The source dataset calls its validation split "val", but RFDETR requires the
# directory to be named "valid". Writing that split directly under its final
# name keeps this cell idempotent: re-running it refreshes "valid" in place
# instead of rebuilding a second "val" copy next to a stale "valid" directory.
output_split_names = {"val": "valid"}

for split in ["train", "val", "test"]:
    images_input_dir = os.path.join(input_base_dir, split, "images")
    labels_input_dir = os.path.join(input_base_dir, split, "labels")

    output_split_dir = os.path.join(output_base_dir, output_split_names.get(split, split))
    os.makedirs(output_split_dir, exist_ok=True)

    # Copy images directly to the output split directory (e.g., /kaggle/working/randomized_dataset/train/)
    # so they sit next to the annotation file written below.
    for image_name in os.listdir(images_input_dir):
        if not image_name.lower().endswith(('.jpg', '.jpeg', '.png')):
            continue
        src_path = os.path.join(images_input_dir, image_name)
        dst_path = os.path.join(output_split_dir, image_name)
        shutil.copy(src_path, dst_path)
    print(f"Copied images from {images_input_dir} to {output_split_dir}")

    # One COCO annotation file per split, stored alongside the copied images.
    output_json = os.path.join(output_split_dir, "_annotations.coco.json")
    yolo_to_coco(images_input_dir, labels_input_dir, output_json, categories)

# RFDETR requires the validation split directory to be called 'valid', so move
# 'val' there unless a 'valid' directory is already present.
val_dir = os.path.join(output_base_dir, "val")
valid_dir = os.path.join(output_base_dir, "valid")
if os.path.exists(valid_dir):
    # Destination already present: leave both directories untouched.
    print(f"{valid_dir} already exists, no rename needed.")
elif os.path.exists(val_dir):
    os.rename(val_dir, valid_dir)
    print(f"Renamed {val_dir} to {valid_dir}")
else:
    print(f"Neither {val_dir} nor {valid_dir} exist, no rename performed.")

In [None]:
from rfdetr import RFDETRNano
# Training hyper-parameters, gathered in one place so they are easy to scan and tweak.
# NOTE(review): confirm that `mixed_precision` is an option RFDETR's train()
# actually recognises; an unrecognised keyword may be ignored without warning.
train_kwargs = {
    "dataset_dir": "/kaggle/working/randomized_dataset",
    "epochs": 10,
    "batch_size": 16,  # P100-optimized
    "grad_accum_steps": 2,
    "lr": 1e-4,
    "mixed_precision": True,  # Speed boost
    "gradient_checkpointing": True,
    "output_dir": "/kaggle/working/rfdetr-p100",
}

model = RFDETRNano()
model.train(**train_kwargs)
