https://huggingface.co/learn/computer-vision-course/en/unit3/vision-transformers/vision-transformer-for-objection-detection

Transform the data into the following structure:

```python
DatasetDict({
    train: Dataset({
        features: ['image', 'image_id', 'width', 'height', 'objects'],
        num_rows: 5297
    })
    test: Dataset({
        features: ['image', 'image_id', 'width', 'height', 'objects'],
        num_rows: 1766
    })
})
```


In [1]:
from datasets import Dataset, DatasetDict
from PIL import Image
import json
import os
import supervision as sv
from transformers import AutoImageProcessor

In [7]:
def sv_to_custom_format(sv_dataset, class_mapping=None, *, default_size=(416, 416)):
    """Convert a detection dataset into a list of per-image annotation records.

    Args:
        sv_dataset: Iterable yielding ``(path, image, detections)`` triples.
            ``detections`` must be iterable; each item is indexed so that
            ``[0]`` is the box as ``(x_min, y_min, x_max, y_max)`` and ``[3]``
            is the class id.
        class_mapping: Optional dict keyed by class id. When given (and
            non-empty) the mapped value is stored as the category; otherwise
            the class id is stored as a string.
        default_size: ``(width, height)`` recorded for an image that does not
            expose a usable ``shape`` attribute.

    Returns:
        list[dict]: One record per image with the keys ``image`` (the path
        yielded by the dataset, not decoded pixels), ``image_id`` (0-based
        position in the dataset), ``width``, ``height`` and ``objects``.
        ``objects`` holds the parallel lists ``id`` (1-based within the
        image), ``area``, ``bbox`` (``[x_min, y_min, width, height]``) and
        ``category``.

    Raises:
        KeyError: If ``class_mapping`` is non-empty and lacks a class id.
    """
    formatted_data = []

    for image_id, (path, image, detections) in enumerate(sv_dataset):
        # Prefer the real image dimensions over a fixed size.
        # assumes image.shape is (height, width, ...) as for NumPy/OpenCV
        # arrays -- TODO confirm for the dataset in use
        shape = getattr(image, "shape", None)
        if shape is not None and len(shape) >= 2:
            height, width = int(shape[0]), int(shape[1])
        else:
            width, height = default_size

        bboxes = []
        areas = []
        categories = []

        for detection in detections:
            # Cast to built-in floats so the records stay serialisable
            # whatever scalar type the detection stores.
            x_min, y_min, x_max, y_max = (float(value) for value in detection[0])
            box_width = x_max - x_min
            box_height = y_max - y_min
            class_id = detection[3]

            bboxes.append([x_min, y_min, box_width, box_height])
            areas.append(box_width * box_height)
            categories.append(
                class_mapping[class_id] if class_mapping else str(class_id)
            )

        formatted_data.append({
            'image': path,
            'image_id': image_id,
            'width': width,
            'height': height,
            'objects': {
                # Object ids restart at 1 for every image.
                'id': list(range(1, len(bboxes) + 1)),
                'area': areas,
                'bbox': bboxes,
                'category': categories
            }
        })

    return formatted_data

In [8]:
def get_class_mappings_from_coco(annotation_path):
    """
    Extracts class mappings from a COCO annotations file.

    Args:
        annotation_path (str): Path to the COCO annotations JSON file.

    Returns:
        dict: Mapping from class IDs to class names, built from the ``id``
        and ``name`` of every entry under the top-level ``"categories"`` key.

    Raises:
        KeyError: If the file has no ``"categories"`` key, or an entry lacks
            ``"id"`` or ``"name"``.
    """
    # Read as UTF-8 explicitly: the platform default encoding (e.g. cp1252 on
    # Windows) would corrupt non-ASCII category names.
    with open(annotation_path, "r", encoding="utf-8") as f:
        coco_data = json.load(f)

    return {category["id"]: category["name"] for category in coco_data["categories"]}

In [9]:
# Root folder holding one sub-directory per split, each with its images and
# a "_annotations.coco.json" file.
ds_path = r"C:\Users\isaac\dev\CV_Garbage_Detection\Data"


def _load_coco_split(split_name):
    """Load the COCO-annotated split stored in ``<ds_path>/<split_name>``."""
    split_dir = os.path.join(ds_path, split_name)
    return sv.DetectionDataset.from_coco(
        images_directory_path=split_dir,
        annotations_path=os.path.join(split_dir, "_annotations.coco.json"),
    )


ds_train = _load_coco_split("train")
ds_test = _load_coco_split("test")
ds_valid = _load_coco_split("valid")

In [None]:
# Class-id -> name lookup is taken from the training split's annotation file
# and reused for every split.
train_annotations_path = os.path.join(ds_path, "train", "_annotations.coco.json")
class_mappings = get_class_mappings_from_coco(train_annotations_path)

# Convert the splits in the order train, test, valid.
formatted_train, formatted_test, formatted_valid = [
    sv_to_custom_format(split, class_mappings)
    for split in (ds_train, ds_test, ds_valid)
]