In [1]:
import os
import json
import shutil
import xml.etree.ElementTree as ET
import pandas as pd
import yaml
import random

# Object detection using YOLO bounding boxes

## 1. Data preparation 

We need to convert and reorganize the dataset into YOLO format. This involves:  
1. **Creating the required directory structure** (`images/` and `labels/` for `train`, `val`, `test`).  
2. **Extracting bounding boxes** from JSON annotations, filtering only rectangles.  
3. **Normalizing coordinates** to YOLO format (`class x_center y_center width height`).  
4. **Copying images and saving annotations** in the correct locations. 

This time we will also add random not fractured images to the new dataset and create empty annotation files for those, since they do not have any bounding box information.

In [10]:
all_not_fractured_files = {
    os.path.splitext(f)[0]: f for f in os.listdir(not_fractured_img_dir) if f.endswith((".jpg", ".png", ".jpeg"))
}
all_not_fractured_jsons = {os.path.splitext(f)[0]: f for f in os.listdir(not_fractured_ann_dir) if f.endswith(".json")}

# Ensure we only use images that have corresponding annotations
all_not_fractured_images = {k: v for k, v in all_not_fractured_files.items() if k in all_not_fractured_jsons}

all_not_fractured_files

{'IMG0000000': 'IMG0000000.jpg',
 'IMG0000001': 'IMG0000001.jpg',
 'IMG0000002': 'IMG0000002.jpg',
 'IMG0000003': 'IMG0000003.jpg',
 'IMG0000004': 'IMG0000004.jpg',
 'IMG0000005': 'IMG0000005.jpg',
 'IMG0000006': 'IMG0000006.jpg',
 'IMG0000007': 'IMG0000007.jpg',
 'IMG0000008': 'IMG0000008.jpg',
 'IMG0000009': 'IMG0000009.jpg',
 'IMG0000010': 'IMG0000010.jpg',
 'IMG0000011': 'IMG0000011.jpg',
 'IMG0000012': 'IMG0000012.jpg',
 'IMG0000013': 'IMG0000013.jpg',
 'IMG0000014': 'IMG0000014.jpg',
 'IMG0000015': 'IMG0000015.jpg',
 'IMG0000016': 'IMG0000016.jpg',
 'IMG0000017': 'IMG0000017.jpg',
 'IMG0000018': 'IMG0000018.jpg',
 'IMG0000020': 'IMG0000020.jpg',
 'IMG0000021': 'IMG0000021.jpg',
 'IMG0000022': 'IMG0000022.jpg',
 'IMG0000023': 'IMG0000023.jpg',
 'IMG0000024': 'IMG0000024.jpg',
 'IMG0000026': 'IMG0000026.jpg',
 'IMG0000027': 'IMG0000027.jpg',
 'IMG0000028': 'IMG0000028.jpg',
 'IMG0000029': 'IMG0000029.jpg',
 'IMG0000030': 'IMG0000030.jpg',
 'IMG0000031': 'IMG0000031.jpg',
 'IMG00000

In [13]:
all_not_fractured = [f for f in os.listdir(not_fractured_img_dir) if f.endswith((".jpg", ".png", ".jpeg"))]
len(all_not_fractured_files)

3366

In [14]:

# Define dataset paths
raw_data_dir = "raw data"
not_fractured_img_dir = os.path.join(raw_data_dir, "not fractured", "img")
not_fractured_ann_dir = os.path.join(raw_data_dir, "not fractured", "ann")
yolo_data_dir = "data_object_detection_incl_not_fractured_yolo"

# Set a fixed seed for reproducibility
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

# Create YOLO folder structure
for split in ["train", "val", "test"]:
    os.makedirs(os.path.join(yolo_data_dir, "images", split), exist_ok=True)
    os.makedirs(os.path.join(yolo_data_dir, "labels", split), exist_ok=True)

# Function to convert bounding boxes to YOLO format
def convert_bbox_to_yolo(img_w, img_h, bbox):
    x_min, y_min = bbox[0]
    x_max, y_max = bbox[1]

    # Convert to YOLO format
    x_center = ((x_min + x_max) / 2) / img_w
    y_center = ((y_min + y_max) / 2) / img_h
    bbox_width = (x_max - x_min) / img_w
    bbox_height = (y_max - y_min) / img_h

    return f"0 {x_center:.6f} {y_center:.6f} {bbox_width:.6f} {bbox_height:.6f}"

# Track used 'not fractured' images across all splits
used_not_fractured = set()

all_not_fractured = [f for f in os.listdir(not_fractured_img_dir) if f.endswith((".jpg", ".png", ".jpeg"))]

# Process each dataset split
for split in ["train", "val", "test"]:
    img_src_dir = os.path.join(raw_data_dir, split, "img")
    ann_src_dir = os.path.join(raw_data_dir, split, "ann")

    img_dest_dir = os.path.join(yolo_data_dir, "images", split)
    ann_dest_dir = os.path.join(yolo_data_dir, "labels", split)

    image_files = {os.path.splitext(f)[0]: f for f in os.listdir(img_src_dir) if f.endswith((".jpg", ".png", ".jpeg"))}
    fractured_count = len(image_files)

    # Process fractured images
    for json_file in os.listdir(ann_src_dir):
        if not json_file.endswith(".json"):
            continue
        
        base_name = json_file.replace(".jpg.json", "").replace(".png.json", "").replace(".jpeg.json", "")

        if base_name not in image_files:
            print(f"Skipping {json_file}: No matching image found")
            continue

        img_name = image_files[base_name]
        img_path = os.path.join(img_src_dir, img_name)
        json_path = os.path.join(ann_src_dir, json_file)

        # Load annotation JSON
        with open(json_path, "r") as f:
            data = json.load(f)

        img_width, img_height = data["size"]["width"], data["size"]["height"]
        yolo_annotations = []

        # Process objects (only rectangles)
        for obj in data["objects"]:
            if obj["geometryType"] == "rectangle":
                bbox = obj["points"]["exterior"]
                yolo_annotations.append(convert_bbox_to_yolo(img_width, img_height, bbox))

        # Save YOLO annotations
        if yolo_annotations:
            yolo_label_path = os.path.join(ann_dest_dir, base_name + ".txt")
            with open(yolo_label_path, "w") as f:
                f.write("\n".join(yolo_annotations))

        # Copy image to YOLO dataset
        shutil.copy(img_path, img_dest_dir)

    # Process 'not fractured' images
    available_images = list(set(all_not_fractured) - used_not_fractured)

    # Ensure there are enough images to sample
    num_to_sample = min(fractured_count, len(available_images))
    if num_to_sample == 0:
        print(f"Warning: No 'not fractured' images available for {split}")
        continue

    selected_images = random.sample(available_images, num_to_sample)

    for img_name in selected_images:
        used_not_fractured.add(img_name)  # Mark image as used

        img_path = os.path.join(not_fractured_img_dir, img_name)

        # Copy image
        shutil.copy(img_path, img_dest_dir)

        # Create empty annotation file
        yolo_label_path = os.path.join(ann_dest_dir, img_name + ".txt")
        with open(yolo_label_path, "w") as f:
            pass  # Empty file

print("Dataset conversion completed successfully.")


Dataset conversion completed successfully.


Let's make sure that we have the same number of images and annotations. This can also be compared to our `02_raw_data_analysis.ipynb`, and should now contain twice the previous files. 

In [15]:
data_counts = {"Split": [], "Images": [], "Annotations": []}

# Count images and annotations in each split
for split in ["train", "val", "test"]:
    img_dir = os.path.join(yolo_data_dir, "images", split)
    ann_dir = os.path.join(yolo_data_dir, "labels", split)

    num_images = len([f for f in os.listdir(img_dir) if f.endswith((".jpg", ".png", ".jpeg"))])
    num_annotations = len([f for f in os.listdir(ann_dir) if f.endswith(".txt")])

    data_counts["Split"].append(split)
    data_counts["Images"].append(num_images)
    data_counts["Annotations"].append(num_annotations)

df_counts = pd.DataFrame(data_counts)
df_counts

Unnamed: 0,Split,Images,Annotations
0,train,1148,1148
1,val,164,164
2,test,122,122


In the next step, we need to create the yaml file necessary to use the YOLO model.

In [16]:
# Define the paths for train, val, and test images
data_yaml = {
    "train": "images/train",
    "val": "images/val",
    "test": "images/test",  # Optional
    "nc": 1,  # Number of classes
    "names": ["fractured"]  # Class names
}

# Save the updated YAML file
yaml_path = os.path.join(yolo_data_dir, "data.yaml")
with open(yaml_path, "w") as file:
    yaml.dump(data_yaml, file, default_flow_style=False)

print(f"data.yaml created at: {yaml_path}")

data.yaml created at: data_object_detection_incl_not_fractured_yolo\data.yaml


## 2. Train YOLO model

In [21]:
from ultralytics import YOLO

model = YOLO("yolov8n.pt")  # Use "n" for a lightweight model
model.train(data="data_object_detection_yolo\data.yaml", epochs=5, imgsz=640, batch=8)


Ultralytics 8.3.73  Python-3.11.9 torch-2.6.0+cpu CPU (13th Gen Intel Core(TM) i7-1370P)
[34m[1mengine\trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=data_object_detection_yolo\data.yaml, epochs=5, time=None, patience=100, batch=8, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train5, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=Tru

RuntimeError: Dataset 'data_object_detection_yolo/data.yaml' error  'data_object_detection_yolo\data.yaml' does not exist

In [22]:
model.train(data=r"C:Users/natha/Documents/HSLU_local/Computer Vision/CVI03.HS24-Computer-Vision/datasets/data_object_detection_yolo/data.yaml",
            epochs=5, imgsz=640, batch=8)


Ultralytics 8.3.73  Python-3.11.9 torch-2.6.0+cpu CPU (13th Gen Intel Core(TM) i7-1370P)
[34m[1mengine\trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=C:Users/natha/Documents/HSLU_local/Computer Vision/CVI03.HS24-Computer-Vision/datasets/data_object_detection_yolo/data.yaml, epochs=5, time=None, patience=100, batch=8, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train6, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=Fals

RuntimeError: Dataset 'C:Users/natha/Documents/HSLU_local/Computer Vision/CVI03.HS24-Computer-Vision/datasets/data_object_detection_yolo/data.yaml' error  'C:Users/natha/Documents/HSLU_local/Computer Vision/CVI03.HS24-Computer-Vision/datasets/data_object_detection_yolo/data.yaml' does not exist

In [14]:
from ultralytics import YOLO

# Load a pretrained YOLO model
model = YOLO("yolo11n.pt")

# Perform object detection on an image
results = model("https://ultralytics.com/images/bus.jpg")

# Visualize the results
for result in results:
    result.show()

Creating new Ultralytics Settings v0.0.6 file  
View Ultralytics Settings with 'yolo settings' or at 'C:\Users\natha\AppData\Roaming\Ultralytics\settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n.pt to 'yolo11n.pt'...


100%|██████████| 5.35M/5.35M [00:00<00:00, 40.0MB/s]



Downloading https://ultralytics.com/images/bus.jpg to 'bus.jpg'...


100%|██████████| 134k/134k [00:00<00:00, 7.05MB/s]


image 1/1 c:\Users\natha\Documents\HSLU_local\Computer Vision\CVI03.HS24-Computer-Vision\bus.jpg: 640x480 4 persons, 1 bus, 85.7ms
Speed: 4.9ms preprocess, 85.7ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 480)
