In [None]:
import os

os.environ["CHECKPOINTS_PATH"] = "../checkpoints"
os.environ["TRACKING_RESULTS_PATH"] = "data/processed_tracking_results"

import itertools
import shutil
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
from src.api.models.pydantic import SimRoomClassDTO
from src.config import UNKNOWN_CLASS_ID
from ultralytics import YOLO

from experiment.experiment_utils import (
    calculate_metrics,
    create_confusion_matrix,
    evaluate_predictions,
    render_confusion_matrix,
    update_confusion_matrix,
)
from experiment.settings import (
    CLASS_ID_TO_NAME,
    FINAL_PREDICTIONS_PATH,
    FULLY_LABELED_RECORDINGS,
    GAZE_SEGMENTATION_RESULTS_PATH,
    LABELING_REC_DIFF_BACKGROUND_ID,
    LABELING_REC_SAME_BACKGROUND_ID,
    OBJECT_DATASETS_PATH,
    TRAINING_DATASETS_PATH,
    YOLO_MODELS_PATH,
    SIMROOM_ID,
    RECORDINGS_PATH,
    RECORDING_FRAMES_PATH,
    MODELS_PATH
)
from src.api.db import Session, engine
from src.api.repositories import simrooms_repo
from src.api.services import simrooms_service
import cv2
import numpy as np
import tempfile
from src.utils import extract_frames_to_dir
import albumentations as A
from tqdm import tqdm

%matplotlib inline

# Global Config Parameters

In [None]:
# Side length (in pixels) of the square crops written to the dataset; the same
# value is passed as `imgsz` when training the YOLO model further down.
IMG_CROP_SIZE = 640
# Half of the crop side; used to build a crop window centred on a bounding box.
IMG_CROP_SIZE_HALF = IMG_CROP_SIZE // 2
# Root directory under which the object-detection datasets are generated.
OBJECT_DETECTION_DATASETS_PATH = TRAINING_DATASETS_PATH / "object_detection"

# Create Training Data

In [3]:
def get_tracking_results_per_class(
    session: Session,
    labeling_recording_id: str,
    expected_num_classes: int = 15,
):
    """Look up the tracking-result paths of every tracked class of a recording.

    Args:
        session: Open database session handed to the repository functions.
        labeling_recording_id: ID of the labeling recording inside ``SIMROOM_ID``.
        expected_num_classes: Number of tracked classes the calibration recording
            must have. Defaults to 15, the value that used to be hard-coded.

    Returns:
        A tuple ``(tracking_results_per_class, tracked_classes)`` where the first
        item maps ``tracked_class.id`` to whatever
        ``simrooms_repo.get_tracking_result_paths`` returns for that class and the
        second item is the list of tracked classes itself.

    Raises:
        ValueError: If the number of tracked classes differs from
            ``expected_num_classes``.
    """
    calibration_recording = simrooms_repo.get_calibration_recording(
        session, simroom_id=SIMROOM_ID, recording_id=labeling_recording_id
    )
    calibration_id = calibration_recording.id
    tracked_classes = simrooms_repo.get_tracked_classes(session, calibration_id)

    # Fail early when the labeling is incomplete instead of silently building a
    # dataset with missing classes.
    if len(tracked_classes) != expected_num_classes:
        raise ValueError(
            f"Expected {expected_num_classes} tracked classes but got {len(tracked_classes)}"
        )

    tracking_results_per_class = {
        tracked_class.id: simrooms_repo.get_tracking_result_paths(
            session, calibration_id, tracked_class.id
        )
        for tracked_class in tracked_classes
    }

    return tracking_results_per_class, tracked_classes

In [45]:
def get_per_class_metadata(
    session: Session,
    labeling_recording_id: str,
):
    """Collect per-frame metadata for every tracked class of a labeling recording.

    Every tracking-result file of a class is opened with ``np.load`` and its
    ``frame_idx``, ``laplacian_variance``, ``mask`` and ``box`` entries are read.

    Args:
        session: Open database session.
        labeling_recording_id: ID of the labeling recording to inspect.

    Returns:
        Dict mapping class id to a dict with the keys ``class_name``, ``color``,
        ``frame_indexes``, ``laplacian_variances``, ``mask_areas`` and ``bboxes``.
        The four list entries are index-aligned (one item per tracking result).

    Raises:
        ValueError: If the frame index encoded in a file name does not match the
            ``frame_idx`` stored inside that file.
    """
    tracking_results_per_class, tracked_classes = get_tracking_results_per_class(
        session, labeling_recording_id
    )
    per_class_metadata = {}

    for tracked_class in tracked_classes:
        class_id = tracked_class.id
        tracking_results = tracking_results_per_class[class_id]

        frame_indexes = []
        laplacian_variances = []
        mask_areas = []
        bboxes = []
        for tracking_result in tracking_results:
            # The loaded archive keeps a file handle open; the context manager
            # closes it right away instead of leaking one handle per result.
            with np.load(tracking_result) as file:
                if int(tracking_result.stem) != int(file["frame_idx"]):
                    raise ValueError(
                        f"Frame index mismatch: {tracking_result.stem} != {file['frame_idx']}"
                    )

                frame_indexes.append(int(tracking_result.stem))
                laplacian_variances.append(file["laplacian_variance"])
                # The mask area is the sum over all mask entries.
                mask_areas.append(np.sum(file["mask"]))
                bboxes.append(file["box"])

        per_class_metadata[class_id] = {
            "class_name": tracked_class.class_name,
            "color": tracked_class.color,
            "frame_indexes": frame_indexes,
            "laplacian_variances": laplacian_variances,
            "mask_areas": mask_areas,
            "bboxes": bboxes,
        }

    return per_class_metadata


def select_samples_per_class(per_class_metadata, num_samples_per_class):
    """Randomly pick ``num_samples_per_class`` (frame index, bbox) pairs per class.

    Classes with fewer available samples than requested are skipped (a message
    is printed). Sampling is without replacement and uses NumPy's global random
    state, so seed it beforehand for reproducible datasets.

    Args:
        per_class_metadata: Dict mapping class id to a dict that holds at least
            the index-aligned sequences ``frame_indexes`` and ``bboxes``.
        num_samples_per_class: Number of samples to draw for each class.

    Returns:
        Dict mapping class id to ``{"frame_indexes": tuple, "bboxes": tuple}``
        for every class that had enough samples.
    """
    selected_samples_per_class = {}

    for class_id, metadata in per_class_metadata.items():
        frame_indexes = metadata["frame_indexes"]
        if len(frame_indexes) < num_samples_per_class:
            print(
                f"Class {CLASS_ID_TO_NAME[class_id]} has only {len(frame_indexes)} samples, ignoring it"
            )
            continue

        # Only frame indexes and boxes are needed here; other metadata keys are
        # intentionally not required.
        sample_list = list(zip(frame_indexes, metadata["bboxes"]))

        chosen_positions = np.random.choice(
            len(sample_list), num_samples_per_class, replace=False
        )
        selected_samples = [sample_list[i] for i in chosen_positions]

        if selected_samples:
            selected_frame_indexes, selected_bboxes = zip(*selected_samples)
        else:
            # zip(*[]) yields nothing to unpack, so handle "0 samples" explicitly.
            selected_frame_indexes, selected_bboxes = (), ()

        selected_samples_per_class[class_id] = {
            "frame_indexes": selected_frame_indexes,
            "bboxes": selected_bboxes,
        }

    return selected_samples_per_class


def get_samples_per_frame(per_class_metadata):
    """Regroup per-class samples so they can be looked up by frame index.

    Args:
        per_class_metadata: Dict mapping class id to a dict holding the
            index-aligned sequences ``frame_indexes`` and ``bboxes``.

    Returns:
        Dict mapping frame index to a list of ``(class_id, bbox)`` tuples, in
        the order the classes and their samples were encountered.
    """
    frame_to_samples = {}

    for class_id, class_info in per_class_metadata.items():
        indexed_boxes = zip(class_info["frame_indexes"], class_info["bboxes"])
        for frame_index, bbox in indexed_boxes:
            # Create the per-frame bucket on first use, then append to it.
            frame_to_samples.setdefault(frame_index, []).append((class_id, bbox))

    return frame_to_samples


def _unzip_samples(samples):
    """Split a list of (frame_index, bbox) pairs into two aligned tuples."""
    if not samples:
        # zip(*[]) yields nothing, which cannot be unpacked into two names.
        return (), ()
    frame_indexes, bboxes = zip(*samples)
    return frame_indexes, bboxes


def get_train_val_split(
    selected_samples_per_class,
    train_ratio=0.8,
):
    """Shuffle each class's samples and split them into train and validation parts.

    Shuffling uses NumPy's global random state. For every class the first
    ``int(n * train_ratio)`` shuffled samples go to the training split and the
    rest to the validation split; either side may be empty.

    Args:
        selected_samples_per_class: Dict mapping class id to a dict with the
            index-aligned sequences ``frame_indexes`` and ``bboxes``.
        train_ratio: Fraction of samples assigned to the training split; must lie
            in ``[0, 1]``.

    Returns:
        ``(train_samples_per_class, val_samples_per_class)``; both use the same
        layout as the input, with tuples for ``frame_indexes`` and ``bboxes``.

    Raises:
        ValueError: If ``train_ratio`` is outside ``[0, 1]``.
    """
    if not 0.0 <= train_ratio <= 1.0:
        raise ValueError(f"train_ratio must be between 0 and 1, got {train_ratio}")

    train_samples_per_class = {}
    val_samples_per_class = {}

    for class_id, metadata in selected_samples_per_class.items():
        sample_list = list(zip(metadata["frame_indexes"], metadata["bboxes"]))

        # Shuffle in place so the split is random but frame/bbox pairs stay together.
        np.random.shuffle(sample_list)

        split_index = int(len(sample_list) * train_ratio)
        train_frame_indexes, train_bboxes = _unzip_samples(sample_list[:split_index])
        val_frame_indexes, val_bboxes = _unzip_samples(sample_list[split_index:])

        train_samples_per_class[class_id] = {
            "frame_indexes": train_frame_indexes,
            "bboxes": train_bboxes,
        }
        val_samples_per_class[class_id] = {
            "frame_indexes": val_frame_indexes,
            "bboxes": val_bboxes,
        }

    return train_samples_per_class, val_samples_per_class


def plot_per_class_laplacian_variance(per_class_metadata):
    """Plot the distribution of Laplacian variances for every class.

    Two figures are shown: one with a box plot per class (positioned at the
    class id on the x-axis) and one with a 50-bin histogram per class, stacked
    vertically.

    Args:
        per_class_metadata: Dict mapping class id to a dict with the keys
            ``class_name``, ``color`` and ``laplacian_variances``.

    Returns:
        None. The figures are displayed via ``plt.show()``. Nothing is plotted
        when ``per_class_metadata`` is empty.
    """
    if not per_class_metadata:
        # plt.subplots(0, 1) would raise; there is nothing to draw anyway.
        print("No classes to plot")
        return

    # Box plots of all classes in a single axes.
    fig, ax = plt.subplots(figsize=(12, 6))
    for class_id, metadata in per_class_metadata.items():
        ax.boxplot(
            metadata["laplacian_variances"],
            positions=[class_id],
            widths=0.5,
            patch_artist=True,
            boxprops=dict(facecolor=metadata["color"]),
        )
    ax.set_xticks(list(per_class_metadata.keys()))
    ax.set_xticklabels([
        metadata["class_name"] for metadata in per_class_metadata.values()
    ])
    plt.setp(ax.xaxis.get_majorticklabels(), rotation=45, ha="right")
    ax.set_xlabel("Class")
    ax.set_ylabel("Laplacian Variance")
    ax.set_title("Laplacian Variance per Class")
    ax.grid()
    plt.show()

    # One histogram per class, one row each. squeeze=False keeps `axs` a 2-D
    # array even for a single class, so indexing works for any class count.
    num_classes = len(per_class_metadata)
    fig, axs = plt.subplots(
        num_classes, 1, figsize=(12, 6 * num_classes), squeeze=False
    )
    for hist_ax, metadata in zip(axs[:, 0], per_class_metadata.values()):
        hist_ax.hist(
            metadata["laplacian_variances"],
            bins=50,
            color=metadata["color"],
            alpha=0.7,
            edgecolor="black",
        )
        hist_ax.set_title(f"Laplacian Variance Histogram - {metadata['class_name']}")
        hist_ax.set_xlabel("Laplacian Variance")
        hist_ax.set_ylabel("Frequency")
        hist_ax.grid()
    plt.tight_layout()
    plt.show()

In [6]:
# Extract the frames of the same-background labeling recording; `frames` is
# later passed to `create_dataset` and each entry is read with `cv2.imread`.
# NOTE(review): `tmp_frames_dir` is never used or cleaned up in this notebook —
# presumably it must stay referenced to keep the temporary frames alive; confirm.
frames, tmp_frames_dir = simrooms_service.extract_tmp_frames(
    LABELING_REC_SAME_BACKGROUND_ID
)

In [19]:
# Load the per-class tracking metadata of the same-background labeling
# recording; the session is only needed while the metadata is gathered.
with Session(engine) as session:
    per_class_metadata = get_per_class_metadata(session, LABELING_REC_SAME_BACKGROUND_ID)

In [None]:
# Show box plots and histograms of the Laplacian variance for every class.
plot_per_class_laplacian_variance(per_class_metadata)

In [46]:
def draw_bboxes(
    image_np, bboxes, labels, class_name_map=None, color=(0, 255, 0), thickness=2
):
    """Draw labelled bounding boxes on a copy of an image.

    Args:
        image_np: Image array; it is copied, never modified in place.
        bboxes: List or ndarray of boxes. The first four values of each box are
            converted to int and used as ``x_min, y_min, x_max, y_max``.
        labels: One label per box. If this is not a list/ndarray, it is only
            used when its length matches ``bboxes`` and all items are
            str/int/float; otherwise every box is labelled ``"?"``.
        class_name_map: Optional dict translating a label to the text to draw;
            labels missing from the map are drawn as ``str(label)``.
        color: Colour tuple passed to OpenCV for rectangles and text.
        thickness: Rectangle line thickness.

    Returns:
        The annotated copy of ``image_np``. The unmodified copy is returned when
        ``bboxes`` has an unsupported type or contains no boxes.
    """
    img_res = image_np.copy()

    if not isinstance(bboxes, (list, np.ndarray)):
        print(f"Warning: bboxes is not a list or ndarray: {type(bboxes)}")
        return img_res
    if not isinstance(labels, (list, np.ndarray)):
        print(f"Warning: labels is not a list or ndarray: {type(labels)}")
        # Labels without a length (e.g. None) are treated like a length
        # mismatch instead of crashing on len().
        try:
            lengths_match = len(bboxes) == len(labels)
        except TypeError:
            lengths_match = False

        if not lengths_match:
            print("Warning: bbox and label length mismatch, cannot draw labels.")
            labels = ["?" for _ in bboxes]  # Placeholder
        elif not all(isinstance(l, (str, int, float)) for l in labels):
            print("Warning: labels contain non-primitive types, cannot draw reliably.")
            labels = ["?" for _ in bboxes]

    if len(bboxes) == 0:
        # Nothing to draw.
        return img_res

    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.5
    font_thickness = 1

    for bbox, label in zip(bboxes, labels):
        try:
            if len(bbox) < 4:
                print(f"Warning: Skipping invalid bbox (fewer than 4 coords): {bbox}")
                continue
            x_min, y_min, x_max, y_max = map(int, bbox[:4])
        except (ValueError, TypeError) as e:
            print(f"Warning: Could not convert bbox coords to int: {bbox}, Error: {e}")
            continue  # Skip this bbox

        cv2.rectangle(img_res, (x_min, y_min), (x_max, y_max), color, thickness)

        label_name = (
            str(label)
            if class_name_map is None
            else class_name_map.get(label, str(label))
        )
        # Put the text above the box when there is room, otherwise just inside
        # the top edge.
        (text_width, text_height), baseline = cv2.getTextSize(
            label_name, font, font_scale, font_thickness
        )
        text_y = (
            y_min - baseline if y_min - baseline > text_height else y_min + text_height
        )
        cv2.putText(
            img_res, label_name, (x_min, text_y), font, font_scale, color, font_thickness
        )

    return img_res

In [97]:
def samples_per_class_to_samples_per_frame(samples_per_class):
    """Invert a class-keyed sample mapping into a frame-keyed one.

    Args:
        samples_per_class: Dict mapping class id to a dict holding the
            index-aligned sequences ``frame_indexes`` and ``bboxes``.

    Returns:
        Dict mapping frame index to the list of ``(class_id, bbox)`` tuples that
        fall on that frame, in encounter order.
    """
    grouped_by_frame = {}

    for class_id, class_samples in samples_per_class.items():
        indexes = class_samples["frame_indexes"]
        boxes = class_samples["bboxes"]

        for frame_index, bbox in zip(indexes, boxes):
            entry = (class_id, bbox)
            if frame_index in grouped_by_frame:
                grouped_by_frame[frame_index].append(entry)
            else:
                grouped_by_frame[frame_index] = [entry]

    return grouped_by_frame


def create_data_files(
    labels_path: Path,
    images_path: Path,
    class_label_to_model_id,
    sample_idx,
    image,
    bboxes,
    class_labels,
):
    """Write one dataset sample: a JPEG image plus its YOLO label file.

    Both files are named after ``sample_idx`` zero-padded to 10 digits.

    Args:
        labels_path: Directory that receives the ``.txt`` label file.
        images_path: Directory that receives the ``.jpg`` image.
        class_label_to_model_id: Dict mapping a class label to the integer class
            id written to the label file.
        sample_idx: Running index of the sample.
        image: Image array; ``shape[0]`` is used as height, ``shape[1]`` as width.
        bboxes: Boxes as ``(x1, y1, x2, y2)`` in pixel coordinates of ``image``.
        class_labels: One class label per box, aligned with ``bboxes``.

    Raises:
        OSError: If OpenCV reports that the image could not be written.
    """
    padded_sample_idx = str(sample_idx).zfill(10)
    image_height, image_width = image.shape[0], image.shape[1]

    # Save image. The path is passed as str (as done for cv2.imread elsewhere in
    # this notebook), and imwrite's False-on-failure result is not ignored so a
    # label file is never written for a missing image.
    image_path = images_path / f"{padded_sample_idx}.jpg"
    if not cv2.imwrite(str(image_path), image):
        raise OSError(f"Failed to write image to {image_path}")

    label_file_path = labels_path / f"{padded_sample_idx}.txt"

    # Convert corner boxes to normalised YOLO rows: "<id> <cx> <cy> <w> <h>".
    file_rows = []
    for bbox, class_label in zip(bboxes, class_labels):
        x1, y1, x2, y2 = bbox

        x_center = (x1 + x2) / 2 / image_width
        y_center = (y1 + y2) / 2 / image_height
        width = (x2 - x1) / image_width
        height = (y2 - y1) / image_height

        model_id = class_label_to_model_id[class_label]
        file_rows.append(f"{model_id} {x_center} {y_center} {width} {height}")

    with open(label_file_path, "w") as f:
        f.writelines(row + "\n" for row in file_rows)


def create_train_or_val_dataset(
    per_class_metadata,
    class_label_to_model_id,
    train_samples_per_class,
    samples_per_frame,
    frames,
    images_path,
    labels_path,
    is_validation=False,
):
    """Write the cropped images and YOLO labels of one dataset split.

    For every selected sample, a window of up to ``IMG_CROP_SIZE`` pixels is
    cropped around the centre of the sample's box (clamped to the image
    borders) and padded back to ``IMG_CROP_SIZE``. The label file of a crop is
    built from *all* boxes known for that frame (``samples_per_frame``), not
    only the selected one; boxes are filtered by the ``min_visibility=0.7``
    setting of the albumentations bbox parameters.

    Args:
        per_class_metadata: Dict mapping class id to metadata with ``class_name``.
        class_label_to_model_id: Dict mapping class name to model class id.
        train_samples_per_class: Selected samples of this split (despite the
            name, the validation samples are passed here for the val split).
        samples_per_frame: Dict mapping frame index to all ``(class_id, bbox)``
            tuples of that frame.
        frames: Sequence of frame image paths; the position in this sequence is
            used as the frame index.
        images_path: Output directory for the crops.
        labels_path: Output directory for the label files.
        is_validation: Kept for interface compatibility. Both splits currently
            use the same crop + pad pipeline (train-only augmentations such as
            flips or brightness/contrast changes are not enabled).

    Raises:
        OSError: If a frame image that contains selected samples cannot be read.
    """
    selected_samples_per_frame = samples_per_class_to_samples_per_frame(
        train_samples_per_class
    )

    current_sample_idx = 0
    for frame_idx, frame in enumerate(tqdm(frames)):
        frame_selection = selected_samples_per_frame.get(frame_idx)
        if frame_selection is None:
            # No sample of this split lives on this frame.
            continue

        image = cv2.imread(str(frame))
        if image is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise OSError(f"Could not read frame image: {frame}")
        image_height, image_width = image.shape[0], image.shape[1]

        # Gather every known box and label for the current frame.
        class_ids, bboxes = zip(*samples_per_frame[frame_idx])
        bboxes = np.array(bboxes)
        class_labels = [
            per_class_metadata[class_id]["class_name"] for class_id in class_ids
        ]

        # One crop per selected sample on this frame.
        for _class_id, box in frame_selection:
            x1, y1, x2, y2 = box
            cx, cy = (x1 + x2) // 2, (y1 + y2) // 2

            # Crop window around the box centre, clamped to the image; cast to
            # plain ints so the crop transform always receives integer pixels.
            x_min = int(max(0, cx - IMG_CROP_SIZE_HALF))
            y_min = int(max(0, cy - IMG_CROP_SIZE_HALF))
            x_max = int(min(image_width, cx + IMG_CROP_SIZE_HALF))
            y_max = int(min(image_height, cy + IMG_CROP_SIZE_HALF))

            transform = A.Compose(
                [
                    A.Crop(x_min=x_min, y_min=y_min, x_max=x_max, y_max=y_max),
                    A.PadIfNeeded(min_height=IMG_CROP_SIZE, min_width=IMG_CROP_SIZE),
                ],
                bbox_params=A.BboxParams(
                    format="pascal_voc", label_fields=["class_labels"], min_visibility=0.7
                ),
            )

            augmented = transform(image=image, bboxes=bboxes, class_labels=class_labels)

            create_data_files(
                labels_path,
                images_path,
                class_label_to_model_id,
                current_sample_idx,
                augmented["image"],
                augmented["bboxes"],
                augmented["class_labels"],
            )

            current_sample_idx += 1


def create_metadata_yaml(
    dataset_path: Path,
    per_class_metadata: dict,
):
    """Write the dataset's ``data.yaml`` and return the class-name → id mapping.

    Class ids are assigned by enumeration order of ``per_class_metadata``.

    Args:
        dataset_path: Dataset root; ``data.yaml`` is written directly inside it
            and its resolved absolute path is stored under ``path``.
        per_class_metadata: Dict whose values each contain a ``class_name``.

    Returns:
        Dict mapping class name to the integer id used in ``data.yaml``.
    """
    class_names = [info["class_name"] for info in per_class_metadata.values()]
    class_label_to_model_id = {name: idx for idx, name in enumerate(class_names)}

    # The leading empty string reproduces the file's initial blank line.
    yaml_lines = [
        "",
        f"path: {dataset_path.resolve()}",
        "train: images/train",
        "val: images/val",
        "names:",
    ]
    yaml_lines.extend(f"  {idx}: {name}" for idx, name in enumerate(class_names))

    (dataset_path / "data.yaml").write_text("\n".join(yaml_lines) + "\n")

    return class_label_to_model_id


def create_dataset(
    per_class_metadata: dict,
    frames: list[Path],
    datasets_path: Path,
    num_samples_per_class: int,
    train_ratio: float = 0.8,
):
    """Build a YOLO dataset (train + val) with a fixed number of samples per class.

    The dataset is written to ``datasets_path / f"{num_samples_per_class}_samples"``
    with the sub-folders ``images/train``, ``labels/train``, ``images/val`` and
    ``labels/val`` plus a ``data.yaml`` file.

    Args:
        per_class_metadata: Per-class metadata as produced by
            ``get_per_class_metadata``.
        frames: Frame image paths; the list position is used as the frame index.
        datasets_path: Directory under which the dataset folder is created.
        num_samples_per_class: Samples drawn per class before splitting.
        train_ratio: Fraction of the drawn samples used for training; the rest
            is used for validation. Defaults to 0.8, the previously fixed value.

    Returns:
        Path of the created dataset directory.
    """
    print(f"Creating dataset with {num_samples_per_class} samples per class")

    dataset_path = datasets_path / f"{num_samples_per_class}_samples"
    train_images_path = dataset_path / "images/train"
    train_labels_path = dataset_path / "labels/train"
    val_images_path = dataset_path / "images/val"
    val_labels_path = dataset_path / "labels/val"

    for directory in (
        train_images_path,
        train_labels_path,
        val_images_path,
        val_labels_path,
    ):
        directory.mkdir(parents=True, exist_ok=True)

    # Gather necessary metadata
    selected_samples_per_class = select_samples_per_class(
        per_class_metadata, num_samples_per_class
    )
    samples_per_frame = get_samples_per_frame(per_class_metadata)
    train_samples_per_class, val_samples_per_class = get_train_val_split(
        selected_samples_per_class, train_ratio=train_ratio
    )

    # Create the dataset
    print("Creating training dataset")
    class_label_to_model_id = create_metadata_yaml(dataset_path, per_class_metadata)
    create_train_or_val_dataset(
        per_class_metadata,
        class_label_to_model_id,
        train_samples_per_class,
        samples_per_frame,
        frames,
        train_images_path,
        train_labels_path,
    )
    print("Creating validation dataset")
    create_train_or_val_dataset(
        per_class_metadata,
        class_label_to_model_id,
        val_samples_per_class,
        samples_per_frame,
        frames,
        val_images_path,
        val_labels_path,
        is_validation=True,
    )

    return dataset_path

In [None]:
# Start from a clean slate: any previously generated object-detection datasets
# are deleted before the directory is recreated.
if OBJECT_DETECTION_DATASETS_PATH.exists():
    shutil.rmtree(OBJECT_DETECTION_DATASETS_PATH)
OBJECT_DETECTION_DATASETS_PATH.mkdir(parents=True, exist_ok=True)

# Build the dataset with 2000 samples per class; classes with fewer samples are
# skipped by `select_samples_per_class`.
# NOTE(review): sampling and the train/val split use NumPy's global random
# state and no seed is set in this notebook, so reruns give different datasets.
dataset_path = create_dataset(
    per_class_metadata=per_class_metadata,
    frames=frames,
    datasets_path=OBJECT_DETECTION_DATASETS_PATH,
    num_samples_per_class=2000,
)

Creating dataset with 2000 samples per class
Class infuus has only 786 samples, ignoring it
Class ampulevloeistof has only 648 samples, ignoring it
Class ampulepoeder has only 1116 samples, ignoring it
Creating training dataset


100%|██████████| 14121/14121 [01:05<00:00, 217.15it/s]


Creating validation dataset


100%|██████████| 14121/14121 [00:18<00:00, 745.95it/s] 


PosixPath('data/training_datasets/object_detection/2000_samples')

# Model Training

In [3]:
from ultralytics import YOLO

In [4]:
model = YOLO("yolo11n.pt")  # load a pretrained model

# NOTE: `dataset_path` is rebound here to the data.yaml file of the
# 2000-samples dataset (earlier it held the dataset directory returned by
# `create_dataset`).
dataset_path = OBJECT_DETECTION_DATASETS_PATH / "2000_samples/data.yaml"
# Train for 20 epochs at the crop resolution used when the dataset was built.
results = model.train(
    data=dataset_path, epochs=20, imgsz=IMG_CROP_SIZE, device="cuda", batch=32
)

New https://pypi.org/project/ultralytics/8.3.139 available 😃 Update with 'pip install -U ultralytics'
Ultralytics 8.3.67 🚀 Python-3.10.12 torch-2.5.1+cu121 CUDA:0 (NVIDIA GeForce RTX 4090, 24564MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolo11n.pt, data=data/training_datasets/object_detection/2000_samples/data.yaml, epochs=20, time=None, patience=100, batch=32, imgsz=640, save=True, save_period=-1, cache=False, device=cuda, workers=8, project=None, name=train11, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, cla

E0000 00:00:1747571238.291967    4858 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747571238.308900    4858 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Overriding model.yaml nc=80 with nc=15

                   from  n    params  module                                       arguments                     
  0                  -1  1       464  ultralytics.nn.modules.conv.Conv             [3, 16, 3, 2]                 
  1                  -1  1      4672  ultralytics.nn.modules.conv.Conv             [16, 32, 3, 2]                
  2                  -1  1      6640  ultralytics.nn.modules.block.C3k2            [32, 64, 1, False, 0.25]      
  3                  -1  1     36992  ultralytics.nn.modules.conv.Conv             [64, 64, 3, 2]                
  4                  -1  1     26080  ultralytics.nn.modules.block.C3k2            [64, 128, 1, False, 0.25]     
  5                  -1  1    147712  ultralytics.nn.modules.conv.Conv             [128, 128, 3, 2]              
  6                  -1  1     87040  ultralytics.nn.modules.block.C3k2            [128, 128, 1, True]           
  7                  -1  1    295424  ultralytic

[34m[1mtrain: [0mScanning /home/zilian/projects/bachelorproef/experiment/data/training_datasets/object_detection/2000_samples/labels/train.cache... 19200 images, 0 backgrounds, 0 corrupt: 100%|██████████| 19200/19200 [00:00<?, ?it/s]

[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))



[34m[1mval: [0mScanning /home/zilian/projects/bachelorproef/experiment/data/training_datasets/object_detection/2000_samples/labels/val.cache... 4800 images, 0 backgrounds, 0 corrupt: 100%|██████████| 4800/4800 [00:00<?, ?it/s]


Plotting labels to /home/zilian/projects/bachelorproef/runs/detect/train11/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000526, momentum=0.9) with parameter groups 81 weight(decay=0.0), 88 weight(decay=0.0005), 87 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to [1m/home/zilian/projects/bachelorproef/runs/detect/train11[0m
Starting training for 20 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/20       4.7G     0.8714      1.974     0.9322        136        640: 100%|██████████| 600/600 [01:18<00:00,  7.60it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 75/75 [00:17<00:00,  4.34it/s]


                   all       4800      16472      0.869      0.767      0.773      0.622

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/20      4.82G     0.7078     0.8363     0.8842        163        640: 100%|██████████| 600/600 [01:14<00:00,  8.06it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 75/75 [00:17<00:00,  4.35it/s]


                   all       4800      16472      0.825      0.802      0.834      0.691

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/20      4.65G     0.6736     0.6633     0.8773        129        640: 100%|██████████| 600/600 [01:09<00:00,  8.66it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 75/75 [00:14<00:00,  5.19it/s]


                   all       4800      16472      0.806      0.839      0.841      0.701

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/20      4.58G     0.6569     0.5859     0.8752        164        640: 100%|██████████| 600/600 [01:08<00:00,  8.73it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 75/75 [00:13<00:00,  5.37it/s]

                   all       4800      16472      0.816      0.839      0.861      0.729






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/20      4.59G     0.6285     0.5314     0.8687        119        640: 100%|██████████| 600/600 [01:22<00:00,  7.28it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 75/75 [00:15<00:00,  4.69it/s]

                   all       4800      16472      0.796      0.861      0.868      0.735






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/20      4.61G     0.6056     0.4947     0.8638        204        640: 100%|██████████| 600/600 [01:17<00:00,  7.77it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 75/75 [00:16<00:00,  4.48it/s]


                   all       4800      16472      0.815      0.853      0.853      0.736

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/20      4.64G     0.5899     0.4736     0.8613        188        640: 100%|██████████| 600/600 [01:07<00:00,  8.85it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 75/75 [00:13<00:00,  5.43it/s]


                   all       4800      16472      0.849      0.882      0.882      0.756

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/20      4.59G     0.5748     0.4554     0.8571        181        640: 100%|██████████| 600/600 [01:08<00:00,  8.73it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 75/75 [00:15<00:00,  4.74it/s]

                   all       4800      16472      0.837      0.857      0.884      0.765






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/20      4.64G     0.5606     0.4408     0.8538        140        640: 100%|██████████| 600/600 [01:09<00:00,  8.64it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 75/75 [00:13<00:00,  5.75it/s]

                   all       4800      16472      0.862      0.895      0.903      0.779






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/20      4.66G     0.5505     0.4253     0.8505        145        640: 100%|██████████| 600/600 [01:09<00:00,  8.60it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 75/75 [00:12<00:00,  6.20it/s]


                   all       4800      16472      0.877      0.856      0.898      0.784
Closing dataloader mosaic
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      11/20      4.61G     0.5103     0.3783     0.8234        107        640: 100%|██████████| 600/600 [01:06<00:00,  8.97it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 75/75 [00:19<00:00,  3.93it/s]


                   all       4800      16472      0.856      0.887      0.907      0.793

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      12/20      4.61G     0.4964     0.3644     0.8197        104        640: 100%|██████████| 600/600 [01:04<00:00,  9.29it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 75/75 [00:13<00:00,  5.55it/s]

                   all       4800      16472      0.896      0.871      0.919      0.803






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      13/20      4.61G     0.4863     0.3512     0.8177         98        640: 100%|██████████| 600/600 [01:07<00:00,  8.90it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 75/75 [00:13<00:00,  5.68it/s]

                   all       4800      16472      0.879      0.901      0.918      0.809






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      14/20      4.61G     0.4774     0.3398     0.8157        104        640: 100%|██████████| 600/600 [01:04<00:00,  9.35it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 75/75 [00:16<00:00,  4.47it/s]

                   all       4800      16472      0.894      0.913      0.924      0.814






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      15/20      4.61G     0.4701     0.3303     0.8137         97        640: 100%|██████████| 600/600 [01:05<00:00,  9.20it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 75/75 [00:14<00:00,  5.28it/s]

                   all       4800      16472      0.874      0.923      0.937      0.823






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      16/20      4.61G     0.4585      0.321     0.8103        113        640: 100%|██████████| 600/600 [01:07<00:00,  8.91it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 75/75 [00:17<00:00,  4.28it/s]


                   all       4800      16472      0.893       0.92      0.939       0.83

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      17/20      4.61G     0.4507     0.3127      0.809        109        640: 100%|██████████| 600/600 [01:04<00:00,  9.30it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 75/75 [00:16<00:00,  4.52it/s]


                   all       4800      16472      0.905      0.921      0.947      0.833

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      18/20      4.61G     0.4425     0.3044      0.808        105        640: 100%|██████████| 600/600 [01:05<00:00,  9.11it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 75/75 [00:14<00:00,  5.31it/s]

                   all       4800      16472      0.916      0.937      0.958      0.845






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      19/20      4.61G     0.4335     0.2948     0.8055        105        640: 100%|██████████| 600/600 [01:07<00:00,  8.92it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 75/75 [00:17<00:00,  4.32it/s]

                   all       4800      16472      0.932      0.915      0.958       0.85






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      20/20      4.61G      0.428     0.2867     0.8047         87        640: 100%|██████████| 600/600 [01:06<00:00,  9.04it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 75/75 [00:13<00:00,  5.57it/s]

                   all       4800      16472      0.926       0.93      0.958      0.852






20 epochs completed in 0.475 hours.
Optimizer stripped from /home/zilian/projects/bachelorproef/runs/detect/train11/weights/last.pt, 5.5MB
Optimizer stripped from /home/zilian/projects/bachelorproef/runs/detect/train11/weights/best.pt, 5.5MB

Validating /home/zilian/projects/bachelorproef/runs/detect/train11/weights/best.pt...
Ultralytics 8.3.67 🚀 Python-3.10.12 torch-2.5.1+cu121 CUDA:0 (NVIDIA GeForce RTX 4090, 24564MiB)
YOLO11n summary (fused): 238 layers, 2,585,077 parameters, 0 gradients, 6.3 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 75/75 [00:23<00:00,  3.19it/s]


                   all       4800      16472      0.926       0.93      0.958      0.852
        naaldcontainer       1466       1466      0.982      0.993      0.993      0.971
                 spuit       1889       1889      0.886      0.862      0.942      0.698
             keukenmes       2009       2009      0.963      0.982      0.991      0.931
                infuus         30         30      0.643      0.733      0.742      0.597
           stethoscoop       1803       1803      0.973      0.997      0.994      0.978
               bol wol       1099       1099      0.916      0.987      0.983      0.862
                 snoep       1262       1262      0.943      0.953      0.982      0.852
               nuchter        760        760      0.957      0.993      0.994      0.978
             fotokader       1087       1087      0.987      0.992      0.995      0.979
              iced tea       1598       1598      0.989      0.994      0.995      0.884
                  bri

In [49]:
model_path = Path("data/models/object_detection") / "2000_samples.pt"
# Make sure the target directory exists so saving also works on a fresh checkout.
model_path.parent.mkdir(parents=True, exist_ok=True)
model.save(str(model_path))

## Extract frames for trial recordings

In [17]:
# Make sure the root directory for extracted recording frames exists.
if not RECORDING_FRAMES_PATH.exists():
    RECORDING_FRAMES_PATH.mkdir(parents=True, exist_ok=True)

# Extract the frames of every fully labeled recording into its own sub-folder,
# named after the recording id.
for recording_id in tqdm(FULLY_LABELED_RECORDINGS, desc="Extracting frames"):
    print(f"Extracting frames for {recording_id}")
    recording_video_path = RECORDINGS_PATH / f"{recording_id}.mp4"
    recording_frames_path = RECORDING_FRAMES_PATH / recording_id

    # Remove frames from a previous run so stale files never mix with new ones.
    if recording_frames_path.exists():
        shutil.rmtree(recording_frames_path)
    recording_frames_path.mkdir(parents=True, exist_ok=True)
    
    extract_frames_to_dir(recording_video_path, recording_frames_path)

Extracting frames:   0%|          | 0/3 [00:00<?, ?it/s]

Extracting frames for 67b71a70-da64-467a-9fb6-91bc29265fd1


Extracting frames:  33%|███▎      | 1/3 [00:08<00:17,  8.95s/it]

Extracting frames for 32f02db7-adc0-4556-a2da-ed2ba60a58c9


Extracting frames:  67%|██████▋   | 2/3 [00:12<00:06,  6.04s/it]

Extracting frames for b8eeecc0-06b1-47f7-acb5-89aab3c1724d


Extracting frames: 100%|██████████| 3/3 [00:17<00:00,  5.80s/it]
Extracting frames: 100%|██████████| 3/3 [00:17<00:00,  5.80s/it]
