### Convert RealSense .bag → MP4

In [1]:
import numpy as np
import pyrealsense2 as rs
import cv2

def bag_to_mp4(bag_path: str, out_path: str, fps: float = 30.0):
    """
    Read a RealSense .bag file and write out a color-only MP4.
    """
    pipeline = rs.pipeline()
    cfg = rs.config()
    cfg.enable_device_from_file(bag_path, repeat_playback=False)
    profile = pipeline.start(cfg)

    # get color stream resolution
    color_stream = profile.get_stream(rs.stream.color).as_video_stream_profile()
    w, h = color_stream.width(), color_stream.height()

    # set up VideoWriter (mp4v codec)
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    writer = cv2.VideoWriter(out_path, fourcc, fps, (w, h))

    try:
        while True:
            frames = pipeline.wait_for_frames()
            color = frames.get_color_frame()
            if not color:
                continue
            # convert to numpy array (RGB)
            img_rgb = np.asanyarray(color.get_data())
            # convert RGB→BGR for correct colors in OpenCV
            img_bgr = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2BGR)
            writer.write(img_bgr)
    except RuntimeError:
        # bag playback ended
        pass
    finally:
        writer.release()
        pipeline.stop()
        print(f"✅ Wrote {out_path}")



### Run conversion

In [2]:
import os

input_folder = "ros_bags/2nd_batch"  # replace with your folder containing .bag files
output_folder = "ros_bags/2nd_batch_converted"      # replace with your desired output folder
fps = 30.0                           # set your desired fps

os.makedirs(output_folder, exist_ok=True)

for fname in os.listdir(input_folder):
    if fname.endswith(".bag"):
        bag_file = os.path.join(input_folder, fname)
        output_mp4 = os.path.join(output_folder, os.path.splitext(fname)[0] + ".mp4")
        bag_to_mp4(bag_file, output_mp4, fps)

✅ Wrote /home/yogee/Desktop/human_detector_ws/data_raw/ros_bags/2nd_batch_converted/20250603_155234.mp4
✅ Wrote /home/yogee/Desktop/human_detector_ws/data_raw/ros_bags/2nd_batch_converted/20250603_155333.mp4
✅ Wrote /home/yogee/Desktop/human_detector_ws/data_raw/ros_bags/2nd_batch_converted/20250603_155749.mp4
✅ Wrote /home/yogee/Desktop/human_detector_ws/data_raw/ros_bags/2nd_batch_converted/20250603_153521.mp4
✅ Wrote /home/yogee/Desktop/human_detector_ws/data_raw/ros_bags/2nd_batch_converted/20250603_153723.mp4
✅ Wrote /home/yogee/Desktop/human_detector_ws/data_raw/ros_bags/2nd_batch_converted/20250603_155117.mp4
✅ Wrote /home/yogee/Desktop/human_detector_ws/data_raw/ros_bags/2nd_batch_converted/20250603_154227.mp4
✅ Wrote /home/yogee/Desktop/human_detector_ws/data_raw/ros_bags/2nd_batch_converted/20250603_160032.mp4


### View annotated Video

In [5]:
#!/usr/bin/env python3
import cv2
import subprocess
from ultralytics import YOLO
import sys

# ─── ADJUST THESE ───────────────────────────────────────────────────────────────
MODEL_PATH   = "../src/human_detector/models/best.pt"
# MODEL_PATH   = "../src/human_detector/models/best_v2_yolov11n.pt"
SOURCE_VIDEO = "converted_mp4_videos/2nd_batch_converted/robots_humans_side_2.mp4"
# ───────────────────────────────────────────────────────────────────────────────

def main():
    model = YOLO(MODEL_PATH)

    cap = cv2.VideoCapture(SOURCE_VIDEO)
    if not cap.isOpened():
        print(f"Error: cannot open {SOURCE_VIDEO}")
        sys.exit(1)

    fps    = cap.get(cv2.CAP_PROP_FPS) or 25
    width  = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    ffplay_cmd = [
        "ffplay",
        "-f", "rawvideo",
        "-pixel_format", "bgr24",
        "-video_size", f"{width}x{height}",
        "-framerate", str(int(fps)),
        "-autoexit",
        "-i", "-"
    ]

    try:
        proc = subprocess.Popen(ffplay_cmd, stdin=subprocess.PIPE, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    except FileNotFoundError:
        print("Error: ffplay not found. Install ffmpeg/ffplay.")
        cap.release()
        sys.exit(1)

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            results = model(frame)[0]
            boxes   = results.boxes.xyxy.cpu().numpy()
            confs   = results.boxes.conf.cpu().numpy()
            classes = results.boxes.cls.cpu().numpy()

            for (x1, y1, x2, y2), conf, cls in zip(boxes, confs, classes):
                x1, y1, x2, y2 = map(int, (x1, y1, x2, y2))
                name = model.names[int(cls)]
                score = conf.item()
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0,255,0), 2)
                cv2.putText(frame, f"{name} {score:.2f}", (x1, y1-10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,0), 2)

            proc.stdin.write(frame.tobytes())

        cap.release()
        proc.stdin.close()
        proc.wait()

    except KeyboardInterrupt:
        cap.release()
        proc.stdin.close()
        proc.wait()

if __name__ == "__main__":
    main()



0: 480x640 1 person, 2 telecos, 4.0ms
Speed: 0.7ms preprocess, 4.0ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 3 telecos, 11.0ms
Speed: 4.0ms preprocess, 11.0ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 2 telecos, 8.8ms
Speed: 1.3ms preprocess, 8.8ms inference, 1.3ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 3 telecos, 4.2ms
Speed: 1.4ms preprocess, 4.2ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 3 telecos, 5.2ms
Speed: 0.8ms preprocess, 5.2ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 3 telecos, 4.4ms
Speed: 1.0ms preprocess, 4.4ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 4 telecos, 4.0ms
Speed: 0.8ms preprocess, 4.0ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 4 telecos, 4.2ms
Speed: 0.8ms p

BrokenPipeError: [Errno 32] Broken pipe

### Annotate videos with pretrained model

In [6]:
#!/usr/bin/env python3
import os
import cv2
import json
import sys
from ultralytics import YOLO

# ─── USER CONFIGURATION ─────────────────────────────────────────────────────────
#
#  1) Path to your trained weights (best.pt)
#  2) Path to the video you want to run inference on
#  3) Output directory (e.g. "Video_2") – the script will create this if needed.
#
MODEL_PATH   = "../src/human_detector/models/best.pt"
SOURCE_VIDEO = "converted_mp4_videos/2nd_batch_converted/robots_humans_side_2.mp4"
OUTPUT_DIR   = "labelled_data/robots_humans_side_2"
#                                                                              ──
#  After running this script, you will end up with:
#
#    /home/yogee/Desktop/human_detector_ws/Video_2/
#    ├── obj_train_data/
#    │   ├── frame_000000.png
#    │   ├── frame_000000.txt
#    │   ├── frame_000001.png
#    │   ├── frame_000001.txt
#    │   ├── …
#    │
#    ├── obj.names
#    ├── obj.data
#    └── train.txt
#
#  Which is the exact format Darknet/Yolo expects for “Video_2” if you want to
#  fine-tune on those automatically-generated labels.
# ────────────────────────────────────────────────────────────────────────────────


def create_dirs():
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    os.makedirs(os.path.join(OUTPUT_DIR, "obj_train_data"), exist_ok=True)


def write_obj_names(class_names):
    """
    Write the file obj.names in OUTPUT_DIR. Each line is one class name.
    """
    fn = os.path.join(OUTPUT_DIR, "obj.names")
    with open(fn, "w") as f:
        for name in class_names:
            f.write(f"{name}\n")


def write_obj_data(num_classes):
    """
    Write the file obj.data in OUTPUT_DIR:

    classes = <num_classes>
    train = train.txt
    names = obj.names
    backup = backup/
    """
    fn = os.path.join(OUTPUT_DIR, "obj.data")
    with open(fn, "w") as f:
        f.write(f"classes = {num_classes}\n")
        f.write("train = train.txt\n")
        f.write("names = obj.names\n")
        f.write("backup = backup/\n")


def write_train_txt(image_filenames):
    """
    Given a list of image filenames (relative to OUTPUT_DIR), write them into train.txt.
    Example line in train.txt: 
        obj_train_data/frame_000000.png
        obj_train_data/frame_000001.png
        …
    """
    fn = os.path.join(OUTPUT_DIR, "train.txt")
    with open(fn, "w") as f:
        for rel_path in image_filenames:
            f.write(f"{rel_path}\n")


def xyxy_to_yolo(x1, y1, x2, y2, img_w, img_h):
    """
    Convert [x1,y1,x2,y2] to YOLO format:
      x_center = ((x1 + x2)/2) / img_w
      y_center = ((y1 + y2)/2) / img_h
      width    = (x2 - x1) / img_w
      height   = (y2 - y1) / img_h

    Returns a tuple of floats: (x_center, y_center, width, height)
    """
    x_center = ((x1 + x2) / 2.0) / img_w
    y_center = ((y1 + y2) / 2.0) / img_h
    w        = (x2 - x1) / img_w
    h        = (y2 - y1) / img_h
    return x_center, y_center, w, h


def main():
    # 1) Create output directories:
    create_dirs()
    images_folder = os.path.join(OUTPUT_DIR, "obj_train_data")

    # 2) Load the YOLO model once:
    model = YOLO(MODEL_PATH)

    # 3) Open the source video:
    cap = cv2.VideoCapture(SOURCE_VIDEO)
    if not cap.isOpened():
        print(f"❌ ERROR: could not open video file '{SOURCE_VIDEO}'")
        sys.exit(1)

    # 4) Grab video properties:
    frame_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    print(f"Video loaded: {SOURCE_VIDEO}")
    print(f"  → Resolution: {frame_w}×{frame_h}")
    print(f"  → Total frames: {total_frames}\n")

    image_filenames = []

    frame_idx = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # 5) Run YOLO inference on this frame
        results = model(frame)[0]

        # 6) Build paths:
        img_name = f"frame_{frame_idx:06d}.png"
        txt_name = f"frame_{frame_idx:06d}.txt"
        img_path = os.path.join(images_folder, img_name)
        txt_path = os.path.join(images_folder, txt_name)

        # 7) Save the raw frame as a PNG:
        cv2.imwrite(img_path, frame)

        # 8) Open the .txt file and write detections (in YOLO .txt format)
        with open(txt_path, "w") as f_txt:
            # results.boxes.xyxy is a tensor of shape (N, 4): [x1, y1, x2, y2]
            boxes   = results.boxes.xyxy.cpu().numpy()  # shape (N, 4)
            confs   = results.boxes.conf.cpu().numpy()  # shape (N,)
            classes = results.boxes.cls.cpu().numpy()   # shape (N,)

            for (x1, y1, x2, y2), conf, cls_idx in zip(boxes, confs, classes):
                # Convert to integers (pixel coords)
                x1_i, y1_i, x2_i, y2_i = map(int, (x1, y1, x2, y2))
                # Convert bounding box to YOLO format (normalized):
                x_c, y_c, w_rel, h_rel = xyxy_to_yolo(x1_i, y1_i, x2_i, y2_i, frame_w, frame_h)

                # Write one line per box: <class_id> <x_center> <y_center> <w> <h>
                # Each value is float. We’ll format to 6 decimal places.
                f_txt.write(f"{int(cls_idx)} {x_c:.6f} {y_c:.6f} {w_rel:.6f} {h_rel:.6f}\n")

        # 9) Keep a record of relative image path for train.txt:
        image_filenames.append(f"obj_train_data/{img_name}")

        if frame_idx % 50 == 0:
            print(f"  Processed frame {frame_idx}/{total_frames} …")

        frame_idx += 1

    cap.release()
    print(f"\n✅ Saved {frame_idx} frames + YOLO‐format .txt labels into '{images_folder}'")

    # 10) Write obj.names (one name per line) using model.names:
    write_obj_names(["person", "teleco"])
    print("✅ Wrote obj.names")

    # 11) Write obj.data (Darknet‐style):
    num_classes = len(model.names)
    write_obj_data(num_classes)
    print("✅ Wrote obj.data")

    # 12) Write train.txt listing all image files under obj_train_data/
    write_train_txt(image_filenames)
    print("✅ Wrote train.txt")

    print("\nAll done! Your directory structure is now:")
    print(f"  {OUTPUT_DIR}/obj_train_data/")
    print(f"    ├─ frame_000000.png")
    print(f"    ├─ frame_000000.txt")
    print(f"    ├─ frame_000001.png")
    print(f"    ├─ frame_000001.txt")
    print(f"    └─ …")
    print(f"\n  {OUTPUT_DIR}/obj.names")
    print(f"  {OUTPUT_DIR}/obj.data")
    print(f"  {OUTPUT_DIR}/train.txt\n")


if __name__ == "__main__":
    main()


Video loaded: converted_mp4_videos/2nd_batch_converted/robots_humans_side_2.mp4
  → Resolution: 640×480
  → Total frames: 1421


0: 480x640 1 person, 2 telecos, 4.3ms
Speed: 0.7ms preprocess, 4.3ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 640)
  Processed frame 0/1421 …

0: 480x640 1 person, 3 telecos, 4.1ms
Speed: 0.7ms preprocess, 4.1ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 2 telecos, 4.9ms
Speed: 0.7ms preprocess, 4.9ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 3 telecos, 3.8ms
Speed: 0.7ms preprocess, 3.8ms inference, 0.7ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 3 telecos, 3.8ms
Speed: 0.7ms preprocess, 3.8ms inference, 0.7ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 3 telecos, 3.8ms
Speed: 0.7ms preprocess, 3.8ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 4 telecos, 4.

### Download person dataset

In [3]:
import os
import json
import requests
import random
from tqdm import tqdm
from pycocotools.coco import COCO
from pathlib import Path
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

from shutil import copyfile

# Set random seed for reproducibility
random.seed(42)

# Define paths
BASE_DIR = '../data_training/data_person'
IMG_DIR_TRAIN = os.path.join(BASE_DIR, 'images/train')
IMG_DIR_VAL = os.path.join(BASE_DIR, 'images/val')
LBL_DIR_TRAIN = os.path.join(BASE_DIR, 'labels/train')
LBL_DIR_VAL = os.path.join(BASE_DIR, 'labels/val')

for d in [IMG_DIR_TRAIN, IMG_DIR_VAL, LBL_DIR_TRAIN, LBL_DIR_VAL]:
    os.makedirs(d, exist_ok=True)

# Load COCO
coco = COCO('../data_training/data_person/annotations/instances_train2017.json')
catIds = coco.getCatIds(catNms=['person'])
imgIds = coco.getImgIds(catIds=catIds)
images = coco.loadImgs(imgIds)

# Split train/val
random.shuffle(images)
split_idx = int(0.9 * len(images))
train_images = images[:split_idx]
val_images = images[split_idx:]

# Retry session for downloads
session = requests.Session()
retry = Retry(connect=3, backoff_factor=0.5)
adapter = HTTPAdapter(max_retries=retry)
session.mount('http://', adapter)
session.mount('https://', adapter)

# Helper to convert COCO bbox to YOLO
def coco_to_yolo_bbox(bbox, img_w, img_h):
    x, y, w, h = bbox
    x_center = (x + w / 2) / img_w
    y_center = (y + h / 2) / img_h
    return [x_center, y_center, w / img_w, h / img_h]

# Process dataset
def process_dataset(images, img_dir, lbl_dir):
    for img in tqdm(images, desc=f"Processing {img_dir}"):
        file_name = img['file_name']
        img_path = os.path.join(img_dir, file_name)
        label_path = os.path.join(lbl_dir, file_name.replace('.jpg', '.txt'))

        # Download image if not exists
        if not os.path.isfile(img_path):
            try:
                img_data = session.get(img['coco_url']).content
                with open(img_path, 'wb') as f:
                    f.write(img_data)
            except Exception as e:
                print(f"Failed to download {file_name}: {e}")
                continue

        # Get annotations
        annIds = coco.getAnnIds(imgIds=[img['id']], catIds=catIds, iscrowd=False)
        anns = coco.loadAnns(annIds)

        if not anns:
            continue

        with open(label_path, 'w') as f:
            for ann in anns:
                bbox = coco_to_yolo_bbox(ann['bbox'], img['width'], img['height'])
                f.write(f"0 {' '.join([f'{x:.6f}' for x in bbox])}\n")

# Process both sets
process_dataset(train_images, IMG_DIR_TRAIN, LBL_DIR_TRAIN)
process_dataset(val_images, IMG_DIR_VAL, LBL_DIR_VAL)


loading annotations into memory...
Done (t=8.66s)
creating index...
index created!


Processing ../data_training/data_person/images/train:  21%|██        | 12238/57703 [1:38:30<6:05:56,  2.07it/s] 


KeyboardInterrupt: 

### Split Teleco dataset

In [8]:
# %% [code]
# 1) (Optional) install PyYAML if you haven't already
# !pip install pyyaml --quiet

import shutil
import random
from pathlib import Path
import yaml

# 2) Define your paths (all relative to your notebook cwd == HUMAN_DETECTOR_WS)
repo_root  = Path().resolve()
export_dir = repo_root / "labelled_data" / "robots_humans_side_2" / "obj_train_data"
names_file = repo_root / "labelled_data" / "robots_humans_side_2" / "obj.names"
dest_dir   = repo_root / "../data_training/data_teleco/robots_humans_side_2"

assert export_dir.exists(), f"Export dir not found: {export_dir}"
assert names_file.exists(), f"Names file not found: {names_file}"

# 3) Create train/val subfolders under training_data/images and training_data/labels
for split in ("train","val"):
    (dest_dir/"images"/split).mkdir(parents=True, exist_ok=True)
    (dest_dir/"labels"/split).mkdir(parents=True, exist_ok=True)

# 4) Gather all image files from the CVAT export
img_exts = {".png", ".jpg", ".jpeg"}
all_imgs = sorted(p for p in export_dir.iterdir() if p.suffix.lower() in img_exts)

# 5) Shuffle & split 80/20
random.seed(42)
random.shuffle(all_imgs)
n_train   = int(0.8 * len(all_imgs))
train_imgs, val_imgs = all_imgs[:n_train], all_imgs[n_train:]

# 6) Copy images and their .txt labels into the training_data folder structure
for split, imgs in [("train", train_imgs), ("val", val_imgs)]:
    for img_path in imgs:
        lbl_path = export_dir / f"{img_path.stem}.txt"
        if not lbl_path.exists():
            print(f"Missing label for: {img_path.name}")
            continue
        shutil.copy2(img_path,   dest_dir/"images"/split/img_path.name)
        shutil.copy2(lbl_path,   dest_dir/"labels"/split/f"{img_path.stem}.txt")

# 7) Read your class names
with open(names_file) as f:
    names = [line.strip() for line in f if line.strip()]

# 8) Write a data.yaml for YOLO training
cfg = {
    "path": str(dest_dir),     # base dir for train/val
    "train": "images/train",
    "val":   "images/val",
    "nc":    len(names),
    "names": names
}
with open(dest_dir/"data.yaml", "w") as f:
    yaml.safe_dump(cfg, f, sort_keys=False)

print(f"✅ Copied {len(train_imgs)} train / {len(val_imgs)} val images")
print(f"✅ data.yaml written to {dest_dir/'data.yaml'}")


✅ Copied 1136 train / 285 val images
✅ data.yaml written to /home/commu/Desktop/human_detector_ws/data_raw/../data_training/data_teleco/robots_humans_side_2/data.yaml
