# **RGB and Depth Cropping and Resizing**

In this notebook, the following steps are performed:

- Read YOLO label files containing object coordinates  
- Convert bounding boxes from YOLO normalized format to pixel coordinates  
- Crop the detected object from both RGB and depth images  
- Resize each cropped object to 256 × 256 resolution  
- Save the cropped patches to the target folder  
- Track how many patches are saved and how many are skipped  
- Log the reason for every skipped image for quick review  

## **Importing required libraries and modules**


In [None]:
# Mount Google Drive at /content/drive so the Drive-hosted paths used in later cells resolve.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
%pip install open3d
%pip install -U fpsample
%pip install ruamel.yaml

Collecting open3d
  Downloading open3d-0.19.0-cp311-cp311-manylinux_2_31_x86_64.whl.metadata (4.3 kB)
Collecting dash>=2.6.0 (from open3d)
  Downloading dash-3.0.4-py3-none-any.whl.metadata (10 kB)
Collecting configargparse (from open3d)
  Downloading configargparse-1.7.1-py3-none-any.whl.metadata (24 kB)
Collecting ipywidgets>=8.0.4 (from open3d)
  Downloading ipywidgets-8.1.7-py3-none-any.whl.metadata (2.4 kB)
Collecting addict (from open3d)
  Downloading addict-2.4.0-py3-none-any.whl.metadata (1.0 kB)
Collecting pyquaternion (from open3d)
  Downloading pyquaternion-0.9.9-py3-none-any.whl.metadata (1.4 kB)
Collecting flask>=3.0.0 (from open3d)
  Downloading flask-3.0.3-py3-none-any.whl.metadata (3.2 kB)
Collecting werkzeug>=3.0.0 (from open3d)
  Downloading werkzeug-3.0.6-py3-none-any.whl.metadata (3.7 kB)
Collecting retrying (from dash>=2.6.0->open3d)
  Downloading retrying-1.3.4-py3-none-any.whl.metadata (6.9 kB)
Collecting comm>=0.1.3 (from ipywidgets>=8.0.4->open3d)
  Downloading

In [None]:
import numpy as np
import os
import cv2
import yaml
import json
from google.colab.patches import cv2_imshow
import open3d as o3d
import fpsample as fp
import plotly.graph_objects as go
import torch
import matplotlib.pyplot as plt
from ruamel.yaml import YAML
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor

In [None]:
# Root folder of the project on the mounted Drive; used below to build the gt.json path.
project_base = "/content/drive/MyDrive/MLDL/6D-Pose-Estimation"

In [None]:
def load_json_file(path):
    """Parse the JSON file at `path` and return the decoded Python object.

    Args:
        path: Location of the JSON file to read.

    Returns:
        Whatever `json.load` produces for the file's contents.
    """
    with open(path, 'r') as handle:
        return json.load(handle)

In [None]:
# Load gt.json from <project_base>/data/full_data/train into memory.
# NOTE(review): judging by the variable name, this presumably holds ground-truth rotation,
# translation and bounding-box data — confirm against the file. It is not referenced by the
# cropping code visible in this notebook.
json_output_path = os.path.join(project_base, "data", "full_data", "train", "gt.json")
gt_rot_trans_bb_data = load_json_file(json_output_path)

## **Parallelized Cropping and Resizing of Detected Objects**

The routines below automate the extraction of object patches from images by reading YOLO-style label files. For each label file, the first labelled box is converted from normalized coordinates to a pixel-level bounding box and checked against the image bounds; files that fail a check are skipped and the reason is recorded in a log file. Using a thread pool, the regions are cropped concurrently from the source images, resized to a fixed 256×256 resolution, and written to a target directory. The depth variant follows the same steps but resizes with nearest-neighbour interpolation.


### **Cropping and resizing RGB**

In [None]:
def is_valid_bbox(x, y, w, h, img_width=640, img_height=480):
    """Tell whether a pixel-space box lies completely inside the image.

    Args:
        x, y: Top-left corner of the box in pixels.
        w, h: Box width and height in pixels.
        img_width, img_height: Image size in pixels (defaults: 640 x 480).

    Returns:
        True when the box has positive width and height, starts at a
        non-negative position and does not extend past the right or bottom
        edge of the image; False otherwise.
    """
    has_area = w > 0 and h > 0
    if not has_area:
        return False

    starts_inside = x >= 0 and y >= 0
    if not starts_inside:
        return False

    # Touching the right/bottom border exactly is still considered valid.
    return x + w <= img_width and y + h <= img_height

def crop_and_resize_image_task(fname, label_dir, image_dir, output_dir):
    """Crop the first labelled object out of one RGB image and save it at 256x256.

    Reads the first line of the YOLO label file `fname` (``class xc yc w h``,
    normalized to the image size; any extra trailing fields, such as a
    confidence score, are ignored), converts the box to pixel coordinates
    using the size of the loaded image, crops that region from
    ``<image_dir>/<key>.png``, resizes it and writes it to
    ``<output_dir>/<key>.png``.

    Args:
        fname: Label file name (e.g. ``"0001.txt"``) inside `label_dir`.
        label_dir: Directory containing the YOLO label files.
        image_dir: Directory containing the source PNG images.
        output_dir: Directory the resized crop is written to.

    Returns:
        A ``(status, image_key, reason)`` tuple. `status` is ``"saved"`` (with
        `reason` set to ``None``) or ``"skip"`` (with `reason` a short
        explanation). Exceptions are not propagated; they are reported as a
        skip whose reason is the exception message.
    """
    RESIZED_IMG_WIDTH, RESIZED_IMG_HEIGHT = 256, 256
    # Strip only the trailing extension; str.replace would also remove a
    # ".txt" that happens to occur in the middle of the name.
    image_key = fname[:-len(".txt")] if fname.endswith(".txt") else fname
    label_path = os.path.join(label_dir, fname)
    image_path = os.path.join(image_dir, f"{image_key}.png")
    output_path = os.path.join(output_dir, f"{image_key}.png")

    try:
        # Only the first line (first box) of the label file is used.
        with open(label_path, 'r') as f:
            line = f.readline().strip()
        if not line:
            return ("skip", image_key, "empty label")

        fields = line.split()
        if len(fields) < 5:
            return ("skip", image_key, "malformed label")
        # fields[0] is the class id; fields beyond the fifth are ignored.
        xc, yc, w, h = map(float, fields[1:5])

        image = cv2.imread(image_path)
        if image is None:
            return ("skip", image_key, "image not found")

        # Denormalize with the real image size instead of assuming 640x480.
        img_height, img_width = image.shape[:2]
        x = (xc - w / 2) * img_width
        y = (yc - h / 2) * img_height
        w *= img_width
        h *= img_height

        if not is_valid_bbox(x, y, w, h, img_width, img_height):
            return ("skip", image_key, "invalid bbox")

        # Defensive clamp; a box that passed validation is already in bounds.
        x1 = int(max(0, x))
        y1 = int(max(0, y))
        x2 = int(min(img_width, x + w))
        y2 = int(min(img_height, y + h))

        # Truncating to whole pixels can collapse a sub-pixel box to zero area.
        if x2 <= x1 or y2 <= y1:
            return ("skip", image_key, "empty crop")

        cropped = image[y1:y2, x1:x2]
        resized = cv2.resize(cropped, (RESIZED_IMG_WIDTH, RESIZED_IMG_HEIGHT))

        success = cv2.imwrite(output_path, resized)
        if success:
            return ("saved", image_key, None)
        return ("skip", image_key, "failed to save")

    except Exception as e:
        # Best effort: one bad file must not abort the whole batch.
        return ("skip", image_key, str(e))

def crop_and_resize_from_yolo_preds_parallel(label_dir, image_dir, output_dir, max_workers=8):
    """Run `crop_and_resize_image_task` on every ``.txt`` label file using a thread pool.

    Creates `output_dir` if needed, processes all label files found in
    `label_dir`, prints the number of saved and skipped files, and writes one
    ``key: reason`` line per skipped file to ``skipped_images.txt`` inside
    `output_dir`.

    Args:
        label_dir: Directory containing the YOLO label files.
        image_dir: Directory containing the source images.
        output_dir: Directory receiving the crops and the skip log.
        max_workers: Number of worker threads in the pool.
    """
    os.makedirs(output_dir, exist_ok=True)
    label_files = [entry for entry in os.listdir(label_dir) if entry.endswith(".txt")]

    print(f"Starting parallel cropping for {len(label_files)} files...")

    outcomes = []
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        # Queue every file up front, then collect results in submission order
        # so the progress bar advances as the jobs finish.
        pending = [
            pool.submit(crop_and_resize_image_task, name, label_dir, image_dir, output_dir)
            for name in label_files
        ]
        for job in tqdm(pending, desc="Processing"):
            outcomes.append(job.result())

    # Anything that was not saved counts as skipped, together with its reason.
    skipped_files = [(key, why) for status, key, why in outcomes if status != "saved"]
    skipped = len(skipped_files)
    saved = len(outcomes) - skipped

    print("\nCropping complete.")
    print(f"Saved: {saved}")
    print(f"Skipped: {skipped}")

    # Persist the skip reasons next to the crops for later review.
    skipped_log = os.path.join(output_dir, "skipped_images.txt")
    with open(skipped_log, 'w') as log:
        log.writelines(f"{key}: {why}\n" for key, why in skipped_files)
    print(f"Skipped list saved to: {skipped_log}")

In [3]:
# Crop RGB patches: label files from models/yolov10m/bbox_predictions/labels are applied to
# the images in data/full_data/train/images; results go to data/cropped_resized_data.
crop_and_resize_from_yolo_preds_parallel(
    label_dir="/content/drive/MyDrive/MLDL/6D-Pose-Estimation/models/yolov10m/bbox_predictions/labels",
    image_dir="/content/drive/MyDrive/MLDL/6D-Pose-Estimation/data/full_data/train/images",
    output_dir="/content/drive/MyDrive/MLDL/6D-Pose-Estimation/data/cropped_resized_data",
    max_workers=8
)

Starting parallel cropping for 14220 files...
Processing: 100%|██████████| 14220/14220 [21:46<00:00, 10.88it/s]
Cropping complete.
Saved: 14214
Skipped: 6
Skipped list saved to: /content/drive/MyDrive/MLDL/6D-Pose-Estimation/data/cropped_resized_data/skipped_images.txt



### **Cropping and resizing depth**

In [None]:
def crop_and_resize_depth_task(fname, label_dir, depth_dir, output_dir):
    """Crop the first labelled object out of one depth map and save it at 256x256.

    Reads the first line of the YOLO label file `fname` (``class xc yc w h``,
    normalized to the image size; any extra trailing fields, such as a
    confidence score, are ignored), converts the box to pixel coordinates
    using the size of the loaded depth map, crops that region from
    ``<depth_dir>/<key>.png``, resizes it with nearest-neighbour
    interpolation and writes it to ``<output_dir>/<key>.png``.

    Args:
        fname: Label file name (e.g. ``"0001.txt"``) inside `label_dir`.
        label_dir: Directory containing the YOLO label files.
        depth_dir: Directory containing the depth PNG files.
        output_dir: Directory the resized crop is written to.

    Returns:
        A ``(status, image_key, reason)`` tuple. `status` is ``"saved"`` (with
        `reason` set to ``None``) or ``"skip"`` (with `reason` a short
        explanation). Exceptions are not propagated; they are reported as a
        skip whose reason is the exception message.
    """
    RESIZED_IMG_WIDTH, RESIZED_IMG_HEIGHT = 256, 256
    # Strip only the trailing extension; str.replace would also remove a
    # ".txt" that happens to occur in the middle of the name.
    image_key = fname[:-len(".txt")] if fname.endswith(".txt") else fname
    label_path = os.path.join(label_dir, fname)
    depth_path = os.path.join(depth_dir, f"{image_key}.png")
    output_path = os.path.join(output_dir, f"{image_key}.png")

    try:
        # Only the first line (first box) of the label file is used.
        with open(label_path, 'r') as f:
            line = f.readline().strip()
        if not line:
            return ("skip", image_key, "empty label")

        fields = line.split()
        if len(fields) < 5:
            return ("skip", image_key, "malformed label")
        # fields[0] is the class id; fields beyond the fifth are ignored.
        xc, yc, w, h = map(float, fields[1:5])

        # IMREAD_UNCHANGED keeps the stored bit depth; only uint16 maps are accepted.
        depth = cv2.imread(depth_path, cv2.IMREAD_UNCHANGED)
        if depth is None or depth.dtype != np.uint16:
            return ("skip", image_key, "depth image invalid or not found")

        # Denormalize with the real map size instead of assuming 640x480.
        img_height, img_width = depth.shape[:2]
        x = (xc - w / 2) * img_width
        y = (yc - h / 2) * img_height
        w *= img_width
        h *= img_height

        if not is_valid_bbox(x, y, w, h, img_width, img_height):
            return ("skip", image_key, "invalid bbox")

        # Defensive clamp; a box that passed validation is already in bounds.
        x1 = int(max(0, x))
        y1 = int(max(0, y))
        x2 = int(min(img_width, x + w))
        y2 = int(min(img_height, y + h))

        # Truncating to whole pixels can collapse a sub-pixel box to zero area.
        if x2 <= x1 or y2 <= y1:
            return ("skip", image_key, "empty crop")

        cropped = depth[y1:y2, x1:x2]
        # Nearest-neighbour copies existing pixel values rather than blending neighbours.
        resized = cv2.resize(
            cropped,
            (RESIZED_IMG_WIDTH, RESIZED_IMG_HEIGHT),
            interpolation=cv2.INTER_NEAREST,
        )

        success = cv2.imwrite(output_path, resized)
        if success:
            return ("saved", image_key, None)
        return ("skip", image_key, "failed to save")

    except Exception as e:
        # Best effort: one bad file must not abort the whole batch.
        return ("skip", image_key, str(e))

def crop_and_resize_depth_parallel(label_dir, depth_dir, output_dir, max_workers=8):
    """Run `crop_and_resize_depth_task` on every ``.txt`` label file using a thread pool.

    Creates `output_dir` if needed, processes all label files found in
    `label_dir`, prints the number of saved and skipped files, and writes one
    ``key: reason`` line per skipped file to ``skipped_depth.txt`` inside
    `output_dir`.

    Args:
        label_dir: Directory containing the YOLO label files.
        depth_dir: Directory containing the depth images.
        output_dir: Directory receiving the crops and the skip log.
        max_workers: Number of worker threads in the pool.
    """
    os.makedirs(output_dir, exist_ok=True)
    label_files = [entry for entry in os.listdir(label_dir) if entry.endswith(".txt")]

    print(f"Starting parallel cropping for depth: {len(label_files)} files...")

    outcomes = []
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        # Queue every file up front, then collect results in submission order
        # so the progress bar advances as the jobs finish.
        pending = [
            pool.submit(crop_and_resize_depth_task, name, label_dir, depth_dir, output_dir)
            for name in label_files
        ]
        for job in tqdm(pending, desc="Processing Depth"):
            outcomes.append(job.result())

    # Anything that was not saved counts as skipped, together with its reason.
    skipped_files = [(key, why) for status, key, why in outcomes if status != "saved"]
    skipped = len(skipped_files)
    saved = len(outcomes) - skipped

    print("\nDepth cropping complete.")
    print(f"Saved: {saved}")
    print(f"Skipped: {skipped}")

    # Persist the skip reasons next to the crops for later review.
    skipped_log = os.path.join(output_dir, "skipped_depth.txt")
    with open(skipped_log, 'w') as log:
        log.writelines(f"{key}: {why}\n" for key, why in skipped_files)
    print(f"Skipped list saved to: {skipped_log}")


In [4]:
# Crop depth patches using the same label files as the RGB crops; results go to
# data/cropped_resized_depth_data.
# NOTE(review): the output recorded under this cell shows the RGB routine's messages
# (e.g. "skipped_images.txt"), not this function's — re-run the cell to refresh it.
crop_and_resize_depth_parallel(
    label_dir="/content/drive/MyDrive/MLDL/6D-Pose-Estimation/models/yolov10m/bbox_predictions/labels",
    # The function's parameter is named `depth_dir`; passing `image_dir=` raises TypeError.
    depth_dir="/content/drive/MyDrive/MLDL/6D-Pose-Estimation/data/full_data/train/depth",
    output_dir="/content/drive/MyDrive/MLDL/6D-Pose-Estimation/data/cropped_resized_depth_data",
    max_workers=8
)


Starting parallel cropping for 14220 files...
Processing: 100%|██████████| 14220/14220 [21:46<00:00, 10.88it/s]
Cropping complete.
Saved: 14214
Skipped: 6
Skipped list saved to: /content/drive/MyDrive/MLDL/6D-Pose-Estimation/data/cropped_resized_depth_data/skipped_images.txt

