In [None]:
import os
import glob
import random
import cv2
import albumentations as A
from roboflow import Roboflow
from pathlib import Path


In [None]:
PATH_TO_DADASET = Path("../datasets/Surgical-Tools-Detection-1")
if not PATH_TO_DADASET.exists():
    rf = Roboflow(api_key="P9o0JjFVWdK2Z3nzefUI")
    project = rf.workspace("northeastern-university-yu4fz").project("surgical-tools-detection-c8b3w")
    version = project.version(1)
    dataset = version.download("yolov11")
else:
    print("Dataset folder already exists, skipping download.")
                

In [2]:
# ───────────────────────────────────────────────────────────────────────────────
# CONFIGURATION 
# ───────────────────────────────────────────────────────────────────────────────

# 1) Base folder where your dataset resides
BASE_DIR = Path("../datasets/Surgical-Tools-Detection-1")

# 2) Which splits do you want to augment?
#    You can choose any subset of ["train", "valid", "test"].
#    Typical workflow: only augment "train".
SPLITS_TO_AUGMENT = ["train"]

# 3) How many augmented copies to generate per original image?
#    E.g. AUG_PER_IMAGE = 2 → for each original image, produce 2 new augmented images.
AUG_PER_IMAGE = 2

# 4) File extensions (change if you use .png instead of .jpg)
IMG_EXT = ".jpg"
LABEL_EXT = ".txt"

# 5) Folders (inside each split) where augmented outputs will be written.
#    We recommend parallel structure: images_aug/  labels_aug/
AUG_IMAGE_FOLDER_NAME = "images_aug"
AUG_LABEL_FOLDER_NAME = "labels_aug"

# 6) Random seed for reproducibility
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

In [3]:
# assert if the base directory exists
if not BASE_DIR.exists():
    raise FileNotFoundError(f"Base directory {BASE_DIR} does not exist.")

In [4]:
# ───────────────────────────────────────────────────────────────────────────────
# CUSTOM SPECULAR SPOT TRANSFORM (simulates glare on metallic instruments)
# ───────────────────────────────────────────────────────────────────────────────

class SpecularSpot(A.ImageOnlyTransform):
    """
    Albumentations custom transform that draws a random white ellipse
    to simulate specular glare or reflection on metallic instruments.
    """
    def __init__(self, always_apply=False, p=0.2):
        super().__init__(always_apply, p)

    def apply(self, img, **params):
        h, w = img.shape[:2]
        # Random center point somewhere in the image
        cx = random.randint(int(0.1 * w), int(0.9 * w))
        cy = random.randint(int(0.1 * h), int(0.9 * h))
        # Random ellipse radii (small relative to image size)
        rx = random.randint(int(0.02 * w), int(0.06 * w))
        ry = random.randint(int(0.02 * h), int(0.06 * h))
        # Draw filled white ellipse
        cv2.ellipse(img, (cx, cy), (rx, ry), angle=0, startAngle=0, endAngle=360,
                    color=(255, 255, 255), thickness=-1)
        return img

In [7]:
# ─────────────────────────────────
# DEFINE ALBUMENTATIONS PIPELINE 
# ─────────────────────────────────

transform = A.Compose(
    [
        # ──────────────── GEOMETRIC TRANSFORMS ──────────────────

        # Random 90° rotations (0°, 90°, 180°, or 270°)
        A.RandomRotate90(p=0.5),

        # Random rotation between –90° and +90°, pad with black if needed
        A.Rotate(limit=90, p=0.7, border_mode=cv2.BORDER_CONSTANT, value=0),

        # Perspective warp (simulate oblique camera angles)
        A.Perspective(scale=(0.05, 0.10), keep_size=True, p=0.3),

        # Slight affine shear/shift/scale jitter (simulates slight camera drift)
        A.ShiftScaleRotate(
            shift_limit=0.02, scale_limit=0.02, rotate_limit=5,
            border_mode=cv2.BORDER_CONSTANT, value=0, p=0.4
        ),

        # Pad to at least 800×800, then random-crop exactly 800×800
        A.PadIfNeeded(min_height=800, min_width=800,
                      border_mode=cv2.BORDER_CONSTANT, value=0),
        A.RandomCrop(width=800, height=800, p=1.0),

        # ──────────────── OCCLUSION / PARTIAL-VIEW TRANSFORMS ──────────────────

        # CoarseDropout: random black rectangles to simulate occlusion by tissue/tools
        A.CoarseDropout(
            max_holes=2,
            max_height=100,
            max_width=100,
            min_holes=1,
            min_height=50,
            min_width=50,
            fill_value=0,
            p=0.4
        ),

        # CoarseDropout: simulating cutout with a single square black patch
        A.CoarseDropout(
            max_holes=1,
            max_height=120,
            max_width=120,
            min_holes=1,
            min_height=120,
            min_width=120,
            fill_value=0,
            p=0.3
        ),

        # RandomResizedCrop: sometimes zoom into a random region (partial instrument)
        A.RandomResizedCrop(
            size=(800, 800),
            scale=(0.6, 1.0), ratio=(0.75, 1.33),
            p=0.5
        ),

        # ──────────────── BLUR / NOISE TRANSFORMS ──────────────────

        # Motion blur (simulates instrument/camera motion)
        A.MotionBlur(blur_limit=7, p=0.3),

        # Gaussian blur (simulates out-of-focus or lens condensation)
        A.GaussianBlur(blur_limit=(3, 7), p=0.3),

        # Gaussian noise (sensor noise simulation)
        A.GaussNoise(var_limit=(10.0, 50.0), p=0.4),

        # ──────────────── COLOR / LIGHT TRANSFORMS ──────────────────

        # Random brightness & contrast adjustment
        A.RandomBrightnessContrast(brightness_limit=0.3,
                                   contrast_limit=0.3, p=0.6),

        # Hue, saturation, value shifts (simulate different OR lighting)
        A.HueSaturationValue(hue_shift_limit=10,
                             sat_shift_limit=30,
                             val_shift_limit=20, p=0.5),

        # RGB channel shift (camera calibration differences)
        A.RGBShift(r_shift_limit=15,
                   g_shift_limit=15,
                   b_shift_limit=15, p=0.4),

        # CLAHE (adaptive histogram equalization)
        A.CLAHE(clip_limit=3.0, tile_grid_size=(8, 8), p=0.2),

        # Random shadow (simulate overhead lamp casting shadow)
        A.RandomShadow(shadow_roi=(0, 0.5, 1, 1),
                       num_shadows_lower=1,
                       num_shadows_upper=2, p=0.3),

        # ──────────────── SPECIAL SURGICAL SCENE TRANSFORMS ──────────────────

        # Additional motion blur for rapid tool movement
        A.MotionBlur(blur_limit=7, p=0.2),

        # Custom specular spot (simulates glare/reflection on metallic surfaces)
        SpecularSpot(p=0.2),

        # Random fog/haze (mild lens haze or condensation)
        A.RandomFog(fog_coef_lower=0.1, fog_coef_upper=0.3,
                    alpha_coef=0.08, p=0.15),

        # Random rain (small streaks, can mimic fluid droplets on lens)
        A.RandomRain(blur_value=3, p=0.1),

        # Mild elastic transform (simulates slight lens distortion)
        A.ElasticTransform(alpha=1, sigma=50,
                           alpha_affine=50, p=0.1),

        # Mild grid distortion (another form of lens warp)
        A.GridDistortion(num_steps=5,
                         distort_limit=0.3, p=0.1),
    ],
    bbox_params=A.BboxParams(
        format="yolo",
        label_fields=["class_labels"],
        min_visibility=0.3   # drop any bbox that’s <30% visible after augmentation
    ),
)

  A.Rotate(limit=90, p=0.7, border_mode=cv2.BORDER_CONSTANT, value=0),
  A.ShiftScaleRotate(
  A.PadIfNeeded(min_height=800, min_width=800,
  A.CoarseDropout(
  A.CoarseDropout(
  A.GaussNoise(var_limit=(10.0, 50.0), p=0.4),
  A.RandomShadow(shadow_roi=(0, 0.5, 1, 1),
  A.RandomFog(fog_coef_lower=0.1, fog_coef_upper=0.3,
  A.ElasticTransform(alpha=1, sigma=50,


In [8]:
# ───────────────────────────────────────────────────────────────────────────────
# UTILITY FUNCTIONS
# ───────────────────────────────────────────────────────────────────────────────

def yolo_line_to_bbox(line: str):
    """
    Parse a YOLO label line into (class_id, [x_center, y_center, width, height]).
    Input line example: "3 0.5234 0.4123 0.1234 0.2312"
    Returns: (int_cls, [x_c (float), y_c (float), w (float), h (float)]).
    """
    parts = line.strip().split()
    if len(parts) != 5:
        raise ValueError(f"Malformed YOLO line: {line}")
    cls = int(parts[0])
    x_c, y_c, w, h = map(float, parts[1:])
    return cls, [x_c, y_c, w, h]

def bbox_to_yolo_line(cls: int, bbox: list):
    """
    Convert class + normalized bbox (x_center, y_center, w, h) back to YOLO text format.
    Returns a string like: "3 0.523400 0.412300 0.123400 0.231200\n"
    """
    x_c, y_c, w, h = bbox
    return f"{cls} {x_c:.6f} {y_c:.6f} {w:.6f} {h:.6f}\n"

def ensure_folder(path: str):
    """
    Create folder if it doesn’t already exist.
    """
    if not os.path.exists(path):
        os.makedirs(path)


In [11]:
# ───────────────────────────────────────────────────────────────────────────────
# MAIN AUGMENTATION LOGIC
# ───────────────────────────────────────────────────────────────────────────────

for split in SPLITS_TO_AUGMENT:
    # 1) Define original image/label folders
    img_dir = BASE_DIR / split / "images"
    lbl_dir = BASE_DIR / split / "labels"

    # 2) Define where to write augmented outputs
    img_aug_dir = BASE_DIR / split / AUG_IMAGE_FOLDER_NAME
    lbl_aug_dir = BASE_DIR / split / AUG_LABEL_FOLDER_NAME
    ensure_folder(img_aug_dir)
    ensure_folder(lbl_aug_dir)

    # 3) Collect all image file paths in this split
    img_pattern = str(img_dir / f"*{IMG_EXT}")
    all_image_paths = sorted(glob.glob(img_pattern))

    print(f"[INFO] Split = '{split}': Found {len(all_image_paths)} images to augment.")
    print(f"[INFO] Generating {AUG_PER_IMAGE} augmentations per image → up to {len(all_image_paths)*AUG_PER_IMAGE} new samples.\n")

    # 4) Loop over each original image
    for img_path in all_image_paths:
        # 4a) Read the image (OpenCV reads as BGR)
        image = cv2.imread(img_path)
        if image is None:
            print(f"  [WARNING] Could not read image {img_path}. Skipping.")
            continue
        height, width = image.shape[:2]

        # 4b) Derive the corresponding YOLO label file path
        base_filename = Path(img_path).stem
        label_path = Path(lbl_dir) / f"{base_filename}{LABEL_EXT}"
        if not os.path.isfile(label_path):
            print(f"  [WARNING] Label file not found for {img_path} → {label_path}. Skipping.")
            continue

        # 4c) Parse all bounding boxes and class IDs from the label file
        bboxes = []        # list of [x_c, y_c, w, h]
        class_labels = []  # parallel list of integer class IDs
        with open(label_path, "r") as f_lbl:
            for line in f_lbl.readlines():
                cls_id, bbox = yolo_line_to_bbox(line)
                bboxes.append(bbox)
                class_labels.append(cls_id)

        # 4d) For each augmentation copy, apply transforms
        for aug_idx in range(AUG_PER_IMAGE):
            augmented = transform(image=image, bboxes=bboxes, class_labels=class_labels)
            aug_image = augmented["image"]
            aug_bboxes = augmented["bboxes"]
            aug_labels = augmented["class_labels"]

            # If Albumentations dropped all boxes (e.g., crop removed them), skip
            if len(aug_bboxes) == 0:
                print(f"    • [SKIP] {base_filename}_aug{aug_idx} → all boxes removed.")
                continue

            # 4e) Write out the new augmented image
            aug_img_name = f"{base_filename}_aug{aug_idx}{IMG_EXT}"
            aug_img_path = str(img_aug_dir / aug_img_name)
            cv2.imwrite(aug_img_path, aug_image)

            # 4f) Write out the new YOLO label file
            aug_lbl_name = f"{base_filename}_aug{aug_idx}{LABEL_EXT}"
            aug_lbl_path = str(lbl_aug_dir / aug_lbl_name)
            with open(aug_lbl_path, "w") as f_out:
                for bbox_norm, cls_norm in zip(aug_bboxes, aug_labels):
                    f_out.write(bbox_to_yolo_line(cls_norm, bbox_norm))

            print(f"    • Saved: {aug_img_name}  +  {aug_lbl_name}")

    print(f"\n[✓] Finished augmenting split '{split}'.\n")

[INFO] Split = 'train': Found 17789 images to augment.
[INFO] Generating 2 augmentations per image → up to 35578 new samples.

    • Saved: 250337875_593788858741541_5037414245975890979_n_jpg.rf.016a2b0f37703fabe05a28c0cbfb1753_aug0.jpg  +  250337875_593788858741541_5037414245975890979_n_jpg.rf.016a2b0f37703fabe05a28c0cbfb1753_aug0.txt
    • Saved: 250337875_593788858741541_5037414245975890979_n_jpg.rf.016a2b0f37703fabe05a28c0cbfb1753_aug1.jpg  +  250337875_593788858741541_5037414245975890979_n_jpg.rf.016a2b0f37703fabe05a28c0cbfb1753_aug1.txt
    • Saved: 250337875_593788858741541_5037414245975890979_n_jpg.rf.7e02f6235c1ca0a48209c160faf9c6b3_aug0.jpg  +  250337875_593788858741541_5037414245975890979_n_jpg.rf.7e02f6235c1ca0a48209c160faf9c6b3_aug0.txt
    • Saved: 250337875_593788858741541_5037414245975890979_n_jpg.rf.7e02f6235c1ca0a48209c160faf9c6b3_aug1.jpg  +  250337875_593788858741541_5037414245975890979_n_jpg.rf.7e02f6235c1ca0a48209c160faf9c6b3_aug1.txt
    • Saved: 250337875_593788