In [1]:
import os
import cv2
import numpy as np


In [2]:
# Configuration
# Source tiles and their label files (one .txt per .jpg, same basename).
input_image_dir = "output/images"
input_label_dir = "output/labels"

# Destinations for the augmented Waldo tiles and labels; created up front so
# the later write calls never hit a missing directory.
output_aug_image_dir = "augmented/images"
output_aug_label_dir = "augmented/labels"
for _aug_dir in (output_aug_image_dir, output_aug_label_dir):
    os.makedirs(_aug_dir, exist_ok=True)

class_id = 1  # Waldo's class ID
tile_size = 64  # image size is 64x64
# images_w_waldo = ['00267.txt', '01070.txt', '01071.txt']

In [3]:
# Helper: read label and return list of bounding boxes
def read_labels(label_path, target_class=None):
    """Read a label file and return the boxes belonging to one class.

    Each useful line has exactly five whitespace-separated fields:
    ``class cx cy w h``. Lines with a different field count (including blank
    lines) are ignored, as are lines whose class differs from the target.

    Args:
        label_path: Path of the label text file.
        target_class: Class id to keep. Defaults to the module-level
            ``class_id`` when omitted, which is the original behaviour.

    Returns:
        A list of ``(cls, cx, cy, w, h)`` tuples with ``cls`` as int and the
        remaining fields as floats. An empty list is returned when the label
        file does not exist, so callers can treat "no file" as "no boxes".

    Raises:
        ValueError: If a matching line contains a non-numeric field.
    """
    if target_class is None:
        target_class = class_id

    boxes = []
    try:
        with open(label_path, "r") as f:
            for line in f:
                parts = line.split()
                if len(parts) != 5:
                    continue
                # The class may be written as "1" or "1.0"; go through float
                # so both spellings are accepted.
                cls = int(float(parts[0]))
                if cls != target_class:
                    continue
                cx, cy, w, h = map(float, parts[1:])
                boxes.append((cls, cx, cy, w, h))
    except FileNotFoundError:
        # A tile without a label file simply has no annotated boxes.
        return []
    return boxes

# Helper: write labels
def write_labels(label_path, boxes):
    """Write boxes to ``label_path``, one ``cls cx cy w h`` line per box.

    The file is overwritten. The class is written as-is and the four
    geometry values are formatted with two decimal places.
    """
    with open(label_path, "w") as out_file:
        # Generator keeps formatting lazy, so the file is opened (and
        # truncated) before any box is formatted.
        out_file.writelines(
            f"{cls} {cx:.2f} {cy:.2f} {w:.2f} {h:.2f}\n"
            for cls, cx, cy, w, h in boxes
        )

# Transformations
def rotate_image_and_boxes(img, boxes, angle):
    """Rotate a tile by 0, 90, 180 or 270 degrees and remap its boxes.

    90 is a clockwise quarter turn and 270 a counter-clockwise quarter turn
    (see the OpenCV rotate codes below). Box centres are remapped against the
    module-level ``tile_size``, so the tile is assumed to be a
    ``tile_size`` x ``tile_size`` square with boxes in the same units
    (presumably pixels -- confirm against the label producer).

    Args:
        img: Image array to rotate.
        boxes: Iterable of ``(cls, cx, cy, w, h)`` tuples.
        angle: One of 0, 90, 180, 270.

    Returns:
        ``(rotated_image, new_boxes)``. For ``angle == 0`` the image is a copy
        and ``boxes`` is returned unchanged (same object).

    Raises:
        KeyError: If ``angle`` is not one of the supported values.
    """
    if angle == 0:
        return img.copy(), boxes

    rotation_codes = {
        90: cv2.ROTATE_90_CLOCKWISE,
        180: cv2.ROTATE_180,
        270: cv2.ROTATE_90_COUNTERCLOCKWISE,
    }
    img_rotated = cv2.rotate(img, rotation_codes[angle])

    def remap(cx, cy, w, h):
        """Return the (cx, cy, w, h) of one box after the rotation."""
        if angle == 90:
            return tile_size - cy, cx, h, w  # quarter turn swaps width/height
        if angle == 180:
            return tile_size - cx, tile_size - cy, w, h
        # Only 270 can reach this point: other angles failed the lookup above.
        return cy, tile_size - cx, h, w  # quarter turn swaps width/height

    new_boxes = [(cls, *remap(cx, cy, w, h)) for cls, cx, cy, w, h in boxes]
    return img_rotated, new_boxes

def flip_image_and_boxes(img, boxes):
    """Mirror a tile horizontally and reflect the x-centre of each box.

    Box centres are reflected against the module-level ``tile_size``; the
    y-centre, width and height are unchanged by a horizontal flip.

    Returns:
        ``(flipped_image, new_boxes)`` where ``new_boxes`` is a new list of
        ``(cls, cx, cy, w, h)`` tuples.
    """
    mirrored_img = cv2.flip(img, 1)  # flip code 1 = horizontal flip
    mirrored_boxes = [
        (cls, tile_size - cx, cy, w, h)
        for cls, cx, cy, w, h in boxes
    ]
    return mirrored_img, mirrored_boxes

def to_grayscale(img):
    """Convert a BGR image to grayscale via ``cv2.COLOR_BGR2GRAY``."""
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    return gray

def adjust_brightness(img, factor):
    """Scale pixel intensities by ``factor`` and return a uint8 image.

    The multiplication is done in float32 so values can exceed 255 before
    being clipped back into the 0..255 range; the input array is not modified.
    """
    scaled = img.astype(np.float32) * factor
    return np.clip(scaled, 0, 255).astype(np.uint8)

# Augmentation pipeline
# For every source tile that contains Waldo, write all augmented variants:
# 4 rotations x {as-is, mirrored} x {colour, grayscale} x 9 brightness levels
# = 144 image/label pairs per tile. Output files are numbered consecutively
# starting at `first_image_id`.
first_image_id = 100000
image_id = first_image_id
pipeline_brightness_factors = [0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4, 1.6, 1.8]

for img_name in sorted(os.listdir(input_image_dir)):
    if not img_name.lower().endswith(".jpg"):
        continue

    label_path = os.path.join(input_label_dir, os.path.splitext(img_name)[0] + ".txt")
    if not os.path.isfile(label_path):
        continue  # no label file -> nothing annotated, so nothing to augment
    boxes = read_labels(label_path)
    if not boxes:
        continue  # skip if no Waldo

    img_path = os.path.join(input_image_dir, img_name)
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals an unreadable file by returning None, not raising.
        print(f"⚠️ Couldn't read {img_path}")
        continue

    # Geometric variants: each rotation plus its horizontal mirror.
    base_variants = []
    for angle in [0, 90, 180, 270]:
        rotated_img, rotated_boxes = rotate_image_and_boxes(img, boxes, angle)
        base_variants.append((rotated_img, rotated_boxes))

        flipped_img, flipped_boxes = flip_image_and_boxes(rotated_img, rotated_boxes)
        base_variants.append((flipped_img, flipped_boxes))

    for img_variant, label_variant in base_variants:
        # Grayscale version, expanded back to 3 channels so colour and gray
        # outputs share the same shape.
        gray_variant = to_grayscale(img_variant)
        gray_variant = cv2.cvtColor(gray_variant, cv2.COLOR_GRAY2BGR)

        for var_img in (img_variant, gray_variant):
            for factor in pipeline_brightness_factors:
                bright_img = adjust_brightness(var_img, factor)
                out_img_path = os.path.join(output_aug_image_dir, f"{image_id:06d}.jpg")
                out_label_path = os.path.join(output_aug_label_dir, f"{image_id:06d}.txt")

                # cv2.imwrite returns False on failure; in that case skip the
                # label too so no orphan label is left and the final count
                # only reflects images that were actually written.
                if not cv2.imwrite(out_img_path, bright_img):
                    print(f"⚠️ Couldn't write {out_img_path}")
                    continue
                write_labels(out_label_path, label_variant)
                image_id += 1

print(f"✅ Augmentation complete. Total images created: {image_id - first_image_id}")

✅ Augmentation complete. Total images created: 10080


In [4]:
import random
import numpy as np

In [5]:

# Output directory for the transformed non-Waldo tiles; created if missing.
nw_output_dir = "augmented/not_waldo"
os.makedirs(nw_output_dir, exist_ok=True)

# === Helper functions ===
def read_first_label_class(label_path):
    """Return the class id of the first label entry in a label file.

    Blank lines before the first entry are skipped. The class token is parsed
    through ``float`` so both ``"1"`` and ``"1.0"`` are accepted, matching how
    ``read_labels`` parses class ids.

    Args:
        label_path: Path of the label text file.

    Returns:
        The class id as an int, or ``None`` when the file does not exist or
        contains no non-blank line.

    Raises:
        ValueError: If the first token of the first entry is not numeric.
    """
    if not os.path.exists(label_path):
        return None
    with open(label_path, "r") as f:
        for line in f:
            tokens = line.split()
            if tokens:
                return int(float(tokens[0]))
    return None

def adjust_brightness(img, factor):
    """Multiply pixel values by ``factor`` and return a clipped uint8 image.

    Works on a float32 copy so the input is left untouched and values above
    255 (or below 0) are clipped rather than wrapping around.
    """
    scaled = np.multiply(img.astype(np.float32), factor)
    clipped = scaled.clip(0, 255)
    return clipped.astype(np.uint8)

def to_grayscale(img):
    """Return a gray version of a BGR image, converted back to BGR layout.

    Note: unlike the earlier single-conversion ``to_grayscale`` in this
    notebook, this one applies ``COLOR_GRAY2BGR`` after ``COLOR_BGR2GRAY``.
    """
    single_channel = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    return cv2.cvtColor(single_channel, cv2.COLOR_GRAY2BGR)

# === Main Processing ===
# Apply one random transformation (optional grayscale + one brightness factor)
# to every tile that does not contain Waldo and save it under `nw_output_dir`.
brightness_factors = [0.2, 0.4, 0.6, 0.8, 1.2, 1.4, 1.6, 1.8]

# Dedicated, seeded generator so Restart & Run All reproduces the same
# negatives without touching the global `random` state.
nw_random_seed = 42
nw_rng = random.Random(nw_random_seed)

for img_name in sorted(os.listdir(input_image_dir)):
    if not img_name.lower().endswith(".jpg"):
        continue

    label_path = os.path.join(input_label_dir, os.path.splitext(img_name)[0] + ".txt")
    first_class = read_first_label_class(label_path)

    # A tile counts as Waldo if its first label says so OR if any label line
    # holds a Waldo box (read_labels scans every line). Checking only the first
    # line would let multi-label Waldo tiles leak into the negatives.
    has_waldo = first_class == class_id or (
        os.path.exists(label_path) and bool(read_labels(label_path))
    )
    if has_waldo:
        continue  # Skip Waldo-containing images

    img_path = os.path.join(input_image_dir, img_name)
    img = cv2.imread(img_path)
    if img is None:
        print(f"⚠️ Couldn't read {img_path}")
        continue

    # Random choice: grayscale or not
    if nw_rng.choice([True, False]):
        img = to_grayscale(img)

    # Random brightness
    factor = nw_rng.choice(brightness_factors)
    img = adjust_brightness(img, factor)

    # Keep the source basename; the extension is normalised to lower-case .jpg.
    output_name = os.path.splitext(img_name)[0] + ".jpg"
    output_path = os.path.join(nw_output_dir, output_name)
    # cv2.imwrite reports failure through its return value, not an exception.
    if not cv2.imwrite(output_path, img):
        print(f"⚠️ Couldn't write {output_path}")

print("✅ Non-Waldo random transformation complete.")


✅ Non-Waldo random transformation complete.
