In [1]:
!wget https://github.com/jbrownlee/Datasets/releases/download/Flickr8k/Flickr8k_Dataset.zip
!unzip -q Flickr8k_Dataset.zip

--2025-03-16 10:09:12--  https://github.com/jbrownlee/Datasets/releases/download/Flickr8k/Flickr8k_Dataset.zip
Resolving github.com (github.com)... 140.82.116.3
Connecting to github.com (github.com)|140.82.116.3|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://objects.githubusercontent.com/github-production-release-asset-2e65be/124585957/47f52b80-3501-11e9-8f49-4515a2a3339b?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=releaseassetproduction%2F20250316%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20250316T100912Z&X-Amz-Expires=300&X-Amz-Signature=6930054fae7a698700a06043a9a5cf510da7bc05f3569411ebd83cd68fa487f4&X-Amz-SignedHeaders=host&response-content-disposition=attachment%3B%20filename%3DFlickr8k_Dataset.zip&response-content-type=application%2Foctet-stream [following]
--2025-03-16 10:09:12--  https://objects.githubusercontent.com/github-production-release-asset-2e65be/124585957/47f52b80-3501-11e9-8f49-4515a2a3339b?X-Amz-Algorithm=AWS4-HMAC-SHA2

<center>
<h1 style="font-size:10vw"><b>Predefined photo</b>: uncomment any line
<br>
<b>Local file</b>: leave <tt>fname = None</tt> as is</h1>
</center>

In [None]:
import os
import cv2
import numpy as np
import random

# Build a synthetic inpainting dataset from Flickr8k: every readable image gets
# a random number of black rectangles painted onto it, and a single-channel mask
# (0 = untouched, 255 = blacked out) is written alongside the distorted image.

# ---------------------------
# Paths: Update as needed
# ---------------------------
flickr8k_images_dir = "/content/Flicker8k_Dataset/"  # Existing folder with Flickr8k images
output_images_dir   = "distorted_images"                  # Folder to save the new distorted images
output_masks_dir    = "distorted_masks"                   # Folder to save the corresponding masks

os.makedirs(output_images_dir, exist_ok=True)
os.makedirs(output_masks_dir, exist_ok=True)

# ---------------------------
# Parameters for black boxes
# ---------------------------
num_boxes_min = 1   # Minimum number of black boxes to draw per image
num_boxes_max = 5   # Maximum number of black boxes to draw per image
box_size_min  = 20  # Minimum width/height for each black box
box_size_max  = 60  # Maximum width/height for each black box

# ---------------------------
# Reproducibility
# ---------------------------
# A dedicated generator keeps the box layout repeatable across
# "Restart & Run All" without touching the global `random` state.
random_seed = 42    # Set to None to get different boxes on every run
box_rng = random.Random(random_seed)

# ---------------------------
# Loop over all images in Flickr8k
# ---------------------------
# os.listdir() returns entries in arbitrary order; sorting makes the
# image -> random-box assignment deterministic for a given seed.
all_files = sorted(os.listdir(flickr8k_images_dir))
image_count = 0
unreadable_files = []  # inputs that cv2.imread could not decode
failed_writes = []     # inputs whose distorted image or mask could not be saved

for filename in all_files:
    # Only process image files
    if not filename.lower().endswith(('.jpg', '.jpeg', '.png')):
        continue

    # Full path to original image
    img_path = os.path.join(flickr8k_images_dir, filename)

    # Read image (as BGR); cv2.imread returns None instead of raising on failure
    image = cv2.imread(img_path)
    if image is None:
        unreadable_files.append(filename)
        continue

    img_h, img_w = image.shape[:2]

    # Blank mask with the image's height/width:
    # 0 for background, 255 for black-box region
    mask = np.zeros((img_h, img_w), dtype=np.uint8)

    # Randomly choose how many black boxes to draw
    num_boxes = box_rng.randint(num_boxes_min, num_boxes_max)

    for _ in range(num_boxes):
        # Random box size
        w = box_rng.randint(box_size_min, box_size_max)
        h = box_rng.randint(box_size_min, box_size_max)

        # Random top-left corner. When the image is smaller than the box the
        # corner is clamped to 0 and the slices below clip the box to the image.
        x = box_rng.randint(0, max(0, img_w - w))
        y = box_rng.randint(0, max(0, img_h - h))

        # Black out the region in the image and flag it in the mask
        image[y:y+h, x:x+w] = 0
        mask[y:y+h, x:x+w] = 255

    # ---------------------------
    # Save distorted image & mask
    # ---------------------------
    base_name, ext = os.path.splitext(filename)

    distorted_img_path = os.path.join(output_images_dir, base_name + ext)
    distorted_mask_path = os.path.join(output_masks_dir, base_name + "_mask.png")

    # NOTE: .jpg/.jpeg outputs are re-encoded with lossy JPEG compression, so
    # pixels at box borders may not read back as exactly 0; the PNG mask is lossless.
    image_saved = cv2.imwrite(distorted_img_path, image)
    mask_saved = cv2.imwrite(distorted_mask_path, mask)

    # cv2.imwrite signals failure through its return value; only count pairs
    # that were actually written so the final total is trustworthy.
    if not (image_saved and mask_saved):
        failed_writes.append(filename)
        continue

    image_count += 1
    if image_count % 500 == 0:
        print(f"Processed {image_count} images...")

print(f"Done! Created {image_count} distorted images with masks.")
if unreadable_files:
    print(f"Skipped {len(unreadable_files)} unreadable files, e.g. {unreadable_files[:5]}")
if failed_writes:
    print(f"Failed to save {len(failed_writes)} image/mask pairs, e.g. {failed_writes[:5]}")

Processed 500 images...
Processed 1000 images...
