In [None]:
# Settings
# Expected dataset layout: `input_folder` contains two subfolders, `images` and `labels`.
#   - `images` should contain only images (files ending in `.png` are the ones picked up).
#   - `labels` should contain only labels in YOLO format, one `.txt` file per image, with the
#     same base name and the same relative subfolder as the image it describes.
# Images that have no matching label file are skipped.
# The augmented copies are written to `output_folder`, using the same `images` / `labels` layout.

input_folder = 'particle-og'
output_folder = 'particle'

In [1]:
import os
import random
import shutil
from PIL import Image, ImageFilter
import numpy as np

def apply_blur_and_noise(image, blur_factor=0.5, noise_factor=0.5):
    """
    Apply Gaussian blur and additive grayscale noise to an image.

    Both factors are clamped to the range [0, 1]. ``blur_factor`` scales the
    Gaussian blur radius from 0 up to 10; ``noise_factor`` scales the standard
    deviation of zero-mean Gaussian noise from 0 up to 25. One noise value is
    drawn per pixel and added to every colour channel of that pixel, which is
    what makes the noise gray rather than coloured.

    Single-channel images (e.g. mode ``L``) and images with any number of
    channels are supported. For modes ``LA`` and ``RGBA`` the alpha channel is
    left untouched. The result is clipped to 0..255 and stored as 8-bit values.

    Args:
        image: PIL image to process.
        blur_factor: Blur strength; values outside [0, 1] are clamped.
        noise_factor: Noise strength; values outside [0, 1] are clamped.

    Returns:
        A PIL image built from the blurred (and, if ``noise_factor`` > 0,
        noised) pixel array.
    """
    blur_factor = max(0, min(1, blur_factor))
    noise_factor = max(0, min(1, noise_factor))

    max_blur_radius = 10
    blur_radius = blur_factor * max_blur_radius
    blurred_image = image.filter(ImageFilter.GaussianBlur(blur_radius))

    img_array = np.array(blurred_image)

    if noise_factor > 0:
        mean = 0
        sigma = 25 * noise_factor
        # One noise sample per pixel (height x width), truncated to integers.
        grayscale_noise = np.random.normal(mean, sigma, img_array.shape[:2]).astype(np.int16)

        # Widen before adding so that the sum cannot wrap around in uint8.
        work_dtype = np.result_type(img_array.dtype, np.int16)
        noisy = img_array.astype(work_dtype)

        if noisy.ndim == 2:
            # Single-channel image: the noise has exactly the pixel grid's shape.
            noisy += grayscale_noise
        else:
            # Multi-channel image: broadcast the same noise over every colour
            # channel, but keep a trailing alpha channel as it is.
            color_channels = noisy.shape[2]
            if blurred_image.mode in ("LA", "RGBA"):
                color_channels -= 1
            noisy[..., :color_channels] += grayscale_noise[..., np.newaxis]

        noisy_img_array = np.clip(noisy, 0, 255).astype(np.uint8)
    else:
        noisy_img_array = img_array

    noisy_image = Image.fromarray(noisy_img_array)
    return noisy_image

def create_random_versions(input_image_path, output_image_path, num_versions=10):
    """
    Create random versions of the image with different blur and noise levels.

    Each version uses a blur factor and a noise factor drawn independently and
    uniformly from [0, 1]. Versions are saved as ``<name>_v<k><ext>`` with
    ``k`` running from 1 to ``num_versions``, where ``<name>`` and ``<ext>``
    are taken from the input file name.

    Args:
        input_image_path: Path of the source image file.
        output_image_path: Directory the generated versions are saved into.
        num_versions: Number of versions to generate.
    """
    image_name, image_ext = os.path.splitext(os.path.basename(input_image_path))

    # Open via a context manager so the underlying file handle is released
    # once all versions have been written, instead of staying open.
    with Image.open(input_image_path) as image:
        for i in range(num_versions):
            blur_factor = random.uniform(0, 1)
            noise_factor = random.uniform(0, 1)

            processed_image = apply_blur_and_noise(image, blur_factor, noise_factor)
            new_image_name = f"{image_name}_v{i+1}{image_ext}"
            processed_image.save(os.path.join(output_image_path, new_image_name))

def duplicate_label_files(input_label_path, output_label_path, image_name, num_versions=10):
    """
    Copy one label file once per generated image version.

    The copies are written into the directory ``output_label_path`` and are
    named ``<image_name>_v<k>.txt`` for ``k`` from 1 to ``num_versions``, so
    that each image version gets a label file with a matching base name.
    """
    for version in range(1, num_versions + 1):
        destination = os.path.join(output_label_path, f"{image_name}_v{version}.txt")
        shutil.copyfile(input_label_path, destination)

def process_dataset(input_images_dir, input_labels_dir, output_images_dir, output_labels_dir, num_versions=10):
    """
    Process the dataset by creating new versions of each image and duplicating the label files.

    Walks ``input_images_dir`` recursively. For every PNG image (extension
    matched case-insensitively) whose label file exists at the same relative
    location under ``input_labels_dir`` (same base name, ``.txt`` extension),
    ``num_versions`` randomised image versions are written below
    ``output_images_dir`` and the label file is copied once per version below
    ``output_labels_dir``. The subfolder structure of the input is reproduced
    in the output. Images without a label file are skipped.

    Args:
        input_images_dir: Root directory of the source images.
        input_labels_dir: Root directory of the source label files.
        output_images_dir: Root directory for generated images; created if missing.
        output_labels_dir: Root directory for copied labels; created if missing.
        num_versions: Number of versions to generate per image.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_images_dir, exist_ok=True)
    os.makedirs(output_labels_dir, exist_ok=True)

    for subdir, _, files in os.walk(input_images_dir):
        # Location of this directory relative to the images root; the labels
        # and both output trees use the same relative location.
        relative_subdir = os.path.relpath(subdir, input_images_dir)

        for file in files:
            # Case-insensitive so that e.g. "scan.PNG" is not silently ignored.
            if not file.lower().endswith('.png'):
                continue

            image_stem = os.path.splitext(file)[0]

            # Assuming label txt has the same name as the image
            label_file = os.path.join(input_labels_dir, relative_subdir, image_stem + '.txt')
            if not os.path.exists(label_file):
                continue

            output_image_subdir = os.path.join(output_images_dir, relative_subdir)
            output_label_subdir = os.path.join(output_labels_dir, relative_subdir)

            os.makedirs(output_image_subdir, exist_ok=True)
            os.makedirs(output_label_subdir, exist_ok=True)

            # Create random image versions
            create_random_versions(os.path.join(subdir, file), output_image_subdir, num_versions)

            # Duplicate label files
            duplicate_label_files(label_file, output_label_subdir, image_stem, num_versions)

# Source dataset: images and their YOLO labels sit in sibling subfolders of input_folder.
input_images_dir, input_labels_dir = f'{input_folder}/images', f'{input_folder}/labels'

# Destination dataset uses the same images/labels layout under output_folder.
output_images_dir, output_labels_dir = f'{output_folder}/images', f'{output_folder}/labels'


# Step 1: Augment the dataset (10 random versions per image, each with a copy of its label file)
process_dataset(
    input_images_dir=input_images_dir,
    input_labels_dir=input_labels_dir,
    output_images_dir=output_images_dir,
    output_labels_dir=output_labels_dir,
    num_versions=10,
)

