In [1]:
import os
import shutil
import random

In [2]:
# Root locations: the dataset is expected in a "Dataset xray" folder on the
# current user's Desktop.
_home_dir = os.path.expanduser("~")
desktop_path = os.path.join(_home_dir, "Desktop")
dataset_path = os.path.join(_home_dir, "Desktop", "Dataset xray")

# The two sub-folders of the dataset: labelled images and the merged,
# unlabelled copy.
labelled_path, unlabelled_path = (
    os.path.join(dataset_path, split_name)
    for split_name in ("Dataset labelled", "Dataset unlabelled")
)

# One folder per class inside the labelled sub-folder.
pneumonia_path, normal_path = (
    os.path.join(labelled_path, class_name)
    for class_name in ("Pneumonia", "Normal")
)

# Step 1: Shuffle images within 'pneumonia' and 'normal' folders respectively
def shuffle_folder_images(folder_path):
    """Prefix every file in ``folder_path`` with a random position index.

    The regular files directly inside ``folder_path`` are shuffled with
    ``random.shuffle`` and each one is renamed in place to
    ``"<idx>_<original name>"``, where ``idx`` is the file's position in the
    shuffled order. Sub-directories and other non-file entries are never
    renamed.

    Note: the index prefix is permanent, so calling this again on the same
    folder stacks another prefix (``a.png`` -> ``3_a.png`` -> ``0_3_a.png``).

    Args:
        folder_path: Directory whose files are renamed in place.

    Raises:
        OSError: If the folder cannot be listed or a rename fails.
    """
    entries = os.listdir(folder_path)
    images = [name for name in entries
              if os.path.isfile(os.path.join(folder_path, name))]
    random.shuffle(images)

    # Choose a staging prefix that no existing entry starts with, so a staged
    # name can never land on (and silently replace) a file that is already in
    # the folder, e.g. a leftover from an interrupted earlier run.
    staging_prefix = "temp_"
    while any(name.startswith(staging_prefix) for name in entries):
        staging_prefix = "_" + staging_prefix

    # Pass 1: move every file to its staging name. Doing this for all files
    # before assigning any final name guarantees that a final name such as
    # "0_a.png" cannot overwrite a file that is still waiting to be renamed.
    staged = []
    for idx, filename in enumerate(images):
        final_name = f"{idx}_{filename}"
        staging_path = os.path.join(folder_path, staging_prefix + final_name)
        os.rename(os.path.join(folder_path, filename), staging_path)
        staged.append((staging_path, os.path.join(folder_path, final_name)))

    # Pass 2: rename exactly the files staged above. Tracking the pairs
    # (instead of re-listing the folder and matching on the prefix) keeps
    # unrelated entries, such as a sub-directory named "temp_...", untouched.
    for staging_path, final_path in staged:
        os.rename(staging_path, final_path)

# Apply the per-class shuffle to each labelled folder, Pneumonia first.
for class_folder in (pneumonia_path, normal_path):
    shuffle_folder_images(class_folder)

# Step 2: Combine all images into 'unlabelled dataset' and shuffle them
def copy_and_shuffle_to_unlabelled(src_folders, dest_folder):
    """Copy all files from ``src_folders`` into ``dest_folder`` in random order.

    The regular files found directly inside each source folder are pooled,
    shuffled with ``random.shuffle`` and copied to ``dest_folder`` as
    ``img_<idx><ext>``, where ``idx`` is the position in the shuffled order
    and ``ext`` is the original file extension. Files already present in
    ``dest_folder`` are deleted first; its sub-directories are left alone.
    The source folders are only read, never modified.

    Args:
        src_folders: Iterable of directory paths to collect files from.
        dest_folder: Directory that receives the renamed copies. It is
            created (including parents) if it does not exist.

    Raises:
        ValueError: If ``dest_folder`` is also one of ``src_folders``;
            clearing the destination would delete that source's files.
        OSError: If a source folder cannot be listed, or a delete or copy
            fails.
    """
    # Materialise once so a one-shot iterable can be used for both the
    # safety check and the collection loop below.
    src_folders = list(src_folders)

    dest_real = os.path.normcase(os.path.realpath(dest_folder))
    for folder in src_folders:
        if os.path.normcase(os.path.realpath(folder)) == dest_real:
            raise ValueError(
                f"Destination folder is also a source folder: {dest_folder}"
            )

    # Collect the sources BEFORE touching the destination: if a source folder
    # is missing or unreadable, the call fails here and the previous contents
    # of the destination are preserved.
    all_images = []
    for folder in src_folders:
        all_images.extend(
            os.path.join(folder, name)
            for name in os.listdir(folder)
            if os.path.isfile(os.path.join(folder, name))
        )

    random.shuffle(all_images)

    os.makedirs(dest_folder, exist_ok=True)

    # Clear previously generated files so stale img_* copies do not mix with
    # the new shuffle.
    for name in os.listdir(dest_folder):
        stale_path = os.path.join(dest_folder, name)
        if os.path.isfile(stale_path):
            os.remove(stale_path)

    for idx, img_path in enumerate(all_images):
        ext = os.path.splitext(img_path)[1]
        shutil.copy(img_path, os.path.join(dest_folder, f"img_{idx}{ext}"))

# Pool both labelled class folders into the unlabelled folder, then report
# completion.
copy_and_shuffle_to_unlabelled(
    src_folders=[pneumonia_path, normal_path],
    dest_folder=unlabelled_path,
)

print("Done! Images shuffled within categories and merged into unlabelled dataset.")

Done! Images shuffled within categories and merged into unlabelled dataset.
