In [11]:
import os
import cv2
import random
from collections import defaultdict
import shutil

# Source dataset root and the destination root for the undersampled copy.
input_folder = "data/MSID"
output_folder = "data/MSID_US"

# Class folders whose images are randomly thinned out; every other class
# folder is copied to the output in full.
classes_to_undersample = ["Monkeypox", "Normal"]

# Probability with which each image of an undersampled class is kept.
undersampling_ratio = 0.4

# Per-class image tallies, keyed by class name: how many images were seen in
# the input and how many of them were written to the output.
num_images_original, num_images_undersampled = defaultdict(int), defaultdict(int)

# Build the undersampled dataset. Every class folder found under
# ``input_folder`` is either thinned out at random (classes listed in
# ``classes_to_undersample``) or copied to ``output_folder`` unchanged.
#
# Kept files are copied byte-for-byte with ``shutil.copy2`` instead of being
# decoded and re-encoded through OpenCV: this avoids lossy re-compression,
# avoids decoding files that are then discarded, and avoids the crash that
# ``cv2.imwrite`` raises when ``cv2.imread`` returned ``None`` for a file it
# could not read.
#
# Directory listings are sorted so that, when the ``random`` module is seeded,
# the selection does not depend on the order the filesystem returns entries.
#
# NOTE: an existing output class folder is not cleared first, so running this
# again adds the new random selection to whatever an earlier run left there.
for class_name in sorted(os.listdir(input_folder)):
    class_folder = os.path.join(input_folder, class_name)
    output_class_folder = os.path.join(output_folder, class_name)

    # Stray files sitting next to the class folders are not classes, and
    # shutil.copytree cannot copy a plain file.
    if not os.path.isdir(class_folder):
        continue

    if class_name not in classes_to_undersample:
        # copytree refuses to write into an existing directory, so a class
        # that was already copied by a previous run is left untouched.
        if not os.path.exists(output_class_folder):
            shutil.copytree(class_folder, output_class_folder)
        continue

    os.makedirs(output_class_folder, exist_ok=True)

    for image_name in sorted(os.listdir(class_folder)):
        image_path = os.path.join(class_folder, image_name)
        # Only regular files are candidates; nested directories are ignored.
        if not os.path.isfile(image_path):
            continue

        # Update the counter for the original dataset
        num_images_original[class_name] += 1

        # random.random() lies in [0.0, 1.0), so each file is kept with
        # probability ``undersampling_ratio`` and a ratio of 0 keeps nothing.
        if random.random() < undersampling_ratio:
            output_image_path = os.path.join(output_class_folder, image_name)
            shutil.copy2(image_path, output_image_path)
            # Update the counter for the undersampled dataset
            num_images_undersampled[class_name] += 1

# Report the per-class image counts, first for the input dataset and then for
# the undersampled output, one "<class>: <count>" line per class.
for heading, tallies in (
    ("Original dataset:", num_images_original),
    ("Undersampled dataset:", num_images_undersampled),
):
    print(heading)
    for class_name, total in tallies.items():
        print(f"{class_name}: {total}")

Original dataset:
Normal: 293
Monkeypox: 279
Undersampled dataset:
Normal: 113
Monkeypox: 120
