In [1]:
import os

# Define the paths to the folders
class_0_path = "E:/dataset_temp/class_0"
class_1_path = "E:/dataset_temp/class_1"

# Function to count the number of files in a directory
def count_files(directory):
    return len([name for name in os.listdir(directory) if os.path.isfile(os.path.join(directory, name))])

# Count the number of images in each class
class_0_count = count_files(class_0_path)
class_1_count = count_files(class_1_path)

# Print the counts
print("Number of images in class 0:", class_0_count)
print("Number of images in class 1:", class_1_count)


Number of images in class 0: 1787
Number of images in class 1: 666


In [2]:
import os
import random
import shutil

# Define paths to the folders
class_0_path = "E:/dataset_temp/class_0"
class_1_path = "E:/dataset_temp/class_1"
resampled_class_0_path = "E:/dataset_temp/resampled_class_0"

# Ensure resampled_class_0_path exists, create if it doesn't
if not os.path.exists(resampled_class_0_path):
    os.makedirs(resampled_class_0_path)

# Function to randomly select files from a directory and copy them to another directory
def random_undersample(source_dir, target_dir, num_samples):
    filenames = os.listdir(source_dir)
    random.shuffle(filenames)
    for filename in filenames[:num_samples]:
        shutil.copy(os.path.join(source_dir, filename), target_dir)

# Number of samples to keep in each class
num_samples = min(class_0_count, class_1_count)

# Perform random undersampling on class 0
random_undersample(class_0_path, resampled_class_0_path, num_samples)

# Print the new count for class 0
resampled_class_0_count = count_files(resampled_class_0_path)
print("Number of images in resampled class 0:", resampled_class_0_count)


Number of images in resampled class 0: 666


In [16]:
import os
import random
import shutil
from PIL import Image, ImageOps, ImageFilter
from tqdm import tqdm

resampled_class_0_path = "E:/dataset_temp/resampled_class_0_augmented"
resampled_class_1_path = "E:/dataset_temp/class_1_augmented"

# Ensure resampled_class_0_path and resampled_class_1_path exist, create if they don't
for path in [resampled_class_0_path, resampled_class_1_path]:
    if not os.path.exists(path):
        os.makedirs(path)

def augment_data(source_dir, target_dir, num_augmented_samples):
    filenames = os.listdir(source_dir)
    total_files = len(filenames)
    
    # Apply horizontal flipping and zooming to all images
    for filename in tqdm(filenames, desc="Applying Augmentation"):
        img = Image.open(os.path.join(source_dir, filename))
        img = img.convert('RGB')
        
        # Apply horizontal flipping
        img_flipped = img.transpose(Image.FLIP_LEFT_RIGHT)
        img_flipped.save(os.path.join(target_dir, f"flipped_{filename}"))
        
        # Apply zooming
        for i in range(num_augmented_samples):
            zoom_factor = 1.0 + (i + 1) * 0.1  # Adjust zoom factor deterministically
            new_width = int(img.width * zoom_factor)
            new_height = int(img.height * zoom_factor)
            img_zoomed = img.resize((new_width, new_height), Image.BICUBIC)
            img_zoomed.save(os.path.join(target_dir, f"zoomed_{i}_{filename}"))

# Number of augmented samples to generate
num_augmented_samples = 4  # Adjust as needed

# Perform data augmentation on class 0
augment_data(class_0_path, resampled_class_0_path, num_augmented_samples)

# Perform data augmentation on class 1
augment_data(class_1_path, resampled_class_1_path, num_augmented_samples)

# Print completion message
print("Augmentation completed.")


Applying Augmentation: 100%|███████████████████████████████████████████████████████| 1787/1787 [05:17<00:00,  5.62it/s]
Applying Augmentation: 100%|█████████████████████████████████████████████████████████| 666/666 [03:36<00:00,  3.08it/s]

Augmentation completed.



