# AUGMENTATION OF FULL IMAGES

In [24]:
import os
import random
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import shutil
import numpy as np

In [15]:
# Dataset roots: images are read from per-class subfolders of `original_folder`
# and the balanced copy is written under `balanced_folder`.
original_folder, balanced_folder = "emp_data", "augmentated_emp_data"

In [16]:
# Random-augmentation settings; each name is forwarded unchanged to the
# ImageDataGenerator keyword argument of the same name.
rotation_range = 30
width_shift_range = height_shift_range = 0.1  # same shift limit on both axes
shear_range = zoom_range = 0.2
horizontal_flip = True
fill_mode = "nearest"

In [17]:
# Target image count for every class folder in the balanced dataset:
# larger classes are randomly subsampled to this size, smaller ones are
# topped up with augmented copies.
desired_samples_per_class = 33

In [18]:
# Ensure the output root exists; exist_ok makes this cell safe to re-run.
os.makedirs(name=balanced_folder, exist_ok=True)

In [19]:
# Build the Keras generator that applies the random transforms configured above.
datagen = ImageDataGenerator(
    # geometric transforms
    rotation_range=rotation_range,
    shear_range=shear_range,
    zoom_range=zoom_range,
    # translations
    width_shift_range=width_shift_range,
    height_shift_range=height_shift_range,
    # mirroring and border handling
    horizontal_flip=horizontal_flip,
    fill_mode=fill_mode,
)

In [25]:
# Build a class-balanced copy of the dataset.
#
# For every class subfolder of `original_folder`:
#   * copy up to `desired_samples_per_class` randomly chosen originals into the
#     matching subfolder of `balanced_folder`;
#   * if the class has fewer originals than that, top it up with randomly
#     augmented versions of its images produced by `datagen`.
#
# NOTE(review): re-running this cell without first clearing `balanced_folder`
# can leave more than `desired_samples_per_class` files per class, because the
# random selection (and therefore the output file names) differs between runs.

# Extensions treated as images; compared case-insensitively below.
image_extensions = (".jpg", ".jpeg", ".png")

for subfolder_name in os.listdir(original_folder):
    subfolder_path = os.path.join(original_folder, subfolder_name)
    if not os.path.isdir(subfolder_path):
        continue

    # Lower-case the name before matching so files such as "IMG_01.JPG"
    # are not silently left out of the class.
    image_files = [
        file for file in os.listdir(subfolder_path)
        if file.lower().endswith(image_extensions)
    ]

    num_samples = len(image_files)
    if num_samples == 0:
        # Nothing to copy and nothing to augment from: random.choice() on an
        # empty list would raise IndexError, so skip the class explicitly.
        print(f"Skipping '{subfolder_name}': no image files found.")
        continue

    # Originals to copy as-is, and augmented images needed to reach the target
    samples_to_keep = min(desired_samples_per_class, num_samples)
    samples_to_generate = desired_samples_per_class - samples_to_keep

    # Shuffle so that the kept subset of an over-sized class is random
    random.shuffle(image_files)

    target_folder = os.path.join(balanced_folder, subfolder_name)
    os.makedirs(target_folder, exist_ok=True)

    # Copy (not move) the selected originals; the source dataset is left intact
    for file in image_files[:samples_to_keep]:
        source_path = os.path.join(subfolder_path, file)
        target_path = os.path.join(target_folder, file)
        shutil.copy(source_path, target_path)

    # Generate augmented samples for under-represented classes
    for i in range(samples_to_generate):
        file = random.choice(image_files)
        source_path = os.path.join(subfolder_path, file)
        # The index prefix keeps names unique when the same source is drawn twice
        target_filename = f"augmented_{i}_{file}"
        target_path = os.path.join(target_folder, target_filename)

        img = image.load_img(source_path)

        # datagen.flow expects a batch, so add a leading batch axis of size 1
        x = np.expand_dims(image.img_to_array(img), axis=0)

        # First batch of the iterator -> its single randomly transformed image
        augmented_images = datagen.flow(x, batch_size=1)

        # scale=False keeps the original pixel intensities; the default
        # (scale=True) min-max stretches every image to 0..255, which would
        # change the contrast of augmented samples relative to the originals.
        augmented_image = image.array_to_img(augmented_images[0][0], scale=False)

        augmented_image.save(target_path)

print("Balanced dataset with augmented samples created!")


Balanced dataset with augmented samples created!
