In [1]:
import os
import random
import numpy as np
from PIL import Image
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [3]:

def balance_dataset(Oblique_INPUT_FOLDER,Oblique_OUTPUT_FOLDER, Overriding_INPUT_FOLDER, Overriding_OUTPUT_FOLDER, image_size=(224, 224)):
    """Balance the Oblique and Overriding image classes using augmented copies.

    Every ``.jpg``/``.jpeg``/``.png`` file in the two input folders is loaded,
    converted to RGB and resized to ``image_size``. With
    ``needed = abs(n_oblique - n_overriding)``, the smaller class receives
    ``2 * needed`` augmented images and the larger class receives ``needed``,
    so both classes end up with ``max(n_oblique, n_overriding) + needed``
    images. On a tie Oblique is treated as the larger class, which makes no
    difference because ``needed`` is then 0 and nothing is augmented.

    Augmented images for a class are always generated from that class's own
    source images, so no image is ever saved under the other class's label.

    Args:
        Oblique_INPUT_FOLDER: Folder containing the source Oblique images.
        Oblique_OUTPUT_FOLDER: Folder that receives ``oblique_<idx>.png`` files
            (created if it does not exist).
        Overriding_INPUT_FOLDER: Folder containing the source Overriding images.
        Overriding_OUTPUT_FOLDER: Folder that receives ``overriding_<idx>.png``
            files (created if it does not exist).
        image_size: ``(width, height)`` passed to ``Image.resize``.

    Returns:
        None. Results are written to disk and a summary is printed.

    Raises:
        IndexError: If exactly one of the classes has no images, because there
            is nothing to sample augmentations from.
    """

    def load_and_process(folder_path):
        """Return every supported image in ``folder_path`` as a resized RGB array."""
        images = []
        for filename in os.listdir(folder_path):
            if filename.lower().endswith(('.jpg', '.jpeg', '.png')):
                img_path = os.path.join(folder_path, filename)
                # Context manager releases the underlying file handle promptly.
                with Image.open(img_path) as img:
                    resized = img.convert('RGB').resize(image_size)
                images.append(np.array(resized))
        return images

    def augment(source_images, count, datagen):
        """Create ``count`` augmented images sampled at random from ``source_images``."""
        augmented = []
        for _ in range(count):
            img = random.choice(source_images)
            # The generator expects a batch axis: (1, height, width, channels).
            img_expanded = np.expand_dims(img, axis=0)
            aug_img = datagen.flow(img_expanded, batch_size=1)[0].astype(np.uint8)[0]
            augmented.append(aug_img)
        return augmented

    # Load both classes
    oblique_images = load_and_process(Oblique_INPUT_FOLDER)
    current_oblique_count = len(oblique_images)

    overriding_images = load_and_process(Overriding_INPUT_FOLDER)
    current_overriding_count = len(overriding_images)
    print(f'Found : {current_oblique_count} oblique, {current_overriding_count} overriding.')

    needed = abs(current_overriding_count - current_oblique_count)
    oblique_is_smaller = current_oblique_count < current_overriding_count
    # The smaller class gets twice the gap, the larger class gets the gap once,
    # so both finish with the same number of images.
    oblique_needed = needed * 2 if oblique_is_smaller else needed
    overriding_needed = needed if oblique_is_smaller else needed * 2

    # Generate augmented images if needed
    if needed > 0:
        datagen = ImageDataGenerator(rotation_range=40,shear_range=0.2,zoom_range=0.2,fill_mode='constant')
        # Each class is augmented strictly from its own originals; sampling from
        # the other class would store images under the wrong label.
        oblique_images += augment(oblique_images, oblique_needed, datagen)
        overriding_images += augment(overriding_images, overriding_needed, datagen)

    # Make sure the destinations exist before writing.
    os.makedirs(Oblique_OUTPUT_FOLDER, exist_ok=True)
    os.makedirs(Overriding_OUTPUT_FOLDER, exist_ok=True)

    # Save original + augmented Oblique images
    for idx, img in enumerate(oblique_images):
        Image.fromarray(img).save(os.path.join(Oblique_OUTPUT_FOLDER, f'oblique_{idx}.png'))

    # Save original + augmented Overriding images
    for idx, img in enumerate(overriding_images):
        Image.fromarray(img).save(os.path.join(Overriding_OUTPUT_FOLDER, f'overriding_{idx}.png'))

    print(f"Saved {len(oblique_images)} Oblique images to {Oblique_OUTPUT_FOLDER}")
    print(f"Saved {len(overriding_images)} Overriding images to {Overriding_OUTPUT_FOLDER}")



In [None]:
if __name__ == "__main__":
    # Root directory holding both the augmented (input) and balanced (output) datasets.
    Base_Folder = 'D:/Learning/University of sadat/Grade 4/Semester 2/06- Graduation Project/Coding/00- The DataSet/'

    # Oblique class: where images are read from and where the balanced set is written.
    Oblique_INPUT_FOLDER = Base_Folder + 'Augmented_DataSet/Oblique/'
    Oblique_OUTPUT_FOLDER = Base_Folder + 'Balanced_DataSet/Oblique/'

    # Overriding class: where images are read from and where the balanced set is written.
    Overriding_INPUT_FOLDER = Base_Folder + 'Augmented_DataSet/Overriding/'
    Overriding_OUTPUT_FOLDER = Base_Folder + 'Balanced_DataSet/Overriding/'

    # Both destinations must exist before any image is saved into them.
    os.makedirs(Oblique_OUTPUT_FOLDER, exist_ok=True)
    os.makedirs(Overriding_OUTPUT_FOLDER, exist_ok=True)

    balance_dataset(
        Oblique_INPUT_FOLDER,
        Oblique_OUTPUT_FOLDER,
        Overriding_INPUT_FOLDER,
        Overriding_OUTPUT_FOLDER,
    )


Found : 1446 oblique, 2843 overriding.
Saved 4240 Oblique images to D:/Learning/University of sadat/Grade 4/Semester 2/06- Graduation Project/Coding/Balanced_DataSet/Oblique/
Saved 4240 Overriding images to D:/Learning/University of sadat/Grade 4/Semester 2/06- Graduation Project/Coding/Balanced_DataSet/Overriding/
