In [None]:
# Randomly deleting images if input image > target image
import os
import numpy as np
import cv2
from tqdm import tqdm
import albumentations as A
import uuid
import random
import shutil

def resize_image(image, target_width=299, target_height=299):
    """
    Return `image` scaled to `target_width` x `target_height` pixels.

    The scaling is performed by a one-step albumentations pipeline whose
    `Resize` transform is always applied (p=1.0).
    """
    resize_step = A.Resize(height=target_height, width=target_width, p=1.0)
    pipeline = A.Compose([resize_step])
    return pipeline(image=image)['image']
    
def augment_image(image, target_width=299, target_height=299):
    """
    Apply random augmentations to an image and resize the result.

    The horizontal flip, vertical flip, brightness/contrast change,
    rotation (limit=20) and rescale (scale_limit=0.2) are each applied
    with probability 0.5. The final resize is always applied (p=1.0), so
    the returned image has the requested size whichever of the random
    steps ran.

    Args:
        image: Image array passed to the albumentations pipeline.
        target_width: Width of the returned image in pixels (default 299).
        target_height: Height of the returned image in pixels (default 299).

    Returns:
        The augmented image produced by the pipeline.
    """
    transform = A.Compose([
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.5),
        A.Rotate(limit=20, p=0.5),
        A.RandomScale(scale_limit=0.2, p=0.5),
        # Must stay last: it fixes the output size after RandomScale.
        A.Resize(height=target_height, width=target_width, p=1.0),
    ])
    augmented = transform(image=image)
    return augmented['image']

def process_images(input_folder, output_folder, target_count=1000):
    """
    Resize the images of input_folder into output_folder, then randomly
    delete or augment until output_folder holds target_count images.

    Steps:
      1. Every .png/.jpg/.jpeg file in input_folder is read, resized with
         resize_image() and written to output_folder as 'orig_<uuid>.png'.
      2. If output_folder then holds more than target_count image files,
         randomly chosen ones are deleted and the function returns.
      3. If it holds fewer, augmented copies of randomly chosen output
         images are written as 'aug_<uuid>.png' until the target is met.

    Image files already present in output_folder count toward
    target_count and may be deleted or used as augmentation sources.

    Args:
        input_folder: Directory containing the source images.
        output_folder: Directory to write to; created if missing.
        target_count: Desired number of images in output_folder.

    Returns:
        None. Progress and failures are reported with print().
    """
    image_exts = ('.png', '.jpg', '.jpeg')

    # Get all image files from the input folder
    image_files = [f for f in os.listdir(input_folder) if f.lower().endswith(image_exts)]

    os.makedirs(output_folder, exist_ok=True)

    if not image_files:
        print(f"No images found in {input_folder}, skipping processing.")
        return

    print(f"Resizing and saving images to {output_folder}...")
    for img_file in image_files:
        input_img_path = os.path.join(input_folder, img_file)
        image = cv2.imread(input_img_path)
        if image is None:
            print(f"Failed to read image: {input_img_path}")
            continue

        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        resized_image = resize_image(image)
        output_img_path = os.path.join(output_folder, f'orig_{uuid.uuid4().hex}.png')
        # Output is PNG, so no JPEG quality flag is passed (it only applies
        # to JPEG files). imwrite reports failure via its return value.
        if not cv2.imwrite(output_img_path, cv2.cvtColor(resized_image, cv2.COLOR_RGB2BGR)):
            print(f"Failed to write image: {output_img_path}")

    # Consider only regular image files: a stray sub-directory or non-image
    # file must not be deleted or picked as an augmentation source.
    resized_files = [
        f for f in os.listdir(output_folder)
        if f.lower().endswith(image_exts)
        and os.path.isfile(os.path.join(output_folder, f))
    ]

    if len(resized_files) > target_count:
        print(f"Too many images in {output_folder}. Deleting excess images...")
        excess_files = random.sample(resized_files, len(resized_files) - target_count)
        for file in excess_files:
            os.remove(os.path.join(output_folder, file))
        print(f"Reduced images in {output_folder} to {target_count}.")
        return

    if len(resized_files) < target_count:
        if not resized_files:
            # Every input failed to load; random.choice() on an empty list
            # would raise IndexError.
            print(f"No usable images in {output_folder}, cannot augment.")
            return

        print(f"Generating augmented images for {output_folder}...")
        missing = target_count - len(resized_files)
        pool = list(resized_files)
        for _ in tqdm(range(missing)):
            # Keep drawing until a readable source is found. Unreadable
            # files are dropped from the pool so the loop always terminates
            # and a bad file does not cost an output image.
            image = None
            while pool and image is None:
                img_file = random.choice(pool)
                img_path = os.path.join(output_folder, img_file)
                image = cv2.imread(img_path)
                if image is None:
                    print(f"Failed to read image: {img_path}")
                    pool.remove(img_file)

            if image is None:
                print(f"No readable source images left in {output_folder}, stopping augmentation early.")
                break

            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            aug_image = augment_image(image)
            new_filename = os.path.join(output_folder, f'aug_{uuid.uuid4().hex}.png')
            if not cv2.imwrite(new_filename, cv2.cvtColor(aug_image, cv2.COLOR_RGB2BGR)):
                print(f"Failed to write image: {new_filename}")

    print(f"Processing complete. Total images in {output_folder}: {len(os.listdir(output_folder))}.")

# Root folders: each sub-directory of the input root is processed into a
# sub-directory of the same name under the output root.
input_base_folder = 'D:\\Major Project\\Dataset\\Krishi'
output_base_folder = 'D:\\Major Project\\Dataset\\Segmented_Augmented\\Augmented_Random_Sampling'

for class_folder in os.listdir(input_base_folder):
    input_folder_path = os.path.join(input_base_folder, class_folder)
    output_folder_path = os.path.join(output_base_folder, class_folder)

    # Only directories are processed; loose files in the root are skipped.
    if os.path.isdir(input_folder_path):
        process_images(input_folder_path, output_folder_path, target_count=1000)


Resizing and saving images to D:\Major Project\Dataset\Colored_Augmented\Papaya_Healthy...
Generating augmented images for D:\Major Project\Dataset\Colored_Augmented\Papaya_Healthy...


  6%|█████                                                                            | 53/856 [00:02<00:41, 19.55it/s]

In [4]:
# Deleting images according to quality of images if input image > target image
import os
import random
import cv2
import numpy as np
from tqdm import tqdm
import albumentations as A
import shutil
import uuid

def is_image_sharp(image_path, threshold=100):
    """
    Report whether the image at image_path is sharp.

    The image is loaded in grayscale and the variance of its Laplacian is
    compared with `threshold`; a variance above the threshold counts as
    sharp.

    Args:
        image_path: Path of the image file to test.
        threshold: Laplacian variance the image must exceed (default 100).

    Returns:
        True if the variance exceeds `threshold`; False otherwise,
        including when the file cannot be read or OpenCV fails on it.
    """
    try:
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            # imread signals an unreadable file by returning None.
            return False
        laplacian_var = cv2.Laplacian(image, cv2.CV_64F).var()
    except cv2.error:
        # Best-effort filter: an OpenCV failure means "not sharp", but
        # unrelated exceptions (e.g. KeyboardInterrupt) still propagate.
        return False
    return bool(laplacian_var > threshold)

def resize_image(image, target_width=299, target_height=299):
    """
    Resize `image` to `target_width` x `target_height` pixels using an
    albumentations pipeline containing a single always-applied Resize.
    """
    steps = [A.Resize(height=target_height, width=target_width, p=1.0)]
    result = A.Compose(steps)(image=image)
    return result['image']

def augment_image(image, target_width=299, target_height=299):
    """
    Apply random augmentations to an image and resize the result.

    The horizontal flip, vertical flip, brightness/contrast change,
    rotation (limit=20) and rescale (scale_limit=0.2) are each applied
    with probability 0.5. The final resize is always applied (p=1.0), so
    the returned image has the requested size whichever of the random
    steps ran.

    Args:
        image: Image array passed to the albumentations pipeline.
        target_width: Width of the returned image in pixels (default 299).
        target_height: Height of the returned image in pixels (default 299).

    Returns:
        The augmented image produced by the pipeline.
    """
    transform = A.Compose([
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.5),
        A.Rotate(limit=20, p=0.5),
        A.RandomScale(scale_limit=0.2, p=0.5),
        # Must stay last: it fixes the output size after RandomScale.
        A.Resize(height=target_height, width=target_width, p=1.0),
    ])
    augmented = transform(image=image)
    return augmented['image']

def process_images(input_folder, output_folder, target_count=1000, sharpness_threshold=100):
    """
    Copy the sharp images of input_folder into output_folder, resized, and
    bring the number of written images to target_count.

    Steps:
      1. List the .png/.jpg/.jpeg files of input_folder and keep those for
         which is_image_sharp() returns True.
      2. If more sharp images exist than target_count, a random sample of
         target_count of them is resized and saved, and the function
         returns.
      3. Otherwise all sharp images are resized and saved, and augmented
         versions of randomly chosen sharp source images are written as
         'aug_<uuid>.png' to make up the difference.

    Nothing is written (and output_folder is not created) when the input
    folder has no images or none of them is sharp.

    Args:
        input_folder: Directory containing the source images.
        output_folder: Directory to write to; created if missing.
        target_count: Desired number of output images.
        sharpness_threshold: Threshold forwarded to is_image_sharp().

    Returns:
        None. Progress and failures are reported with print().
    """
    image_files = [f for f in os.listdir(input_folder) if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
    if not image_files:
        print(f"No images found in {input_folder}, skipping.")
        return
    filtered_images = [f for f in image_files if is_image_sharp(os.path.join(input_folder, f), sharpness_threshold)]

    if not filtered_images:
        print(f"No sharp images found in {input_folder}, skipping augmentation.")
        return

    print(f"Filtered {len(filtered_images)} sharp images out of {len(image_files)} in {input_folder}.")

    os.makedirs(output_folder, exist_ok=True)

    def save_resized_images(images, destination_folder):
        """Resize each named image from input_folder and save it as PNG."""
        for img in images:
            img_path = os.path.join(input_folder, img)
            image = cv2.imread(img_path)
            if image is None:
                print(f"Failed to read image: {img_path}")
                continue

            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            resized_image = resize_image(image)
            output_path = os.path.join(destination_folder, f'resized_{uuid.uuid4().hex}.png')
            # Output is PNG, so no JPEG quality flag is passed (it only
            # applies to JPEG files). imwrite reports failure via its
            # return value.
            if not cv2.imwrite(output_path, cv2.cvtColor(resized_image, cv2.COLOR_RGB2BGR)):
                print(f"Failed to write image: {output_path}")

    # If filtered images are more than the target count, randomly sample and resize them
    if len(filtered_images) > target_count:
        selected_images = random.sample(filtered_images, target_count)
        save_resized_images(selected_images, output_folder)
        print(f"Reduced to {target_count} images and saved to {output_folder}.")
        return

    save_resized_images(filtered_images, output_folder)

    print(f"Augmenting images to meet target count of {target_count}...")
    missing = target_count - len(filtered_images)
    pool = list(filtered_images)
    for _ in tqdm(range(missing)):
        # Keep drawing until a readable source is found. Unreadable files
        # are dropped from the pool so the loop always terminates and
        # cvtColor is never handed None.
        image = None
        while pool and image is None:
            img_file = random.choice(pool)
            img_path = os.path.join(input_folder, img_file)
            image = cv2.imread(img_path)
            if image is None:
                print(f"Failed to read image: {img_path}")
                pool.remove(img_file)

        if image is None:
            print(f"No readable source images left in {input_folder}, stopping augmentation early.")
            break

        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        aug_image = augment_image(image)
        new_filename = os.path.join(output_folder, f'aug_{uuid.uuid4().hex}.png')
        if not cv2.imwrite(new_filename, cv2.cvtColor(aug_image, cv2.COLOR_RGB2BGR)):
            print(f"Failed to write image: {new_filename}")

    print(f"Augmentation complete. Total images in {output_folder}: {len(os.listdir(output_folder))}.")

# Root folders: each sub-directory of the input root is processed into a
# sub-directory of the same name under the output root.
# NOTE(review): the input root below looks like a single class folder
# ('Tomato___healthy') rather than a dataset root; confirm it really
# contains sub-directories, otherwise nothing will be processed.
input_base_folder = 'D:\\Major Project\\Dataset\\Krishi_Mitra_Segmented\\Tomato___healthy'
output_base_folder = 'D:\\Major Project\\Dataset\\Colored_Augmented\\Augmented_Based_On_Quality'

for class_folder in os.listdir(input_base_folder):
    input_folder_path = os.path.join(input_base_folder, class_folder)
    output_folder_path = os.path.join(output_base_folder, class_folder)

    # Only directories are processed; loose files in the root are skipped.
    if os.path.isdir(input_folder_path):
        process_images(input_folder_path, output_folder_path, target_count=1000, sharpness_threshold=100)