In [1]:
import os
import cv2
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator


In [2]:
# Input/output locations (relative to the notebook's working directory).
dataset_path = "dataset/data/"                 # original images, one sub-folder per category
augmented_path = "dataset/augmented_train/"    # where augmented copies are written

In [3]:

# Class labels; each one is also used as a sub-folder name under the dataset paths.
categories = [
    "healthy",
    "powdery_mildew",
    "new_disease",
]

In [4]:
# Make sure every category has a destination folder under augmented_path.
# exist_ok=True keeps this cell safe to re-run.
for category in categories:
    category_out_dir = os.path.join(augmented_path, category)
    os.makedirs(category_out_dir, exist_ok=True)

In [5]:
# Random-transform configuration for the augmentation generator.
# Kept in a dict so the settings can be inspected or logged separately
# from the generator object itself.
augmentation_settings = {
    "rotation_range": 40,
    "width_shift_range": 0.2,
    "height_shift_range": 0.2,
    "shear_range": 0.2,
    "zoom_range": 0.2,
    "horizontal_flip": True,
    "brightness_range": [0.5, 1.5],
    "fill_mode": "nearest",
}
datagen = ImageDataGenerator(**augmentation_settings)

In [6]:
# Generate images
# For every readable image under dataset_path/<category>, write AUG_PER_IMAGE
# randomly transformed copies into augmented_path/<category>.
TARGET_SIZE = (224, 224)   # size passed to cv2.resize
AUG_PER_IMAGE = 50         # augmented copies written per source image

for category in categories:
    img_folder = os.path.join(dataset_path, category)
    save_folder = os.path.join(augmented_path, category)

    # Sorted so files are processed in the same order on every run.
    for img_name in sorted(os.listdir(img_folder)):
        img_path = os.path.join(img_folder, img_name)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None rather than raising
            # (non-image files, sub-directories, corrupt images); passing that
            # to cv2.resize would abort the whole run.
            print(f"Skipping unreadable file: {img_path}")
            continue

        # OpenCV decodes to BGR channel order, while the generator writes the
        # array out as an RGB image; convert first so the saved files do not
        # have red and blue swapped.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, TARGET_SIZE)
        img = np.expand_dims(img, axis=0)  # add batch dimension

        # Put the source file's stem in the prefix: with a constant prefix the
        # only thing distinguishing outputs from different source images is a
        # random number in the generated name, so files could silently
        # overwrite each other.
        stem = os.path.splitext(img_name)[0]
        flow = datagen.flow(
            img,
            batch_size=1,
            save_to_dir=save_folder,
            save_prefix=f"aug_{stem}",
            save_format="jpg",
        )
        # The flow iterator is endless; each next() yields one batch and, as a
        # side effect, saves one augmented image to save_folder.
        for _ in range(AUG_PER_IMAGE):
            next(flow)

print("Data Augmentation Complete")

Data Augmentation Complete


In [7]:
import os
import shutil
import random

# Paths
augmented_dataset = "dataset/augmented_train/"
output_dataset = "dataset_final/"

# Split configuration: cumulative cut points (first 80% -> train,
# next 10% -> val, remainder -> test) and a fixed seed for the shuffle.
TRAIN_END = 0.8
VAL_END = 0.9
SPLIT_SEED = 42

# Create train/val/test folders
for subset in ["train", "val", "test"]:
    for category in categories:
        os.makedirs(os.path.join(output_dataset, subset, category), exist_ok=True)

# NOTE(review): the files split here come from the augmented folder, so
# several variants of one source image can land in different subsets and
# val/test scores may look better than they should. Splitting the original
# images before augmenting would avoid that -- confirm whether that matters
# for this project.

# Split dataset
for category in categories:
    category_dir = os.path.join(augmented_dataset, category)

    # os.listdir order is arbitrary, so sort before shuffling with a fixed
    # seed; otherwise re-running this cell would assign files to different
    # subsets while the earlier copies stay in place, putting the same image
    # in more than one of train/val/test. Non-file entries are skipped
    # because shutil.copy cannot copy a directory.
    img_files = sorted(
        name
        for name in os.listdir(category_dir)
        if os.path.isfile(os.path.join(category_dir, name))
    )
    # A private Random instance keeps the split independent of whatever else
    # has consumed the global random state.
    random.Random(SPLIT_SEED).shuffle(img_files)

    train_split = int(TRAIN_END * len(img_files))
    val_split = int(VAL_END * len(img_files))

    for i, file in enumerate(img_files):
        src_path = os.path.join(category_dir, file)

        if i < train_split:
            subset = "train"
        elif i < val_split:
            subset = "val"
        else:
            subset = "test"
        dest_path = os.path.join(output_dataset, subset, category, file)

        shutil.copy(src_path, dest_path)

print("Dataset Split Completed")

Dataset Split Completed
