In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img
import os
import numpy as np

# Root folder of the dataset; every category folder below is resolved relative to it
input_dir = '/content/drive/MyDrive/Malaria2'

# Random transformations applied when synthesising additional images
datagen = ImageDataGenerator(
    horizontal_flip=True,    # randomly mirror images left/right
    rotation_range=40,       # degree range for random rotations
    zoom_range=0.2,          # random zoom within [0.8, 1.2]
    shear_range=0.2,         # shear intensity
    width_shift_range=0.2,   # horizontal shift, as a fraction of the total width
    height_shift_range=0.2,  # vertical shift, as a fraction of the total height
    fill_mode='nearest'      # fill points outside the input boundaries with the nearest pixel
)

# Helper function to count images in a directory
def count_images(directory, extensions=('.jpg', '.jpeg', '.png', '.tif')):
    """Return the number of image files located directly inside ``directory``.

    Sub-directories are not searched. File names are matched against
    ``extensions`` case-insensitively, so ``scan.JPG`` counts as an image.
    The default list includes ``.jpeg`` because that is the extension Keras
    gives to files saved with ``save_format='jpeg'``; leaving it out would
    make previously augmented images invisible to the count.

    Args:
        directory: Path of the folder to inspect.
        extensions: Iterable of file-name suffixes treated as images.

    Returns:
        int: Number of matching entries in ``directory``.

    Raises:
        FileNotFoundError: If ``directory`` does not exist (from ``os.listdir``).
    """
    suffixes = tuple(ext.lower() for ext in extensions)
    return sum(1 for name in os.listdir(directory) if name.lower().endswith(suffixes))

# Map every class label to the folder that holds its images.
# 'Uninfected' sits directly under the dataset root; each infected class has
# its own sub-folder inside 'infected'.
categories = {'Uninfected': os.path.join(input_dir, 'Uninfected')}
categories.update(
    (species, os.path.join(input_dir, f'infected/{species}'))
    for species in ('Falciparum', 'Vivax', 'Ovale', 'Malariae')
)

# Number of images wanted in each class folder.
# 5 classes x 600 = 3000 images overall; raise the value for a larger dataset.
target_images_per_class = 600

# Function to augment images to reach the target count for each category
def augment_directory(directory, target_count):
    """Write augmented copies into ``directory`` until it holds ``target_count`` images.

    The images already present (including ``.jpeg`` files written by earlier
    runs) are counted first, so calling the function again on a folder that
    already meets the target does nothing. The shortfall is then divided as
    evenly as possible between the original images; files whose name starts
    with ``aug_`` are treated as earlier augmentations and are not augmented
    again. Each new file is produced by the module-level ``datagen`` and saved
    as JPEG next to its source.

    Args:
        directory: Folder containing the images of one category.
        target_count: Number of images the folder should contain afterwards.

    Returns:
        int: Number of images in ``directory`` once the function has finished.

    Raises:
        FileNotFoundError: If ``directory`` does not exist (from ``os.listdir``).
    """
    import uuid  # local import: only needed to build collision-free file names

    image_exts = ('.jpg', '.jpeg', '.png', '.tif')
    aug_marker = 'aug_'

    # Sorted so that the choice of source images is reproducible between runs.
    images = sorted(f for f in os.listdir(directory) if f.lower().endswith(image_exts))
    current_count = len(images)
    shortfall = target_count - current_count
    if shortfall <= 0:
        return current_count

    sources = [f for f in images if not f.startswith(aug_marker)]
    if not sources:
        print(f"No source images found in {directory}; nothing to augment.")
        return current_count

    # Spread the work over all sources instead of deriving everything from the
    # first file: the first `remainder` sources produce one extra image each.
    per_source, remainder = divmod(shortfall, len(sources))

    for position, filename in enumerate(sources):
        quota = per_source + (1 if position < remainder else 0)
        if quota == 0:
            break  # quotas never increase, so the remaining sources need nothing

        img = load_img(os.path.join(directory, filename))
        x = np.expand_dims(img_to_array(img), axis=0)

        for _ in range(quota):
            # Keras names saved files '<prefix>_<index>_<random 0-9999>.<format>'.
            # With a fixed prefix, two outputs can share a name and overwrite
            # each other, so a unique prefix is used for every generated image.
            unique_prefix = f'{aug_marker}{uuid.uuid4().hex}'
            next(datagen.flow(x, batch_size=1, save_to_dir=directory,
                              save_prefix=unique_prefix, save_format='jpeg'))
            current_count += 1

    return current_count

# Run the augmentation on every category folder, aiming for
# `target_images_per_class` images in each of them
for category in categories:
    path = categories[category]
    augment_directory(path, target_images_per_class)


In [None]:
import os

# Base input directory where your dataset is stored.
# This repeats the path used by the augmentation cell, presumably so that this
# counting cell can be run on its own; keep both definitions pointing at the
# same folder.
input_dir = '/content/drive/MyDrive/Malaria2'

# Function to count images in a directory and its subdirectories
def count_images_in_dataset(directory, extensions=('.jpg', '.jpeg', '.png', '.tif')):
    """Count the image files found anywhere below ``directory``.

    The tree is traversed with ``os.walk`` and file names are compared with
    ``extensions`` case-insensitively, so ``slide.JPG`` and ``slide.jpg`` are
    both counted.

    Args:
        directory: Root folder of the dataset.
        extensions: Iterable of file-name suffixes treated as images.

    Returns:
        int: Total number of matching files. ``os.walk`` ignores listing
        errors by default, so a missing ``directory`` yields 0 rather than
        raising.
    """
    suffixes = tuple(ext.lower() for ext in extensions)
    return sum(
        1
        for _root, _dirs, files in os.walk(directory)
        for name in files
        if name.lower().endswith(suffixes)
    )

# Tally every image below the dataset root (originals plus augmented copies) and report it
total_images = count_images_in_dataset(directory=input_dir)
print(f"Total number of images in the dataset (including augmented images): {total_images}")


Total number of images in the dataset (including augmented images): 2932
