In [1]:
import os
import shutil
from math import floor

# Source root (expected to hold 'Negative' and 'Positive' subfolders)
# and destination root for the divided dataset.
input_dir = 'conc'
output_dir = 'data_concrete'
os.makedirs(output_dir, exist_ok=True)

# Lay out <output_dir>/<split>/<category> for every split/class pair.
for split in ('train', 'val', 'test'):
    split_root = os.path.join(output_dir, split)
    for category in ('Negative', 'Positive'):
        os.makedirs(os.path.join(split_root, category), exist_ok=True)

# Function to split images
def split_images(input_category_dir, output_dir, category, train_ratio=0.7, val_ratio=0.15, seed=None):
    """Copy one category's images into train/val/test folders under ``output_dir``.

    Files in ``input_category_dir`` whose names end in ``.jpg``, ``.jpeg`` or
    ``.png`` (case-insensitive) are ordered, partitioned, and copied to
    ``<output_dir>/<split>/<category>/`` for each of the three splits.

    Args:
        input_category_dir: Folder holding the images of a single category.
        output_dir: Root folder that receives the ``train``/``val``/``test`` trees.
        category: Name of the category subfolder created under each split.
        train_ratio: Fraction of images assigned to ``train`` (rounded down).
        val_ratio: Fraction of images assigned to ``val`` (rounded down).
            Every image not taken by ``train`` or ``val`` goes to ``test``.
        seed: When ``None`` (default) the images are split in sorted filename
            order, so the result is deterministic. When given, the sorted list
            is shuffled with ``random.Random(seed)`` first, giving a randomized
            but reproducible split.

    Raises:
        ValueError: If a ratio is negative or the two ratios sum to more than 1.
    """
    import random  # local import: only needed for the optional seeded shuffle

    # Reject ratios that would otherwise produce silently wrong slices
    # (negative sizes wrap around; a sum above 1 starves the test split).
    # The small tolerance absorbs float rounding in sums such as 0.7 + 0.3.
    if train_ratio < 0 or val_ratio < 0 or train_ratio + val_ratio > 1 + 1e-9:
        raise ValueError(
            f"train_ratio and val_ratio must be non-negative and sum to at most 1 "
            f"(got train_ratio={train_ratio}, val_ratio={val_ratio})"
        )

    # os.listdir order is arbitrary, so sort first to get a stable baseline.
    images = sorted(
        name for name in os.listdir(input_category_dir)
        if name.lower().endswith(('.jpg', '.jpeg', '.png'))
    )
    if seed is not None:
        random.Random(seed).shuffle(images)

    # Split sizes are rounded down; the remainder falls into the test split.
    total_images = len(images)
    train_size = floor(total_images * train_ratio)
    val_size = floor(total_images * val_ratio)

    partitions = {
        'train': images[:train_size],
        'val': images[train_size:train_size + val_size],
        'test': images[train_size + val_size:],
    }

    for split, names in partitions.items():
        dest_dir = os.path.join(output_dir, split, category)
        # Create the destination here so the function does not depend on
        # the caller having built the folder tree beforehand.
        os.makedirs(dest_dir, exist_ok=True)
        for img_name in names:
            shutil.copy(os.path.join(input_category_dir, img_name), os.path.join(dest_dir, img_name))

# Run the split once for each class folder found under the input root.
for category in ('Negative', 'Positive'):
    category_dir = os.path.join(input_dir, category)
    split_images(
        input_category_dir=category_dir,
        output_dir=output_dir,
        category=category,
    )

# Report where the divided dataset was written.
print(f"Images from '{input_dir}' have been divided into train, val, and test sets in '{output_dir}'.")


Images from 'conc' have been divided into train, val, and test sets in 'data_concrete'.
