# In [5]:
import os
from sklearn.model_selection import train_test_split
import shutil

def split_dataset(input_folder, output_train_folder, output_test_folder, test_size=0.2, random_state=42):
    """Copy a class-per-subfolder image dataset into train and test folders.

    Every immediate subdirectory of ``input_folder`` is treated as one class.
    Its ``.jpg``/``.jpeg``/``.png`` files (extension matched
    case-insensitively) are split with ``train_test_split`` and copied; the
    source files are left in place.

    * Train images go to ``output_train_folder/<class_name>/<image>``.
    * Test images go to ``output_test_folder/<image>`` (one flat folder).

    Subdirectories that contain no matching image are skipped, so stray
    folders such as ``.ipynb_checkpoints`` do not abort the run.

    Args:
        input_folder: Directory whose subdirectories each hold one class.
        output_train_folder: Root of the per-class train output tree.
        output_test_folder: Flat directory that receives every test image.
        test_size: Forwarded unchanged to ``train_test_split``.
        random_state: Forwarded unchanged to ``train_test_split``.

    Raises:
        Any exception raised by ``train_test_split`` for a class it cannot
        split (for example, too few images) propagates unchanged, as do
        ``OSError`` subclasses raised by the directory and copy operations.
    """
    image_extensions = ('.jpg', '.jpeg', '.png')

    # os.listdir returns entries in arbitrary order; sorting keeps the
    # processing order stable from run to run.
    classes = sorted(
        d for d in os.listdir(input_folder)
        if os.path.isdir(os.path.join(input_folder, d))
    )

    for class_name in classes:
        class_path = os.path.join(input_folder, class_name)

        # Sorted so that a fixed random_state yields the same split no matter
        # which order the filesystem happens to list the files in.
        images = sorted(
            f for f in os.listdir(class_path)
            if f.lower().endswith(image_extensions)
        )

        # Nothing to split (e.g. a hidden or documentation folder).
        if not images:
            continue

        # Split the images into train and test sets
        train_images, test_images = train_test_split(
            images, test_size=test_size, random_state=random_state
        )

        # Create the destination once per class instead of once per image.
        train_class_folder = os.path.join(output_train_folder, class_name)
        os.makedirs(train_class_folder, exist_ok=True)
        for image in train_images:
            shutil.copy(
                os.path.join(class_path, image),
                os.path.join(train_class_folder, image),
            )

        # The test folder must exist before copying into it.
        # NOTE(review): the test layout is flat, so two classes containing a
        # file with the same name would overwrite each other here -- confirm
        # file names are unique across classes.
        os.makedirs(output_test_folder, exist_ok=True)
        for image in test_images:
            shutil.copy(
                os.path.join(class_path, image),
                os.path.join(output_test_folder, image),
            )

# Example usage: the source image tree and the two destination folders.
input_folder = "./NWPU-Captions/NWPU_images/"
output_train_folder = "./NWPU-Captions/Train"
output_test_folder = "./NWPU-Captions/Test"

# Run the split, spelling out every argument by keyword.
split_dataset(
    input_folder=input_folder,
    output_train_folder=output_train_folder,
    output_test_folder=output_test_folder,
    test_size=0.2,
    random_state=42,
)
