In [2]:
import os
import random
import shutil

# === Parameters ===
# Root of the dataset; the train/ and val/ folders are created inside it,
# next to the source images/ and labels/ folders.
dataset_dir = "/home/adelb/Documents/Bpartners/revets/dataset1"
images_dir = os.path.join(dataset_dir, "images")
labels_dir = os.path.join(dataset_dir, "labels")
split_ratio = 0.8  # 80% train, 20% val
# File extensions treated as images; compared case-insensitively below.
image_extensions = ('.jpg', '.jpeg', '.png')
random.seed(42)  # fixed seed so the shuffle below is repeatable

# === Create new output structure ===
for split in ['train', 'val']:
    os.makedirs(os.path.join(dataset_dir, split, 'images'), exist_ok=True)
    os.makedirs(os.path.join(dataset_dir, split, 'labels'), exist_ok=True)

# === Collect and shuffle image files ===
# os.listdir() returns entries in arbitrary order, so sort before shuffling:
# without a stable starting order the seeded shuffle would still yield a
# different split from one run or machine to the next.
image_files = sorted(
    f for f in os.listdir(images_dir)
    if f.lower().endswith(image_extensions)
)
random.shuffle(image_files)

# === Split ===
# The first `split_ratio` share of the shuffled list is train, the rest is val.
split_index = int(len(image_files) * split_ratio)
train_files = image_files[:split_index]
val_files = image_files[split_index:]

def move_files(file_list, split, *, images_root=None, labels_root=None,
               output_root=None):
    """Copy images and their label files into the folders of one split.

    Despite the name, nothing is moved: source files are copied and left in
    place. An image is copied only when a label file with the same basename
    and a ``.txt`` extension exists in the labels folder; otherwise a warning
    is printed and that image is skipped.

    Args:
        file_list: Image file names (not full paths) to copy.
        split: Name of the split sub-folder, e.g. ``'train'`` or ``'val'``.
        images_root: Folder holding the source images. Defaults to the
            module-level ``images_dir``.
        labels_root: Folder holding the source label files. Defaults to the
            module-level ``labels_dir``.
        output_root: Folder under which ``<split>/images`` and
            ``<split>/labels`` are written. Defaults to the module-level
            ``dataset_dir``.
    """
    # The module-level defaults are only looked up when no override is given,
    # so the function can be used without those globals being defined.
    images_root = images_dir if images_root is None else images_root
    labels_root = labels_dir if labels_root is None else labels_root
    output_root = dataset_dir if output_root is None else output_root

    dst_images_dir = os.path.join(output_root, split, 'images')
    dst_labels_dir = os.path.join(output_root, split, 'labels')
    # Do not rely on the caller having created the destination folders.
    os.makedirs(dst_images_dir, exist_ok=True)
    os.makedirs(dst_labels_dir, exist_ok=True)

    for image_file in file_list:
        basename = os.path.splitext(image_file)[0]
        label_file = f"{basename}.txt"
        src_label = os.path.join(labels_root, label_file)

        # isfile (not exists) so that a directory named "<basename>.txt"
        # is treated as a missing label instead of crashing the copy.
        if not os.path.isfile(src_label):
            print(f"⚠️  Label not found for: {image_file}")
            continue

        shutil.copy(os.path.join(images_root, image_file),
                    os.path.join(dst_images_dir, image_file))
        shutil.copy(src_label, os.path.join(dst_labels_dir, label_file))

# === Copy each subset into its split folder ===
# move_files() copies (it does not delete the sources); train goes first,
# then val.
for subset, split_name in ((train_files, 'train'), (val_files, 'val')):
    move_files(subset, split_name)

# Summary of the resulting layout, one line per print call.
for summary_line in (
    "✅ Dataset split complete with folder structure:",
    "dataset/train/images, dataset/train/labels",
    "dataset/val/images, dataset/val/labels",
):
    print(summary_line)


✅ Dataset split complete with folder structure:
dataset/train/images, dataset/train/labels
dataset/val/images, dataset/val/labels
