In [None]:
# Mount Google Drive at /content/drive so files under it can be read and written by the cells below.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import shutil
import random
from pathlib import Path

# === CONFIG ===
# Root directory: main() reads the source 'images/' and 'labels/' folders (Label Studio export)
# from here, and make_dirs() creates the train/val/test output folders under the same root.
dataset_dir = r'/content/drive/MyDrive'  # Root directory holding the 'images/' and 'labels/' folders
image_exts = ['.jpg', '.jpeg', '.png']  # Supported image formats (compared against the lower-cased file suffix)
train_ratio = 0.7  # Fraction of images for the train split; split_files truncates the count to an int
val_ratio = 0.2  # Fraction of images for the val split; split_files truncates the count to an int
test_ratio = 0.1  # NOTE: not read by split_files; the test split receives every file left after train and val


# === Prepare Split Directories ===
def make_dirs():
    """Create the ``images`` and ``labels`` folders for every split.

    For each of ``train``, ``val`` and ``test`` the directories
    ``<dataset_dir>/<split>/images`` and ``<dataset_dir>/<split>/labels``
    are created; directories that already exist are left untouched.
    """
    for split_name in ('train', 'val', 'test'):
        for kind in ('images', 'labels'):
            target = os.path.join(dataset_dir, split_name, kind)
            os.makedirs(target, exist_ok=True)


# === Get List of Image Files ===
def list_image_files(image_folder, exts=None):
    """Return the sorted names of the image files directly inside a folder.

    Args:
        image_folder: Directory to scan (not recursive).
        exts: Optional iterable of file suffixes (including the leading dot)
            to accept. Defaults to the module-level ``image_exts``.

    Returns:
        A list of file names (not full paths) whose suffix, compared
        case-insensitively, is one of the accepted extensions.

    Raises:
        OSError: If ``image_folder`` cannot be listed (e.g. it does not exist).
    """
    if exts is None:
        exts = image_exts
    allowed = {ext.lower() for ext in exts}

    with os.scandir(image_folder) as entries:
        # Keep regular files only: a directory named e.g. "crops.jpg" would
        # otherwise be returned and later make shutil.copy2 fail.
        # Sort because the OS returns directory entries in arbitrary order,
        # which would make a seeded shuffle irreproducible between runs.
        return sorted(
            entry.name
            for entry in entries
            if entry.is_file() and Path(entry.name).suffix.lower() in allowed
        )


# === Split Dataset ===
def split_files(files, ratios=None):
    """Randomly partition file names into train, val and test groups.

    Args:
        files: Iterable of file names. It is not modified; a shuffled copy
            is split instead.
        ratios: Optional ``(train, val)`` pair of fractions. Defaults to the
            module-level ``(train_ratio, val_ratio)``.

    Returns:
        A dict with the keys ``'train'``, ``'val'`` and ``'test'``, each
        mapping to a list of file names. The train and val sizes are the
        fractions of the total truncated to whole numbers; the test group
        receives every remaining file, so no file is dropped.
    """
    if ratios is None:
        ratios = (train_ratio, val_ratio)
    train_share, val_share = ratios

    # Shuffle a copy so the caller's list keeps its original order.
    shuffled = list(files)
    random.shuffle(shuffled)

    total = len(shuffled)
    train_end = int(total * train_share)
    val_end = train_end + int(total * val_share)
    return {
        'train': shuffled[:train_end],
        'val': shuffled[train_end:val_end],
        'test': shuffled[val_end:],
    }


# === Copy Images and Corresponding Labels ===
def copy_files(split_files_dict, image_dir, label_dir):
    """Copy each split's images, and their labels when present, into place.

    Args:
        split_files_dict: Mapping of split name to a list of image file names.
        image_dir: Directory the image files are copied from.
        label_dir: Directory searched for ``<image stem>.txt`` label files.

    Every image is copied to ``<dataset_dir>/<split>/images``. Its label is
    copied to ``<dataset_dir>/<split>/labels`` if it exists; otherwise a
    warning is printed and the image is kept without a label.
    """
    for split_name, names in split_files_dict.items():
        images_out = os.path.join(dataset_dir, split_name, 'images')
        labels_out = os.path.join(dataset_dir, split_name, 'labels')

        for file in names:
            label_name = Path(file).stem + '.txt'
            label_in = os.path.join(label_dir, label_name)

            # The image is always copied, even when its label is missing.
            shutil.copy2(os.path.join(image_dir, file), os.path.join(images_out, file))

            if not os.path.exists(label_in):
                print(f"Warning: Label for {file} not found.")
                continue
            shutil.copy2(label_in, os.path.join(labels_out, label_name))


# === Main Function ===
def main():
    """Split the dataset under ``dataset_dir`` into train/val/test folders.

    Creates the output directories, lists the images in
    ``<dataset_dir>/images``, partitions them, copies images and labels into
    the split folders, and prints a completion message.
    """
    image_folder, label_folder = (
        os.path.join(dataset_dir, sub) for sub in ('images', 'labels')
    )

    make_dirs()
    splits = split_files(list_image_files(image_folder))
    copy_files(splits, image_folder, label_folder)
    print("Dataset successfully split into train/val/test folders.")


# Run the split only when this code is executed as the main module, not when it is imported.
if __name__ == '__main__':
    main()


Dataset successfully split into train/val/test folders.
