In [1]:
import os, glob, shutil, random
from collections import defaultdict
from pathlib import Path

In [2]:
# Move the original training folder aside: its contents are the un-split
# data from which fresh train/valid/test folders are built further down.
old_folder_path = Path('SkinProblem/train')

# Staging location that will hold the un-split data
new_folder_name = 'SkinProblem/dummy'

# Path.rename() either raises or silently replaces the destination (the exact
# behaviour is platform-dependent) when the target already exists, so a
# leftover staging folder -- e.g. from an interrupted earlier run -- is
# detected explicitly instead of crashing or being clobbered.
if Path(new_folder_name).exists():
    print(f"Folder '{new_folder_name}' already exists; skipping rename.")
else:
    try:
        old_folder_path.rename(new_folder_name)
        print(f"Folder '{old_folder_path.name}' has been renamed to '{new_folder_name}'.")
    except FileNotFoundError:
        print(f"Error: The folder '{old_folder_path.name}' does not exist.")

Folder 'train' has been renamed to 'SkinProblem/dummy'.


In [3]:
# ------------------------------
# Settings
# ------------------------------
# Dataset root: the train/valid/test split folders are created underneath it.
base_dir = "SkinProblem"

# Staging folder that currently holds every image and label.
src_dir = os.path.join(base_dir, "dummy")

# Image and label sub-folders of the staging folder.
src_img_dir, src_lbl_dir = (
    os.path.join(src_dir, leaf) for leaf in ("images", "labels")
)

In [4]:
# Split ratios: fraction of each class assigned to every split.
split_ratio = {"train": 0.85, "valid": 0.13, "test": 0.02}
# Catch typos early when the ratios are tuned.
assert abs(sum(split_ratio.values()) - 1.0) < 1e-9, "split_ratio values must sum to 1"

# ------------------------------
# Collect all label files
# ------------------------------
# glob returns entries in arbitrary, OS-dependent order; sorting keeps the
# grouping (and any seeded shuffle applied to it later) stable across runs.
label_files = sorted(glob.glob(os.path.join(src_lbl_dir, "*.txt")))

# Group by class (first annotation in each file)
class_files = defaultdict(list)
unlabeled_files = []  # label files that contain no annotation line at all
for lbl_file in label_files:
    with open(lbl_file) as f:
        # Use the first non-blank line so a leading empty line cannot hide
        # the annotations that follow it.
        first_line = next((line.strip() for line in f if line.strip()), "")
    if first_line:
        # The first whitespace-separated token of an annotation is the class id.
        cls = int(first_line.split()[0])
        class_files[cls].append(lbl_file)
    else:
        unlabeled_files.append(lbl_file)

print("\n📊 Class distribution before splitting:")
for cls, files in class_files.items():
    print(f"Class {cls}: {len(files)} images")

# Files without annotations belong to no class and are therefore left out of
# the stratified split; say so instead of dropping them silently.
if unlabeled_files:
    print(f"Warning: {len(unlabeled_files)} label file(s) contain no annotations "
          "and are not assigned to any class.")


📊 Class distribution before splitting:
Class 1: 2340 images
Class 0: 1764 images
Class 2: 2409 images


In [5]:
# ------------------------------
# Create folders for splits
# ------------------------------
# Each split gets its own images/ and labels/ directory under base_dir;
# directories that already exist are left as they are.
splits = ["train", "valid", "test"]
for split in splits:
    split_root = Path(base_dir) / split
    for sub in ("images", "labels"):
        (split_root / sub).mkdir(parents=True, exist_ok=True)

In [6]:
# ------------------------------
# Perform stratified split
# ------------------------------
SPLIT_SEED = 42  # fixed seed so the split is reproducible across runs
# Extensions probed for each label, in order of preference.
IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png", ".bmp", ".webp")

# Dedicated generator: neither disturbs nor depends on the global random state,
# and is re-created on every run of this cell, so re-running gives the same split.
split_rng = random.Random(SPLIT_SEED)


def _find_image(stem):
    """Return the path of the image in src_img_dir whose name is `stem` plus a
    known image extension (lower- or upper-case), or None if there is none."""
    for ext in IMAGE_EXTENSIONS:
        for candidate_ext in (ext, ext.upper()):
            candidate = os.path.join(src_img_dir, stem + candidate_ext)
            if os.path.exists(candidate):
                return candidate
    return None


missing_images = []  # labels for which no image could be found

# Iterate classes in sorted order so the random stream is consumed identically
# on every run, whatever order the classes were discovered in.
for cls, files in sorted(class_files.items()):
    # Shuffle a sorted copy: class_files itself is left untouched.
    shuffled = sorted(files)
    split_rng.shuffle(shuffled)

    n = len(shuffled)
    n_train = int(split_ratio["train"] * n)
    n_valid = int(split_ratio["valid"] * n)

    split_files = {
        "train": shuffled[:n_train],
        "valid": shuffled[n_train:n_train + n_valid],
        # Remainder: absorbs whatever the integer truncation above left over.
        "test": shuffled[n_train + n_valid:],
    }

    for split, lbl_list in split_files.items():
        for lbl_file in lbl_list:
            # Copy label
            dst_lbl = os.path.join(base_dir, split, "labels", os.path.basename(lbl_file))
            shutil.copy(lbl_file, dst_lbl)

            # Copy corresponding image (same stem, any known extension)
            img_file = _find_image(Path(lbl_file).stem)
            if img_file is None:
                missing_images.append(lbl_file)
                continue
            dst_img = os.path.join(base_dir, split, "images", os.path.basename(img_file))
            shutil.copy(img_file, dst_img)

print("\n✅ Stratified train/valid/test split complete!")

# Surface labels whose image was not found instead of skipping them silently.
if missing_images:
    print(f"Warning: no image found for {len(missing_images)} label file(s), "
          f"e.g. {os.path.basename(missing_images[0])}")


✅ Stratified train/valid/test split complete!


In [7]:
import shutil
import os

folder_path = 'SkinProblem/dummy'

# Set to True to delete the staging folder even when some of its files were
# never copied into a split (those files will be lost).
FORCE_DELETE = False


def find_uncopied_files(staging_dir):
    """Return the files under staging_dir/images and staging_dir/labels that
    have no same-named copy in the matching sub-folder of any sibling directory.

    Args:
        staging_dir: path (str or Path) of the folder about to be deleted.

    Returns:
        A list of Path objects, sorted within each sub-folder; empty when
        every staged file has a copy.
    """
    staging = Path(staging_dir)
    leftovers = []
    for sub in ("images", "labels"):
        copied_names = set()
        for sibling in staging.parent.iterdir():
            target = sibling / sub
            if sibling != staging and target.is_dir():
                copied_names.update(p.name for p in target.iterdir())
        source = staging / sub
        if source.is_dir():
            leftovers.extend(
                p for p in sorted(source.iterdir())
                if p.is_file() and p.name not in copied_names
            )
    return leftovers


# It's a good practice to check if the folder exists before deleting
if os.path.exists(folder_path):
    try:
        # Deleting is irreversible: make sure nothing in the staging folder
        # would be lost before removing it.
        leftovers = [] if FORCE_DELETE else find_uncopied_files(folder_path)
        if leftovers:
            print(f"Refusing to delete '{folder_path}': {len(leftovers)} file(s) were never "
                  f"copied into a split (e.g. {leftovers[0].name}). "
                  "Copy them first or set FORCE_DELETE = True.")
        else:
            shutil.rmtree(folder_path)
            print(f"Successfully deleted the folder: {folder_path}")
    except OSError as e:
        print(f"Error: {folder_path} : {e.strerror}")
else:
    print(f"The folder '{folder_path}' does not exist.")

Successfully deleted the folder: SkinProblem/dummy
