In [1]:
import os
import shutil
import random
from glob import glob

# Move a fraction of the train samples whose label file references the
# "disgust" class into the test split (label file plus its matching image).
#
# NOTE: this cell changes the dataset on disk. Re-running it selects again
# from whatever is still in train, so each run moves additional samples.

# Dataset paths
dataset_path = "../data"
train_labels = os.path.join(dataset_path, "train", "labels")
train_images = os.path.join(dataset_path, "train", "images")
test_labels = os.path.join(dataset_path, "test", "labels")
test_images = os.path.join(dataset_path, "test", "images")

# Class index for "disgust"
# Check your data.yaml: ["angry", "disgust", "fear", ...] → disgust = 1
target_class = "1"

# Tunables for the selection step.
DISGUST_MOVE_FRACTION = 0.15  # share of matching train samples to move
SPLIT_SEED = 42  # fixed seed so the same files are picked for the same input
IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png")  # tried in this order


def label_has_class(label_path, class_id):
    """Return True if any line of ``label_path`` has ``class_id`` as first token.

    Lines are split on arbitrary whitespace, so space- and tab-separated
    fields (and lines holding only the class id) are all recognised, while
    ids that merely share a prefix (e.g. "10" vs "1") are not.
    """
    with open(label_path, "r") as f:
        return any(line.split()[:1] == [class_id] for line in f)


def count_to_move(n_found, fraction=DISGUST_MOVE_FRACTION):
    """Return how many of ``n_found`` samples to move.

    At least one sample is moved when any were found; zero when none were.
    """
    if n_found <= 0:
        return 0
    return max(1, int(fraction * n_found))


def find_image(images_dir, base, extensions=IMAGE_EXTENSIONS):
    """Return the first existing ``images_dir/base + ext`` path, or None."""
    for ext in extensions:
        candidate = os.path.join(images_dir, base + ext)
        if os.path.exists(candidate):
            return candidate
    return None


def move_sample(label_path, images_dir, dest_labels_dir, dest_images_dir):
    """Move one label file and its image into the destination directories.

    The image is located *before* anything is moved; if no image with a known
    extension exists, nothing is touched and None is returned, so a label is
    never separated from its image. Destination directories are created on
    demand.

    Returns:
        ``(source_image_path, destination_image_path)`` on success, else None.
    """
    base = os.path.splitext(os.path.basename(label_path))[0]
    img_path = find_image(images_dir, base)
    if img_path is None:
        return None

    os.makedirs(dest_labels_dir, exist_ok=True)
    os.makedirs(dest_images_dir, exist_ok=True)

    dest_img = os.path.join(dest_images_dir, os.path.basename(img_path))
    dest_label = os.path.join(dest_labels_dir, os.path.basename(label_path))
    shutil.move(img_path, dest_img)
    shutil.move(label_path, dest_label)
    return img_path, dest_img


# Collect all train labels containing "disgust".
# Sorted first because glob order is filesystem-dependent; a stable starting
# order is required for the seeded shuffle below to be reproducible.
disgust_files = sorted(
    label_path
    for label_path in glob(os.path.join(train_labels, "*.txt"))
    if label_has_class(label_path, target_class)
)

# Shuffle with a dedicated, seeded generator and pick the configured fraction.
random.Random(SPLIT_SEED).shuffle(disgust_files)
n_move = count_to_move(len(disgust_files))
files_to_move = disgust_files[:n_move]

print(f"Found {len(disgust_files)} 'disgust' samples in train. Moving {n_move} to test...")

# Move files
for label_path in files_to_move:
    moved = move_sample(label_path, train_images, test_labels, test_images)
    if moved is None:
        print(f"Skipped {label_path}: no matching image found in {train_images}")
        continue
    img_path, dest_img = moved
    print(f"Moved {img_path} → {dest_img}")


Found 390 'disgust' samples in train. Moving 58 to test...
Moved ../data/train/images/Training_89701855_jpg.rf.83d24f94f396d9fd311387ce7a1332d5.jpg → ../data/test/images/Training_89701855_jpg.rf.83d24f94f396d9fd311387ce7a1332d5.jpg
Moved ../data/train/images/Training_53679453_jpg.rf.fe826563f144b11d047e933c0479f721.jpg → ../data/test/images/Training_53679453_jpg.rf.fe826563f144b11d047e933c0479f721.jpg
Moved ../data/train/images/1_jpg.rf.e5777d37a413fbdd5c9bd6f5ed120d97.jpg → ../data/test/images/1_jpg.rf.e5777d37a413fbdd5c9bd6f5ed120d97.jpg
Moved ../data/train/images/Training_62572176_jpg.rf.75197cb994176289ba373a11d6b67064.jpg → ../data/test/images/Training_62572176_jpg.rf.75197cb994176289ba373a11d6b67064.jpg
Moved ../data/train/images/Training_65372676_jpg.rf.42f401a2b05d734050012b4dd83010b9.jpg → ../data/test/images/Training_65372676_jpg.rf.42f401a2b05d734050012b4dd83010b9.jpg
Moved ../data/train/images/18_jpg.rf.92c9864e2065368a8dab4a2350b2349b.jpg → ../data/test/images/18_jpg.rf.92