In [None]:
import os
from glob import glob

# Configuration - UPDATE THESE PATHS IF NEEDED
test_images_dir = r'D:\New folder (2)\train\images'  # Use raw string for Windows paths
test_labels_dir = r'D:\New folder (2)\train\labels'  # Use raw string for Windows paths
classes_to_remove = {2}  # guava(2), lemon (3), pomegranate (5)

def process_dataset():
    deleted_pairs = 0
    modified_labels = 0
    total_processed = 0

    # Check if directories exist
    if not os.path.exists(test_images_dir):
        print(f"Error: Image directory not found at {os.path.abspath(test_images_dir)}")
        return
        
    if not os.path.exists(test_labels_dir):
        print(f"Error: Labels directory not found at {os.path.abspath(test_labels_dir)}")
        return

    # Get list of image files with common extensions (e.g., .jpg, .png)
    image_files = glob(os.path.join(test_images_dir, '*.*'))
    print(f"Found {len(image_files)} image files in directory")
    
    for image_path in image_files:
        total_processed += 1
        image_filename = os.path.basename(image_path)
        print(f"\nProcessing: {image_filename}")
        
        # Get corresponding label path
        base_name = os.path.splitext(image_filename)[0]
        label_path = os.path.join(test_labels_dir, f"{base_name}.txt")
        
        if not os.path.exists(label_path):
            print(f"Warning: No label file found for {image_filename}")
            continue

        print(f"Found label: {os.path.basename(label_path)}")

        # Read and filter labels
        try:
            with open(label_path, 'r') as f:
                original_lines = f.readlines()
        except Exception as e:
            print(f"Error reading {label_path}: {str(e)}")
            continue

        filtered_lines = []
        for line in original_lines:
            parts = line.strip().split()
            if not parts:
                continue
            try:
                class_id = int(parts[0])
                if class_id not in classes_to_remove:
                    filtered_lines.append(line)
                else:
                    print(f"Removing class {class_id} from {label_path}")
            except ValueError:
                print(f"Invalid class ID in {label_path}: {parts[0]}")
                continue

        # Track modifications and deletions
        if len(filtered_lines) < len(original_lines):
            if len(filtered_lines) > 0:
                modified_labels += 1
                try:
                    with open(label_path, 'w') as f:
                        f.writelines(filtered_lines)
                    print(f"Updated {label_path}")
                except Exception as e:
                    print(f"Error writing {label_path}: {str(e)}")
            else:
                try:
                    os.remove(label_path)
                    os.remove(image_path)
                    deleted_pairs += 1
                    print(f"Deleted {image_filename} and its label")
                except Exception as e:
                    print(f"Error deleting files: {str(e)}")

    print("\nProcessing Report:")
    print(f"Total images processed: {total_processed}")
    print(f"Deleted image-label pairs: {deleted_pairs}")
    print(f"Modified label files: {modified_labels}")
    print(f"Remaining image-label pairs: {total_processed - deleted_pairs}")

if __name__ == '__main__':
    process_dataset()

Found 937 image files in directory

Processing: -00001_jpeg_jpg.rf.5ca3dee4db63b1f9c5a3436e19a1f8fa.jpg
Found label: -00001_jpeg_jpg.rf.5ca3dee4db63b1f9c5a3436e19a1f8fa.txt

Processing: -00001_jpeg_jpg.rf.8eb70ee2d7584702b20a3b7515f5b457.jpg
Found label: -00001_jpeg_jpg.rf.8eb70ee2d7584702b20a3b7515f5b457.txt

Processing: -00001_jpeg_jpg.rf.bb289c7fc73eaf6f71925d73293afe5e.jpg
Found label: -00001_jpeg_jpg.rf.bb289c7fc73eaf6f71925d73293afe5e.txt

Processing: -00002_jpeg_jpg.rf.6988cb8a83ac5fc96efd63363b7dd287.jpg
Found label: -00002_jpeg_jpg.rf.6988cb8a83ac5fc96efd63363b7dd287.txt

Processing: -00002_jpeg_jpg.rf.cb6155b9ef517b2a97cf94fa1a263685.jpg
Found label: -00002_jpeg_jpg.rf.cb6155b9ef517b2a97cf94fa1a263685.txt

Processing: -00003_jpeg_jpg.rf.0af187eb90c8923b8dddcda7b912e114.jpg
Found label: -00003_jpeg_jpg.rf.0af187eb90c8923b8dddcda7b912e114.txt

Processing: -00003_jpeg_jpg.rf.3969396e62acba36c79b8783955a2de3.jpg
Found label: -00003_jpeg_jpg.rf.3969396e62acba36c79b8783955a2de3.txt

In [5]:
import os
from glob import glob

# Configuration - UPDATE THESE PATHS IF NEEDED
ttest_images_dir = r'D:\fruit dataset COMP5544.v1-fruit-image-recognition.yolov11\train\images'  # Use raw string for Windows paths
test_labels_dir = r'D:\fruit dataset COMP5544.v1-fruit-image-recognition.yolov11\train\labels'  # Use raw string for Windows paths
classes_to_remove = {2, 3 ,5}  # guava(2), lemon (3), pomegranate (5)

def process_dataset():
    deleted_pairs = 0
    modified_labels = 0
    total_processed = 0

    # Check if directories exist
    if not os.path.exists(test_images_dir):
        print(f"Error: Image directory not found at {os.path.abspath(test_images_dir)}")
        return
        
    if not os.path.exists(test_labels_dir):
        print(f"Error: Labels directory not found at {os.path.abspath(test_labels_dir)}")
        return

    # Get list of image files with common extensions (e.g., .jpg, .png)
    image_files = glob(os.path.join(test_images_dir, '*.*'))
    print(f"Found {len(image_files)} image files in directory")
    
    for image_path in image_files:
        total_processed += 1
        image_filename = os.path.basename(image_path)
        print(f"\nProcessing: {image_filename}")
        
        # Get corresponding label path
        base_name = os.path.splitext(image_filename)[0]
        label_path = os.path.join(test_labels_dir, f"{base_name}.txt")
        
        if not os.path.exists(label_path):
            print(f"Warning: No label file found for {image_filename}")
            continue

        print(f"Found label: {os.path.basename(label_path)}")

        # Read and filter labels
        try:
            with open(label_path, 'r') as f:
                original_lines = f.readlines()
        except Exception as e:
            print(f"Error reading {label_path}: {str(e)}")
            continue

        filtered_lines = []
        for line in original_lines:
            parts = line.strip().split()
            if not parts:
                continue
            try:
                class_id = int(parts[0])
                if class_id not in classes_to_remove:
                    filtered_lines.append(line)
                else:
                    print(f"Removing class {class_id} from {label_path}")
            except ValueError:
                print(f"Invalid class ID in {label_path}: {parts[0]}")
                continue

        # Track modifications and deletions
        if len(filtered_lines) < len(original_lines):
            if len(filtered_lines) > 0:
                modified_labels += 1
                try:
                    with open(label_path, 'w') as f:
                        f.writelines(filtered_lines)
                    print(f"Updated {label_path}")
                except Exception as e:
                    print(f"Error writing {label_path}: {str(e)}")
            else:
                try:
                    os.remove(label_path)
                    os.remove(image_path)
                    deleted_pairs += 1
                    print(f"Deleted {image_filename} and its label")
                except Exception as e:
                    print(f"Error deleting files: {str(e)}")

    print("\nProcessing Report:")
    print(f"Total images processed: {total_processed}")
    print(f"Deleted image-label pairs: {deleted_pairs}")
    print(f"Modified label files: {modified_labels}")
    print(f"Remaining image-label pairs: {total_processed - deleted_pairs}")

if __name__ == '__main__':
    process_dataset()

Found 32 image files in directory

Processing: 20190812_183150_jpg.rf.df4b4706c4a19d37be464fd4cf8fabfa.jpg

Processing: 20190812_183518_jpg.rf.7f9c7f595c2c2b73be886a34da2281e8.jpg

Processing: image_20241126_160100_png.rf.dc6c5b55d8bf7911cc2abd4a496deb14.jpg

Processing: image_20241126_160131_png.rf.fb3c386552c0011168bffc950abc878a.jpg

Processing: image_20241126_160141_png.rf.ea75915bdd0c9fe57f2f8307963bfc4c.jpg

Processing: IMG20200728125935_jpg.rf.757c388c76abeb83591bd3697b98dbdb.jpg

Processing: IMG20200728130627_jpg.rf.bdb5b6745bfe445bccad0509ba491153.jpg

Processing: IMG20200728131019_jpg.rf.4fa1690a41dd7c036f29ee8a5861ef6c.jpg

Processing: IMG20200728131112_jpg.rf.bb64f09d0e73c2e902c290d42f3806b6.jpg

Processing: IMG20200728180007_jpg.rf.5d8c848a602d9d7d3ac098b87f0603ab.jpg

Processing: IMG20200728185912_jpg.rf.670a2f4046e1aa34f269ff93cfbf22c3.jpg

Processing: IMG20200728190221_jpg.rf.d95586a1b67f3d23b03971119e7ae592.jpg

Processing: IMG20200729183111_jpg.rf.f7a758d4d7e250335522

In [6]:
import os
from glob import glob

# Configuration - UPDATE THESE PATHS IF NEEDED
test_images_dir = r'D:\fruit dataset COMP5544.v1-fruit-image-recognition.yolov11\valid\images'  # Use raw string for Windows paths
test_labels_dir = r'D:\fruit dataset COMP5544.v1-fruit-image-recognition.yolov11\valid\labels'  # Use raw string for Windows paths
classes_to_remove = {2, 3 ,5}  # guava(2), lemon (3), pomegranate (5)

def process_dataset():
    deleted_pairs = 0
    modified_labels = 0
    total_processed = 0

    # Check if directories exist
    if not os.path.exists(test_images_dir):
        print(f"Error: Image directory not found at {os.path.abspath(test_images_dir)}")
        return
        
    if not os.path.exists(test_labels_dir):
        print(f"Error: Labels directory not found at {os.path.abspath(test_labels_dir)}")
        return

    # Get list of image files with common extensions (e.g., .jpg, .png)
    image_files = glob(os.path.join(test_images_dir, '*.*'))
    print(f"Found {len(image_files)} image files in directory")
    
    for image_path in image_files:
        total_processed += 1
        image_filename = os.path.basename(image_path)
        print(f"\nProcessing: {image_filename}")
        
        # Get corresponding label path
        base_name = os.path.splitext(image_filename)[0]
        label_path = os.path.join(test_labels_dir, f"{base_name}.txt")
        
        if not os.path.exists(label_path):
            print(f"Warning: No label file found for {image_filename}")
            continue

        print(f"Found label: {os.path.basename(label_path)}")

        # Read and filter labels
        try:
            with open(label_path, 'r') as f:
                original_lines = f.readlines()
        except Exception as e:
            print(f"Error reading {label_path}: {str(e)}")
            continue

        filtered_lines = []
        for line in original_lines:
            parts = line.strip().split()
            if not parts:
                continue
            try:
                class_id = int(parts[0])
                if class_id not in classes_to_remove:
                    filtered_lines.append(line)
                else:
                    print(f"Removing class {class_id} from {label_path}")
            except ValueError:
                print(f"Invalid class ID in {label_path}: {parts[0]}")
                continue

        # Track modifications and deletions
        if len(filtered_lines) < len(original_lines):
            if len(filtered_lines) > 0:
                modified_labels += 1
                try:
                    with open(label_path, 'w') as f:
                        f.writelines(filtered_lines)
                    print(f"Updated {label_path}")
                except Exception as e:
                    print(f"Error writing {label_path}: {str(e)}")
            else:
                try:
                    os.remove(label_path)
                    os.remove(image_path)
                    deleted_pairs += 1
                    print(f"Deleted {image_filename} and its label")
                except Exception as e:
                    print(f"Error deleting files: {str(e)}")

    print("\nProcessing Report:")
    print(f"Total images processed: {total_processed}")
    print(f"Deleted image-label pairs: {deleted_pairs}")
    print(f"Modified label files: {modified_labels}")
    print(f"Remaining image-label pairs: {total_processed - deleted_pairs}")

if __name__ == '__main__':
    process_dataset()

Found 124 image files in directory

Processing: 20190809_121624_jpg.rf.b1edb501539139eb361d357405d1599f.jpg
Found label: 20190809_121624_jpg.rf.b1edb501539139eb361d357405d1599f.txt

Processing: 20190812_183513_jpg.rf.77679fa04def5e95fd3e1257e4686660.jpg
Found label: 20190812_183513_jpg.rf.77679fa04def5e95fd3e1257e4686660.txt

Processing: 20190812_185140_jpg.rf.9df0e2529855b38cf30ef43789830313.jpg
Found label: 20190812_185140_jpg.rf.9df0e2529855b38cf30ef43789830313.txt

Processing: 20190813_121119_jpg.rf.fd4ef162b3a2c2380641f1161cba1498.jpg
Found label: 20190813_121119_jpg.rf.fd4ef162b3a2c2380641f1161cba1498.txt
Removing class 2 from D:\fruit dataset COMP5544.v1-fruit-image-recognition.yolov11\valid\labels\20190813_121119_jpg.rf.fd4ef162b3a2c2380641f1161cba1498.txt
Deleted 20190813_121119_jpg.rf.fd4ef162b3a2c2380641f1161cba1498.jpg and its label

Processing: 20190813_131030_jpg.rf.ba0f7952164f7d1c075b7bdbe5c0b03b.jpg
Found label: 20190813_131030_jpg.rf.ba0f7952164f7d1c075b7bdbe5c0b03b.t