In [1]:
import os
import random
from pathlib import Path

def reduce_dataset(images_dir, labels_dir, target_count=1000, image_ext=".jpg"):
    """Randomly delete image/label pairs until only ``target_count`` pairs remain.

    Only images that have a same-stem ``.txt`` file in ``labels_dir`` are
    counted and considered for deletion; images without a label are left
    untouched. The deletion is permanent.

    Args:
        images_dir: Directory holding the image files (``str`` or ``Path``).
        labels_dir: Directory holding the ``<image stem>.txt`` label files
            (``str`` or ``Path``).
        target_count: Number of labelled images to keep.
        image_ext: File extension of the images to consider, with or without
            the leading dot.

    Raises:
        ValueError: If ``target_count`` is negative.
    """
    if target_count < 0:
        raise ValueError(f"target_count must be non-negative, got {target_count}")

    # Accept plain strings as well as Path objects.
    images_dir = Path(images_dir)
    labels_dir = Path(labels_dir)

    if not image_ext.startswith("."):
        image_ext = f".{image_ext}"

    # A Path is not iterable, so the directory has to be globbed explicitly.
    # Sorting makes the selection reproducible once `random.seed` is set,
    # because glob order depends on the filesystem.
    image_files = sorted(
        img
        for img in images_dir.glob(f"*{image_ext}")
        if (labels_dir / f"{img.stem}.txt").exists()
    )

    # Check how many files need to be deleted
    current_count = len(image_files)
    files_to_delete = current_count - target_count

    if files_to_delete <= 0:
        print(f"No files need to be deleted. Current count: {current_count}")
        return

    print(f"Deleting {files_to_delete} files to reduce dataset to {target_count} images.")

    # Randomly select the pairs to delete
    for img_file in random.sample(image_files, files_to_delete):
        label_file = labels_dir / f"{img_file.stem}.txt"

        img_file.unlink()

        # The label existed when the pair was collected, but re-check in case
        # it was removed in the meantime.
        if label_file.exists():
            label_file.unlink()

    print(f"Dataset reduced to {target_count} images.")



In [None]:
from time import time

In [2]:
%%time
# Cap the "Mask No Mask" train split at 600 labelled images.
# Destructive: surplus image/label pairs are removed from disk.
reduce_dataset(
    Path("C:/Users/User/Downloads/Mask No Mask/train/images"),  # images_dir
    Path("C:/Users/User/Downloads/Mask No Mask/train/labels"),  # labels_dir
    target_count=600,
)


Deleting 2056 files to reduce dataset to 600 images.
Dataset reduced to 600 images.
CPU times: total: 656 ms
Wall time: 923 ms


In [19]:
%%time
# Cap the "Mask No Mask" test split at 200 labelled images.
# Destructive: surplus image/label pairs are removed from disk.
reduce_dataset(
    Path("C:/Users/User/Downloads/Mask No Mask/test/images"),  # images_dir
    Path("C:/Users/User/Downloads/Mask No Mask/test/labels"),  # labels_dir
    target_count=200,
)


Deleting 189 files to reduce dataset to 200 images.
Dataset reduced to 200 images.
CPU times: total: 0 ns
Wall time: 82.1 ms


In [21]:
%%time
# Cap the "Mask No Mask" validation split at 100 labelled images.
# Destructive: surplus image/label pairs are removed from disk.
reduce_dataset(
    Path("C:/Users/User/Downloads/Mask No Mask/valid/images"),  # images_dir
    Path("C:/Users/User/Downloads/Mask No Mask/valid/labels"),  # labels_dir
    target_count=100,
)


Deleting 100 files to reduce dataset to 100 images.
Dataset reduced to 100 images.
CPU times: total: 0 ns
Wall time: 50.3 ms


In [3]:
# Cap the WEAPONS train split at 600 labelled images.
# Destructive: surplus image/label pairs are removed from disk.
reduce_dataset(
    Path("C:/Users/User/Downloads/WEAPONS/train/images"),  # images_dir
    Path("C:/Users/User/Downloads/WEAPONS/train/labels"),  # labels_dir
    target_count=600,
)

Deleting 3239 files to reduce dataset to 600 images.
Dataset reduced to 600 images.


In [16]:
# Cap the WEAPONS test split at 200 labelled images.
# Destructive: surplus image/label pairs are removed from disk.
reduce_dataset(
    Path("C:/Users/User/Downloads/WEAPONS/test/images"),  # images_dir
    Path("C:/Users/User/Downloads/WEAPONS/test/labels"),  # labels_dir
    target_count=200,
)

Deleting 59 files to reduce dataset to 200 images.
Dataset reduced to 200 images.


In [17]:
# Cap the WEAPONS validation split at 200 labelled images.
# Destructive: surplus image/label pairs are removed from disk.
reduce_dataset(
    Path("C:/Users/User/Downloads/WEAPONS/valid/images"),  # images_dir
    Path("C:/Users/User/Downloads/WEAPONS/valid/labels"),  # labels_dir
    target_count=200,
)

No files need to be deleted. Current count: 0


In [4]:
# HELMET+MASK: cap the train split at 600 labelled images.
# Destructive: surplus image/label pairs are removed from disk.
reduce_dataset(
    Path("C:/Users/User/Downloads/HELMET+MASK/train/images"),  # images_dir
    Path("C:/Users/User/Downloads/HELMET+MASK/train/labels"),  # labels_dir
    target_count=600,
)

Deleting 519 files to reduce dataset to 600 images.
Dataset reduced to 600 images.


In [15]:
# HELMET+MASK: cap the test split at 200 labelled images.
# Destructive: surplus image/label pairs are removed from disk.
reduce_dataset(
    Path("C:/Users/User/Downloads/HELMET+MASK/test/images"),  # images_dir
    Path("C:/Users/User/Downloads/HELMET+MASK/test/labels"),  # labels_dir
    target_count=200,
)

No files need to be deleted. Current count: 49


In [29]:
# Cap the "face_cover_one_category SHRINK" test split at 200 labelled images.
# Destructive: surplus image/label pairs are removed from disk.
reduce_dataset(
    Path("C:/Users/User/Downloads/face_cover_one_category SHRINK/test/images"),  # images_dir
    Path("C:/Users/User/Downloads/face_cover_one_category SHRINK/test/labels"),  # labels_dir
    target_count=200,
)

No files need to be deleted. Current count: 21


In [32]:
# Cap the "face_cover_one_category SHRINK" train split at 200 labelled images.
# Destructive: surplus image/label pairs are removed from disk.
reduce_dataset(
    Path("C:/Users/User/Downloads/face_cover_one_category SHRINK/train/images"),  # images_dir
    Path("C:/Users/User/Downloads/face_cover_one_category SHRINK/train/labels"),  # labels_dir
    target_count=200,
)

Deleting 1505 files to reduce dataset to 200 images.
Dataset reduced to 200 images.


In [33]:
# Cap the "face_cover_one_category SHRINK" validation split at 100 labelled images.
# Destructive: surplus image/label pairs are removed from disk.
reduce_dataset(
    Path("C:/Users/User/Downloads/face_cover_one_category SHRINK/valid/images"),  # images_dir
    Path("C:/Users/User/Downloads/face_cover_one_category SHRINK/valid/labels"),  # labels_dir
    target_count=100,
)

Deleting 371 files to reduce dataset to 100 images.
Dataset reduced to 100 images.


#### Remove files that contain only a class I don't need

In [10]:
import os
from pathlib import Path

def remove_class(dataset_dir, labels_dir, unwanted_class_id, image_ext=".jpg"):
    """Delete every label file that contains only ``unwanted_class_id``, plus its image.

    Each non-blank line of a label file is treated as one annotation whose
    first whitespace-separated token is the class ID. A label file is removed
    when it has at least one annotation and all of its annotations carry the
    unwanted class. The image with the same stem in ``dataset_dir`` is removed
    as well when it exists. Files with mixed classes and empty label files are
    left untouched. The deletion is permanent.

    Args:
        dataset_dir: Directory holding the image files (``str`` or ``Path``).
        labels_dir: Directory holding the ``.txt`` label files
            (``str`` or ``Path``).
        unwanted_class_id: Class ID to purge; compared as text against the
            first token of each annotation line.
        image_ext: File extension of the images, with or without the leading
            dot.
    """
    images_dir = Path(dataset_dir)
    label_files = sorted(Path(labels_dir).glob("*.txt"))
    unwanted = str(unwanted_class_id)

    if not image_ext.startswith("."):
        image_ext = f".{image_ext}"

    print(len(label_files))
    for label_file in label_files:
        with open(label_file, 'r') as f:
            # Keep only the class token of each annotation. Blank lines
            # (for example a trailing newline) are not annotations.
            class_ids = [line.split()[0] for line in f if line.strip()]

        # Compare whole tokens: a prefix test would make class "1" also match
        # "10", "11", and so on. An empty file holds no instance of the
        # unwanted class, so it is kept.
        if not class_ids or any(class_id != unwanted for class_id in class_ids):
            continue

        image_file = images_dir / f"{label_file.stem}{image_ext}"

        label_file.unlink()

        if image_file.exists():
            image_file.unlink()
            print(f"Deleted: {image_file} and {label_file}")
        else:
            # Report the deletion even when no image accompanies the label.
            print(f"Deleted: {label_file} (no matching image found)")

    print(f"Finished removing files with class {unwanted_class_id}.")



In [13]:
# Purge class 0 from the HELMET+MASK train split: label files containing only
# that class are deleted from disk together with their .jpg images.
remove_class(
    "C:/Users/User/Downloads/HELMET+MASK/train/images",  # dataset_dir: images directory
    "C:/Users/User/Downloads/HELMET+MASK/train/labels",  # labels_dir: labels directory
    unwanted_class_id=0,  # ID of the class to remove
)

600
Deleted: C:\Users\User\Downloads\HELMET+MASK\train\images\10_jpg.rf.37bff2ab655e75ac4d0bce34d16bb0c8.jpg and C:\Users\User\Downloads\HELMET+MASK\train\labels\10_jpg.rf.37bff2ab655e75ac4d0bce34d16bb0c8.txt
Deleted: C:\Users\User\Downloads\HELMET+MASK\train\images\10_jpg.rf.4d8afaf857988355851fa6a99befee6e.jpg and C:\Users\User\Downloads\HELMET+MASK\train\labels\10_jpg.rf.4d8afaf857988355851fa6a99befee6e.txt
Deleted: C:\Users\User\Downloads\HELMET+MASK\train\images\10_jpg.rf.67c06379861d7cbf43ced41b14d8b954.jpg and C:\Users\User\Downloads\HELMET+MASK\train\labels\10_jpg.rf.67c06379861d7cbf43ced41b14d8b954.txt
Deleted: C:\Users\User\Downloads\HELMET+MASK\train\images\10_jpg.rf.8e4e54e1635aa7d6708998dbc1df77ba.jpg and C:\Users\User\Downloads\HELMET+MASK\train\labels\10_jpg.rf.8e4e54e1635aa7d6708998dbc1df77ba.txt
Deleted: C:\Users\User\Downloads\HELMET+MASK\train\images\11_jpg.rf.0043bddaf746d86ce23bea49d0e8bd60.jpg and C:\Users\User\Downloads\HELMET+MASK\train\labels\11_jpg.rf.0043bdda

In [14]:
# Purge class 0 from the HELMET+MASK test split: label files containing only
# that class are deleted from disk together with their .jpg images.
remove_class(
    "C:/Users/User/Downloads/HELMET+MASK/test/images",  # dataset_dir: images directory
    "C:/Users/User/Downloads/HELMET+MASK/test/labels",  # labels_dir: labels directory
    unwanted_class_id=0,  # ID of the class to remove
)

162
Deleted: C:\Users\User\Downloads\HELMET+MASK\test\images\10_jpg.rf.4191cd25c0b7580275d8f188e22b550c.jpg and C:\Users\User\Downloads\HELMET+MASK\test\labels\10_jpg.rf.4191cd25c0b7580275d8f188e22b550c.txt
Deleted: C:\Users\User\Downloads\HELMET+MASK\test\images\10_jpg.rf.e0919f2fd65e45fcc0a9e8e1c82a0e31.jpg and C:\Users\User\Downloads\HELMET+MASK\test\labels\10_jpg.rf.e0919f2fd65e45fcc0a9e8e1c82a0e31.txt
Deleted: C:\Users\User\Downloads\HELMET+MASK\test\images\11_jpg.rf.75ffb9a115f4c017f502be5850ce7081.jpg and C:\Users\User\Downloads\HELMET+MASK\test\labels\11_jpg.rf.75ffb9a115f4c017f502be5850ce7081.txt
Deleted: C:\Users\User\Downloads\HELMET+MASK\test\images\11_jpg.rf.7c825121119a3b639aeb772adfa9c1c7.jpg and C:\Users\User\Downloads\HELMET+MASK\test\labels\11_jpg.rf.7c825121119a3b639aeb772adfa9c1c7.txt
Deleted: C:\Users\User\Downloads\HELMET+MASK\test\images\13_jpg.rf.18f454436e4da73b84d488066138fe38.jpg and C:\Users\User\Downloads\HELMET+MASK\test\labels\13_jpg.rf.18f454436e4da73b84

In [12]:
# Purge class 0 from the HELMET+MASK validation split: label files containing
# only that class are deleted from disk together with their .jpg images.
remove_class(
    "C:/Users/User/Downloads/HELMET+MASK/valid/images",  # dataset_dir: images directory
    "C:/Users/User/Downloads/HELMET+MASK/valid/labels",  # labels_dir: labels directory
    unwanted_class_id=0,  # ID of the class to remove
)

324
Deleted: C:\Users\User\Downloads\HELMET+MASK\valid\images\10_jpg.rf.3138b9c5fb49d3c15ecbf563240a3efd.jpg and C:\Users\User\Downloads\HELMET+MASK\valid\labels\10_jpg.rf.3138b9c5fb49d3c15ecbf563240a3efd.txt
Deleted: C:\Users\User\Downloads\HELMET+MASK\valid\images\10_jpg.rf.9400e67b2e82c5c89f87f3fda1668638.jpg and C:\Users\User\Downloads\HELMET+MASK\valid\labels\10_jpg.rf.9400e67b2e82c5c89f87f3fda1668638.txt
Deleted: C:\Users\User\Downloads\HELMET+MASK\valid\images\11_jpg.rf.86e1e8a7aa1a12bf0c4b59f28151fffa.jpg and C:\Users\User\Downloads\HELMET+MASK\valid\labels\11_jpg.rf.86e1e8a7aa1a12bf0c4b59f28151fffa.txt
Deleted: C:\Users\User\Downloads\HELMET+MASK\valid\images\11_jpg.rf.fd2192376f36785b53837c97fee17ca0.jpg and C:\Users\User\Downloads\HELMET+MASK\valid\labels\11_jpg.rf.fd2192376f36785b53837c97fee17ca0.txt
Deleted: C:\Users\User\Downloads\HELMET+MASK\valid\images\12_jpg.rf.1552edcfc74400567e82815affcba979.jpg and C:\Users\User\Downloads\HELMET+MASK\valid\labels\12_jpg.rf.1552edcf