In [85]:
import os
import shutil
from random import choices, sample

In [86]:
# File-name suffixes that count as images; get_random_images matches them
# against lower-cased file names, so keep the entries lower-case.
image_extensions = ['.jpg', '.jpeg']  # Add more extensions if needed
# Target fraction of each class folder that is set aside for the test split.
test_size = 0.1
# Integer divisor derived from test_size (0.1 -> 10): a folder contributes
# `count // test_size_ratio` files. int() truncates toward zero, so a test_size
# whose reciprocal is not a whole number yields a slightly larger test share.
test_size_ratio = int(1 / test_size)

In [87]:
def show_progress(current=0, total=30, size=None, item='file'):
    """
    Print a one-line text progress bar, or a banner once the work is done.

    The bar looks like ``---|file 3|>>>>>|of 46|``: the number of leading
    ``-`` characters grows with ``current`` and the number of ``>`` characters
    shrinks, so together they always add up to ``size``.

    :param current: number of the item being reported.
    :param total: total number of items; when ``current == total`` a centred
        ``Completed!`` banner (50 characters wide) is printed instead of a bar.
    :param size: width of the bar in characters; a falsy value (``None``, 0)
        means "use ``total``", i.e. one character per item.
    :param item: noun shown in front of ``current`` inside the bar label.
    :return: None; the function only prints.
    """
    if current == total:
        ans = 'Completed!'
        return print(f"{ans:_^50}")

    size = total if not size else size
    # Integer arithmetic on purpose: int(current / total * size) goes through a
    # float and can land one short (13 / 46 * 46 == 12.999...), which used to
    # draw step 13 with exactly the same bar as step 12.
    string_pointer = int(current * size // total)

    label = f"|{item} {current}|"
    # Left part: `string_pointer` dashes in front of the label.
    label = label.rjust(string_pointer + len(label), '-')
    # Right part: the remaining `size - string_pointer` cells as '>'.
    label = label.ljust(size - string_pointer + len(label), '>')
    label += f"|of {total}|"
    print("\n", label)

In [88]:
def get_random_images(path, attempt=1):
    """
    Pick a random test subset of the image files stored in ``path``.

    Only entries whose name ends with one of ``image_extensions``
    (case-insensitive) are candidates. For a folder holding N such images,
    ``k = N // test_size_ratio`` distinct file names are drawn, which splits
    the folder into ``k`` test images and ``N - k`` train images.

    :param path: directory that holds the images of one class.
    :param attempt: ignored; kept only so existing calls that pass it keep
        working. Earlier versions re-drew the whole sample whenever a
        non-image file was picked and raised ``RecursionError`` after 10 tries.
    :return: list of ``k`` unique file names (names only, not full paths).
    """
    suffixes = tuple(ext.lower() for ext in image_extensions)
    # Filter before sampling, so stray files (notes, thumbnails, ...) can never
    # be selected and no retry loop is needed. The directory is read once.
    images = [name for name in os.listdir(path) if name.lower().endswith(suffixes)]
    k = len(images) // test_size_ratio
    # sample() draws WITHOUT replacement. choices() draws with replacement and
    # could return the same file several times, silently shrinking the split.
    return sample(images, k)

In [89]:
def _list_subdirectories(path):
    """Return the sorted names of the immediate sub-directories of ``path``."""
    return sorted(
        name for name in os.listdir(path)
        if os.path.isdir(os.path.join(path, name))
    )


def copy_images(source_dir, destination_dir='test/', action='move'):
    """
    Transfer a random subset of every ``<brand>/<model>`` folder to a test set.

    ``source_dir`` is walked two levels deep. For each model folder the files
    chosen by ``get_random_images`` are moved or copied into
    ``destination_dir/<brand>/<model>``, which is created when missing.
    Plain files found at the brand or model level are ignored instead of being
    treated as folders.

    Progress is reported with ``show_progress`` after each brand folder has
    been processed, so the completion banner appears once the work is done.

    :param source_dir: dataset root laid out as ``<brand>/<model>/<files>``.
    :param destination_dir: root of the test split; created if it is missing.
    :param action: ``'move'`` (uses ``shutil.move``) or ``'copy'`` (uses
        ``shutil.copy2``).
    :return: None.
    :raises AssertionError: if ``action`` is neither ``'move'`` nor ``'copy'``.
    """
    assert action in ['move', 'copy'], f"action must be 'move' or 'copy', got {action!r}"
    os.makedirs(destination_dir, exist_ok=True)
    executor = shutil.move if action == 'move' else shutil.copy2

    # List the source once and keep only directories: a stray file such as
    # .DS_Store would otherwise make the nested os.listdir() call fail.
    brands = _list_subdirectories(source_dir)
    total_number_of_folders = len(brands)
    skipped = 0

    for folder_n, brand in enumerate(brands, start=1):
        brand_dir = os.path.join(source_dir, brand)

        for model in _list_subdirectories(brand_dir):
            model_dir = os.path.join(brand_dir, model)
            files = get_random_images(model_dir)

            destination_path = os.path.join(destination_dir, brand, model)
            os.makedirs(destination_path, exist_ok=True)
            for file in files:
                source_path = os.path.join(model_dir, file)
                try:
                    executor(source_path, destination_path)
                except shutil.Error:
                    # Best effort: e.g. shutil.move refuses to overwrite a file
                    # that already exists in the destination. Skip it, but keep
                    # count so the skip is not completely silent.
                    skipped += 1

        show_progress(folder_n, total_number_of_folders)

    if skipped:
        print(f"Skipped {skipped} file(s) that could not be transferred")
    return

In [90]:
# Source dataset root; copy_images walks it as <brand>/<model>/<image files>.
dataset_directory = "../data/images/"
# Output root for the held-out test split; copy_images creates it if missing.
test_directory = '../data/MADE_TEST_DATASET/'

In [91]:
# Build the test split by copying (action='copy' -> shutil.copy2), so the
# files under dataset_directory stay in place.
copy_images(dataset_directory, test_directory, action='copy')


 -|file 1|>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>|of 46|

 --|file 2|>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>|of 46|

 ---|file 3|>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>|of 46|

 ----|file 4|>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>|of 46|

 -----|file 5|>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>|of 46|

 ------|file 6|>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>|of 46|

 -------|file 7|>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>|of 46|

 --------|file 8|>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>|of 46|

 ---------|file 9|>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>|of 46|

 ----------|file 10|>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>|of 46|

 -----------|file 11|>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>|of 46|

 ------------|file 12|>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>|of 46|

 ------------|file 13|>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>|of 46|

 --------------|file 14|>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>|of 46|

 ---------------|file 15|>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>|of 46|

 ----------------|file 16|>>>>>>>