# In [1]:
import os
import shutil

def create_subset(input_dir, output_dir, num_files=10):
    """Copy a small, reproducible sample of each category into output_dir.

    Every immediate subdirectory of ``input_dir`` is treated as a category.
    For each one, a same-named directory is created under ``output_dir`` and
    the first ``num_files`` regular files (in sorted name order) are copied
    into it. Non-directory entries directly under ``input_dir`` and nested
    subdirectories inside a category are ignored.

    Args:
        input_dir: Directory whose subdirectories are the categories.
        output_dir: Destination root; created if it does not exist.
        num_files: Maximum number of files to copy per category. ``None``
            copies every file. Must not be negative.

    Raises:
        ValueError: If ``num_files`` is negative.
        FileNotFoundError: If ``input_dir`` does not exist.
    """
    # A negative slice bound would silently mean "all but the last k files",
    # which is never what a file-count limit is meant to express.
    if num_files is not None and num_files < 0:
        raise ValueError(f"num_files must be non-negative, got {num_files}")

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(output_dir, exist_ok=True)

    # os.listdir returns entries in arbitrary order; sorting makes the chosen
    # subset identical across runs, machines and filesystems.
    for category in sorted(os.listdir(input_dir)):
        category_path = os.path.join(input_dir, category)
        if not os.path.isdir(category_path):
            continue

        output_category_path = os.path.join(output_dir, category)
        os.makedirs(output_category_path, exist_ok=True)

        files = sorted(
            name
            for name in os.listdir(category_path)
            if os.path.isfile(os.path.join(category_path, name))
        )

        for name in files[:num_files]:
            shutil.copy(
                os.path.join(category_path, name),
                os.path.join(output_category_path, name),
            )

# Roots of the full dataset and of the reduced copy to be generated.
base_dir = 'merged_resized_pngs_splited'
target_dir = 'subset_merged_resized_pngs_splited'

# Per-split source and destination directories.
test_input_dir, train_input_dir = (
    os.path.join(base_dir, split) for split in ('test', 'train')
)
test_output_dir, train_output_dir = (
    os.path.join(target_dir, split) for split in ('test', 'train')
)

# Build the subset for the test split first, then the train split.
for split_input_dir, split_output_dir in (
    (test_input_dir, test_output_dir),
    (train_input_dir, train_output_dir),
):
    create_subset(split_input_dir, split_output_dir)

print('Subsets created successfully.')


# Output:
# Subsets created successfully.
