## Take a given number of images from the dataset and save them in a separate folder

In [1]:
import os
import random
import shutil
import glob

In [2]:
def sample_images(source_dir, dest_dir, sample_size=400):
    """Copy a random sample of images from ``source_dir`` into ``dest_dir``.

    Only regular files located directly in ``source_dir`` (no recursion) whose
    names end in ``.jpg``, ``.jpeg`` or ``.png`` are considered. The extension
    check is case-insensitive, so ``photo.JPG`` is picked up on every platform.
    Files whose names start with a dot are skipped.

    Args:
        source_dir: Folder containing the original images. If it does not
            exist, it is treated as containing no images.
        dest_dir: Folder the sampled images are copied into. It is created
            (including missing parents) when absent. Existing files with the
            same name are overwritten.
        sample_size: Number of images to copy. If fewer images are available,
            all of them are copied and a warning is printed.

    Raises:
        ValueError: If ``sample_size`` is negative.
    """
    # Fail before touching the filesystem instead of deep inside random.sample.
    if sample_size < 0:
        raise ValueError(f"sample_size must be non-negative, got {sample_size}")

    # Create destination folder if it doesn't exist. exist_ok avoids the
    # check-then-create race; the message is still printed only on creation.
    dest_existed = os.path.isdir(dest_dir)
    os.makedirs(dest_dir, exist_ok=True)
    if not dest_existed:
        print(f"Created folder: {dest_dir}")

    image_extensions = ('.jpg', '.jpeg', '.png')

    # List the directory directly rather than globbing: glob would interpret
    # characters such as '[' or '*' in source_dir as pattern syntax, and its
    # matching is case-sensitive on POSIX systems.
    try:
        names = os.listdir(source_dir or os.curdir)
    except (FileNotFoundError, NotADirectoryError):
        names = []

    # Collect every image file. Sorting makes the candidate order independent
    # of the filesystem, so a seeded `random` yields a reproducible sample.
    all_images = sorted(
        path
        for path in (
            os.path.join(source_dir, name)
            for name in names
            if not name.startswith('.')
            and name.lower().endswith(image_extensions)
        )
        if os.path.isfile(path)  # a folder named "x.png" is not an image
    )

    print(f"Total images found in source: {len(all_images)}")

    # Check if we have enough images to sample
    if len(all_images) < sample_size:
        print(f"Warning: Only {len(all_images)} images found. Copying all of them.")
        sample_size = len(all_images)

    # Pick random images (without replacement)
    sampled_list = random.sample(all_images, sample_size)

    # Copy the files
    print(f"Copying {sample_size} images")
    for img_path in sampled_list:
        dest_path = os.path.join(dest_dir, os.path.basename(img_path))
        shutil.copy2(img_path, dest_path)  # copy2 preserves metadata

    print(f"Done. {sample_size} images are now in '{dest_dir}'")

In [3]:
# Folder holding the full original dataset; both names refer to the same path.
data_dir = "../dataset/all_original"
source_folder = data_dir

# Folder that receives the randomly sampled subset.
destination_folder = "../dataset/all_small_dataset"

# Copy 1000 randomly chosen images from the source into the destination.
sample_images(
    source_dir=source_folder,
    dest_dir=destination_folder,
    sample_size=1000,
)

Total images found in source: 3798
Copying 1000 images
Done. 1000 images are now in '../dataset/all_small_dataset'
