## Distribute images into train, val, test datasets

### Imports

In [2]:
#Import libraries
import os
import random
import shutil

In [3]:
# Project root: the (lexically normalized) parent of the current working directory
ROOT_PATH = os.path.normpath(os.sep.join((os.getcwd(), os.pardir)))

In [4]:
# Path to the original images.
# Paths are built with os.path.join rather than hard-coded '\\' separators so
# they resolve on any operating system, not only on Windows.
original_img_path = os.path.join(ROOT_PATH, 'data', '_Circuit1')

# Paths to the `train`, `val`, `test` image folders
train_path = os.path.join(ROOT_PATH, 'data', 'train', 'images')
val_path = os.path.join(ROOT_PATH, 'data', 'val', 'images')
test_path = os.path.join(ROOT_PATH, 'data', 'test', 'images')

In [6]:
def distribute_images(n, original_img_path, train_path, val_path, test_path, seed=42):
    """Copy a random selection of files into train/val/test folders (70/15/15).

    Up to ``n`` regular files are sampled from ``original_img_path`` and
    copied (the source folder is left untouched) into the three destination
    folders. The train split gets ``int(n * 0.7)`` files, the val split
    ``int(n * 0.15)`` files, and the test split receives the remainder.

    Args:
        n: Number of files to select. If it exceeds the number of files
            available, all files are used.
        original_img_path: Folder containing the source images.
        train_path: Destination folder for the train split.
        val_path: Destination folder for the val split.
        test_path: Destination folder for the test split.
        seed: Seed for the random selection. The same seed and the same set
            of file names always produce the same split.

    Raises:
        FileNotFoundError: If ``original_img_path`` does not exist.
        ValueError: If ``n`` is negative.

    Notes:
        Destination folders are created if missing but never emptied, so
        files copied by earlier runs remain in place; a file with the same
        name is overwritten.
    """
    # List the source first so that a missing source folder fails before any
    # destination directory is created.
    # Only regular files are kept: sub-directories cannot be copied with
    # shutil.copy. The list is sorted because os.listdir returns entries in
    # arbitrary order, which would make the seeded selection non-reproducible.
    img_files = sorted(
        entry
        for entry in os.listdir(original_img_path)
        if os.path.isfile(os.path.join(original_img_path, entry))
    )

    # Make the destination directories
    for dest_dir in (train_path, val_path, test_path):
        os.makedirs(dest_dir, exist_ok=True)

    # If n is larger than the number of images, take all images
    n = min(n, len(img_files))

    # A private generator keeps the global `random` state untouched
    rng = random.Random(seed)
    selected_files = rng.sample(img_files, n)

    # Split sizes; the test split takes whatever is left after train and val
    train_size = int(n * 0.7)
    val_size = int(n * 0.15)
    val_end = train_size + val_size

    splits = (
        (train_path, selected_files[:train_size]),
        (val_path, selected_files[train_size:val_end]),
        (test_path, selected_files[val_end:]),
    )

    # Copy the randomly selected images to their destination folders
    for dest_dir, files in splits:
        for img_file in files:
            shutil.copy(
                os.path.join(original_img_path, img_file),
                os.path.join(dest_dir, img_file),
            )

In [None]:
# How many images to select from the source folder
n = 100

# Distribute the `n` selected images from the source into (train:val:test) = (70:15:15)
distribute_images(
    n=n,
    original_img_path=original_img_path,
    train_path=train_path,
    val_path=val_path,
    test_path=test_path,
)