In [None]:
import os
import sys
# Move the working directory one level up so that the relative paths used
# later in this notebook (e.g. 'input/...') resolve from there.
# NOTE(review): presumably the parent directory is the project root -- confirm.
os.chdir('..')
# Make modules located in the 'src' folder (relative to the new working
# directory) importable.
sys.path.append('src')

In [None]:
import os
from datetime import datetime

from pathlib import Path
import shutil
import numpy as np

In [None]:
def get_images(root):
    '''
        Lazily walks root/'imgs' one level deep.

        The entries of root/'imgs' are listed once, up front; then, for each
        entry, a list with everything found directly inside it is yielded.
        Ordering is whatever Path.glob returns.
    '''
    imgs_dir = root / 'imgs'
    subfolders = list(imgs_dir.glob('*'))
    yield from (list(subfolder.glob('*')) for subfolder in subfolders)
        
def get_maskname_for_img(img_name):
    '''
        Maps an image path to the matching mask path.

        The image's grandparent folder is swapped for a sibling folder named
        'masks', keeping the last two path components (subfolder/filename):
            <base>/<imgs_dir>/<subfolder>/<file> -> <base>/masks/<subfolder>/<file>
    '''
    imgs_dir = img_name.parent.parent
    base_dir = imgs_dir.parent
    tail = img_name.relative_to(imgs_dir)
    return base_dir / 'masks' / tail

def create_split(filenames, pct=.05):
    '''
        Randomly splits a sequence into two disjoint parts.

        filenames: indexable sequence (e.g. list) of items to split
        pct: fraction of items that goes into the second part;
             the count is int(len(filenames) * pct), i.e. rounded down

        returns (main_part, split):
            main_part - the remaining items, in their original order
            split     - the randomly selected items

        Uses the global numpy random state (np.random), so np.random.seed
        controls reproducibility.
    '''
    n = int(len(filenames) * pct)
    # Sample positions rather than the items themselves: the items never go
    # through a numpy array (no type coercion, no failure on tuple-like items)
    # and the remainder is found by index, so duplicated items are kept.
    picked = np.random.choice(len(filenames), n, replace=False).tolist()
    picked_lookup = set(picked)
    split = [filenames[i] for i in picked]
    main_part = [f for i, f in enumerate(filenames) if i not in picked_lookup]
    return main_part, split

def copy_split(split, root, dst_path):
    '''
        Copies one file into dst_path, keeping its path relative to root.

        split: path of the file to copy (must be located under root)
        root: folder that the relative path is computed against
        dst_path: destination folder; missing parent folders are created
    '''
    target = dst_path / split.relative_to(root)
    target.parent.mkdir(parents=True, exist_ok=True)
    shutil.copy(split, target)
    
def create_save_splits(root, dst_path, split_pct):
    '''
        Randomly splits a dataset into train / val parts and copies the files.

        root: folder with 2 folders inside: imgs, masks. Each of them holds
              the same subfolders (i.e. 1e2425f28) with matching file names.
        dst_path: output folder. It is used as given; adding a timestamp to
              it, if wanted, is up to the caller.
        split_pct: fraction of the images of every subfolder that goes to val

        for each subfolder in imgs:
            splits the images of the subfolder in two groups randomly:
            split_pct = 0.05
            len(p1) == .95 * len(p)
            len(p2) == .05 * len(p)
        p1 (and its masks) is copied into dst_path/train,
        p2 (and its masks) is copied into dst_path/val,
        both keeping the layout relative to root (imgs/<subfolder>/<file>,
        masks/<subfolder>/<file>).

        Prints the sizes of both groups for every subfolder.
    '''
    for img_cuts in get_images(root):
        # Use the fraction passed by the caller, not a notebook-level global.
        train_imgs, val_imgs = create_split(img_cuts, pct=split_pct)
        print(len(train_imgs), len(val_imgs))

        for part_name, part_imgs in (('train', train_imgs), ('val', val_imgs)):
            part_dst = dst_path / part_name
            for img in part_imgs:
                # Every image is copied together with its mask.
                mask = get_maskname_for_img(img)
                copy_split(img, root, part_dst)
                copy_split(mask, root, part_dst)


In [None]:
# Source dataset folder (relative to the current working directory); passed
# as `root` to create_save_splits, which expects 'imgs' and 'masks' inside.
root = Path('input/CUTS/cuts1024x25/')
# Run timestamp with one-second resolution (year_month-abbreviation_day_H_M_S),
# used to give every run its own output folder name.
timestamp = '{:%Y_%b_%d_%H_%M_%S}'.format(datetime.now())
# Output goes to a 'SPLITS' folder next to the source folder.
dst_path = root.parent / f'SPLITS/split1024x25_{timestamp}/'
# Fraction of images of every subfolder that is put into the val part.
val_pct = 0.05

In [None]:
# Run the split: copies files from `root` into dst_path/train and dst_path/val.
create_save_splits(root, dst_path, val_pct)