In [19]:
import os
import shutil

from sklearn.model_selection import train_test_split

In [20]:
path_to_angiocells = './AngioCells'

In [21]:
def copyfile(src, dst, file_names):
    os.makedirs(dst, exist_ok=True)
    
    for file_name in file_names:
        src_file = os.path.join(src, file_name)
        dst_file = os.path.join(dst, file_name)
        shutil.copyfile(src_file, dst_file)

In [22]:
types = ['Dark', 'Defective', 'Different', 'Good']

train_size = 0.68 
test_size = 0.32

for type in types:
    path_to_images = os.path.join(f'{path_to_angiocells}/prepared_dataset/', type, 'images')
    path_to_masks = os.path.join(f'{path_to_angiocells}/prepared_dataset/', type, 'masks')
    names = sorted(os.listdir(path_to_images))
    train_names, test_names = train_test_split(names, test_size=test_size, random_state=42, shuffle=True)
    print(f'{type}'.ljust(20), f'{len(names)}')
    print(f'train'.ljust(20), f'{len(train_names)}')
    print(f'test'.ljust(20), f'{len(test_names)}', end='\n\n')
    
    # copy images for test
    copyfile(src=path_to_images, 
             dst=os.path.join(f'{path_to_angiocells}/prepared_dataset/', type, 'train', 'images'),
             file_names=train_names)
    
    # copy images for train
    copyfile(src=path_to_images, 
             dst=os.path.join(f'{path_to_angiocells}/prepared_dataset/', type, 'test', 'images'),
             file_names=test_names)
    
    train_names = list(map(lambda x: x.replace('.jpg', '.png'), train_names))
    test_names = list(map(lambda x: x.replace('.jpg', '.png'), test_names))
    
    # copy masks for test
    copyfile(src=path_to_masks, 
             dst=os.path.join(f'{path_to_angiocells}/prepared_dataset/', type, 'train', 'masks'),
             file_names=train_names)
    
    # copy masks for train
    copyfile(src=path_to_masks, 
             dst=os.path.join(f'{path_to_angiocells}/prepared_dataset/', type, 'test', 'masks'),
             file_names=test_names)



Dark                 54
train                36
test                 18

Defective            79
train                53
test                 26

Different            28
train                19
test                 9

Good                 114
train                77
test                 37



In [23]:
# split for All images and masks

test_images = 0
train_images = 0
for type in types:
    for folder in 'images', 'masks':
        for train_test in 'train', 'test':
            srs = os.path.join(f'{path_to_angiocells}/prepared_dataset/', type, train_test, folder)
            names = os.listdir(srs)
            copyfile(src=srs, 
                     dst=os.path.join(f'{path_to_angiocells}/prepared_dataset/', 'All', train_test, folder),
                     file_names=names)
            if train_test == 'train' and folder == 'images':
                train_images += len(names)
            elif folder == 'images':
                test_images += len(names)
                
print(f'{train_images = }')
print(f'{test_images = }')

train_images = 186
test_images = 92
