In [6]:
''' 
Load libs 
'''
import os, shutil
import pandas as pd

In [7]:
'''
Specify folders
'''
# Source dataset: images and masks sit in sibling sub-folders of 'ISIC'.
original_dataset_dir = 'ISIC'
original_images_dir, original_masks_dir = (
    os.path.join(original_dataset_dir, kind) for kind in ('images', 'masks')
)

# Destination root with one sub-folder per subset.
base_dir = 'data'
train_dir, validation_dir, test_dir = (
    os.path.join(base_dir, subset) for subset in ('train', 'validation', 'test')
)

# Each subset folder holds its own 'images' and 'masks' sub-folders.
train_images_dir, train_masks_dir = (
    os.path.join(train_dir, kind) for kind in ('images', 'masks')
)
validation_images_dir, validation_masks_dir = (
    os.path.join(validation_dir, kind) for kind in ('images', 'masks')
)
test_images_dir, test_masks_dir = (
    os.path.join(test_dir, kind) for kind in ('images', 'masks')
)


In [8]:
'''
Create folders
'''
def mkdir(dir_path):
    '''
    Create the directory `dir_path`, including any missing parent folders.

    Calling it for a directory that already exists is a no-op, so the cell
    can be re-run safely. `exist_ok=True` replaces a separate existence
    check, which removes the gap between "check" and "create" and no longer
    silently accepts a regular file sitting at `dir_path`.

    Raises:
        FileExistsError: if `dir_path` exists but is not a directory.
    '''
    os.makedirs(dir_path, exist_ok=True)

# Build the destination tree top-down: the root, then the subset folders,
# then the images/masks folders inside every subset.
for folder in (
    base_dir,
    train_dir, validation_dir, test_dir,
    train_images_dir, train_masks_dir,
    validation_images_dir, validation_masks_dir,
    test_images_dir, test_masks_dir,
):
    mkdir(folder)

In [9]:
'''
Get names of images and masks with split
'''
# Fraction of the dataset assigned to each subset; the fractions must sum to 1.
split = {'train':0.7, 'validation':0.1, 'test':0.2}
assert abs(sum(split.values()) - 1.0) < 1e-9, 'split fractions must sum to 1'

# The 'name' column of masksdata.csv provides the image base names; the copy
# step appends '.png' for images and '_mask.png' for the matching masks.
masksinfo_file_path = os.path.join(original_dataset_dir, 'masksdata.csv')
masks_info = pd.read_csv(masksinfo_file_path)

images_names = list(masks_info['name'])

# Train and validation sizes are truncated to whole images; the test subset
# receives everything that is left, so no image is lost to rounding.
total_images = len(images_names)
train_split = int(total_images * split['train'])
validation_split = int(total_images * split['validation'])

# NOTE(review): the subsets are contiguous slices in CSV order (no shuffling)
# -- confirm the CSV order is not correlated with image content.
train_images_filenames = images_names[:train_split]
validation_images_filenames = images_names[train_split:train_split+validation_split]
test_images_filenames = images_names[train_split+validation_split:]

# Taken from the actual remainder: int(total_images * split['test']) can be
# smaller than the real test subset because of truncation.
test_split = len(test_images_filenames)

# Every image must land in exactly one subset.
split_sum = (len(train_images_filenames)
             + len(validation_images_filenames)
             + len(test_images_filenames))
assert split_sum == total_images, 'subsets do not cover the whole dataset'

print('Total images:     {}'.format(len(images_names)))
print('Train split:      {}'.format(len(train_images_filenames)))
print('Validation split: {}'.format(len(validation_images_filenames)))
print('Test split:       {}'.format(len(test_images_filenames)))
print('-'*23)
print('Split sum:        {}'.format(split_sum))

Total images:     4520
Train split:      3164
Validation split: 452
Test split:       904
-----------------------
Split sum:        4520


In [None]:
'''
Copy images and masks into the folders
'''
def copy_subset(image_names, images_dst_dir, masks_dst_dir):
    '''
    Copy every listed image and its mask out of the original dataset folders.

    image_names    -- image base names without extension
    images_dst_dir -- folder receiving '<name>.png'
    masks_dst_dir  -- folder receiving '<name>_mask.png'

    Existing destination files are overwritten by shutil.copyfile, so the
    cell can be re-run; a missing source file raises FileNotFoundError.
    '''
    for image_name in image_names:
        image_file = image_name + '.png'
        mask_file = image_name + '_mask.png'

        shutil.copyfile(os.path.join(original_images_dir, image_file),
                        os.path.join(images_dst_dir, image_file))
        shutil.copyfile(os.path.join(original_masks_dir, mask_file),
                        os.path.join(masks_dst_dir, mask_file))


# One call per subset replaces three copy-pasted loops; the subsets are
# processed in the order train, validation, test.
for subset_names, subset_images_dir, subset_masks_dir in (
    (train_images_filenames, train_images_dir, train_masks_dir),
    (validation_images_filenames, validation_images_dir, validation_masks_dir),
    (test_images_filenames, test_images_dir, test_masks_dir),
):
    copy_subset(subset_names, subset_images_dir, subset_masks_dir)

print('Done')

In [12]:
# Count the files that ended up in each destination folder so the image and
# mask totals of every subset can be compared.
for report_line, folder in (
    ('Total training images:   {}', train_images_dir),
    ('Total training masks:    {}', train_masks_dir),
    ('Total validation images: {}', validation_images_dir),
    ('Total validation masks:  {}', validation_masks_dir),
    ('Total test images:       {}', test_images_dir),
    ('Total test masks:        {}', test_masks_dir),
):
    print(report_line.format(len(os.listdir(folder))))

Total training images:   3164
Total training masks:    3164
Total validation images: 452
Total validation masks:  452
Total test images:       904
Total test masks:        904
