In [3]:
from glob import glob
from skimage.morphology import skeletonize
import cv2
import numpy as np
from skimage import measure, morphology
import random
from pathlib import Path
import albumentations

In [4]:
# Glob pattern locating the original dataset masks; edit it to match
# where the original datasets live on your machine.
MASK_GLOB_PATTERN = 'set_*/train/masks/*.png'
mask_paths = glob(MASK_GLOB_PATTERN)

In [5]:
# Keep only the original masks: files ending in '_processed.png' are excluded.
mask_paths = [fp for fp in mask_paths if not fp.endswith('_processed.png')]

# NOTE(review): the shuffle is unseeded, so the split (and the index-based
# output file names) change on every kernel restart. Consider seeding `random`
# in a config cell if the datasets must be reproducible.
random.shuffle(mask_paths)

# The first half of the shuffled masks is skeletonized in the 'mix_skeleton' dataset.
skeleton_split_idx = len(mask_paths) // 2
masks_to_skeletonize = mask_paths[:skeleton_split_idx]

# Splits are expressed in terms of file names, in the same order as mask_paths.
im_names = [Path(fp).name for fp in mask_paths]
n_images = len(im_names)

# 75% of the images go to train, 1/14 of them to test, the remainder to val.
train_set_split_idx = (3 * n_images) // 4
test_set_split_idx = n_images // 14

# Use an explicit end index instead of negative slicing: with fewer than 14
# images test_set_split_idx is 0, and `names[a:-0]` / `names[-0:]` would yield
# an empty val set and a test set containing *every* image.
val_end_idx = max(train_set_split_idx, n_images - test_set_split_idx)

train_set = im_names[:train_set_split_idx]
val_set = im_names[train_set_split_idx:val_end_idx]
test_set = im_names[val_end_idx:]

In [6]:
def remove_small_islands(mask, min_hole_size=5, min_island_size=50):
    """Fill small background holes and drop small foreground islands.

    The mask is modified in place and the same array is returned.

    Parameters
    ----------
    mask : numpy.ndarray
        Binary mask whose foreground pixels are 1 and background pixels are 0
        (the background is computed as ``1 - mask``).
    min_hole_size : int, optional
        Background regions smaller than this many pixels are filled with 1.
        Defaults to 5.
    min_island_size : int, optional
        Foreground regions smaller than this many pixels are set to 0.
        Defaults to 50.

    Returns
    -------
    numpy.ndarray
        The input ``mask`` array after cleaning.
    """
    # Step 1: fill small holes. Label background regions (connectivity=1, i.e.
    # no diagonal neighbours) and discard the labels of the small ones.
    background = 1 - mask
    hole_labels = measure.label(background, connectivity=1)
    kept_holes = morphology.remove_small_objects(hole_labels, min_size=min_hole_size)
    # Label 0 now covers the original foreground plus every removed small hole.
    mask[kept_holes == 0] = 1

    # Step 2: remove small islands from the hole-filled mask the same way.
    island_labels = measure.label(mask, connectivity=1)
    kept_islands = morphology.remove_small_objects(island_labels, min_size=min_island_size)
    # Label 0 now covers the background plus every removed small island.
    mask[kept_islands == 0] = 0
    return mask

In [None]:
# Root folders of the three dataset variants that get generated.
new_dset_paths = ['all_skeleton', 'mix_skeleton', 'no_skeleton']

# Every variant gets the layout <variant>/<split>/<category>.
output_dirs = [
    Path(dset_root) / split_name / category_name
    for dset_root in new_dset_paths
    for split_name in ['train', 'val', 'test']
    for category_name in ['img', 'mask']
]
for output_dir in output_dirs:
    # exist_ok makes the cell safe to re-run.
    output_dir.mkdir(parents=True, exist_ok=True)

In [8]:
# Build the dataset variants listed in new_dset_paths. Every mask is cleaned
# (dilate + remove_small_islands); it is additionally skeletonized for all
# images in 'all_skeleton' and for the masks_to_skeletonize subset in
# 'mix_skeleton'. Mask and image are then resized to 1024x1024 and written to
# <variant>/<split>/mask/<i>.png and <variant>/<split>/img/<i>.png.

# Loop invariants, hoisted out of the per-image loop.
output_size = (1024, 1024)
cleanup_kernel = np.ones((2, 2), np.uint8)
skeleton_kernel = np.ones((3, 3), np.uint8)
paths_to_skeletonize = set(masks_to_skeletonize)

# File name -> split lookup. Splits are inserted in reverse priority so that,
# if a name occurs in several splits, train wins over val and val over test
# (the same precedence as an if/elif chain testing train, val, test in order).
split_by_name = {}
for split_name, split_names in (('test', test_set), ('val', val_set), ('train', train_set)):
    for name in split_names:
        split_by_name[name] = split_name

for new_dset_path in new_dset_paths:
    for i, mask_path in enumerate(mask_paths):
        # cv2.imread returns None instead of raising when a file cannot be read.
        mask = cv2.imread(mask_path, 0)
        if mask is None:
            raise FileNotFoundError(f'Could not read mask: {mask_path}')
        img_path = mask_path.replace('masks', 'images')
        img = cv2.imread(img_path, cv2.IMREAD_ANYDEPTH)
        if img is None:
            raise FileNotFoundError(f'Could not read image: {img_path}')

        # Clean the mask: slight dilation, convert 255 -> 1, then fill small
        # holes and drop small islands.
        mask = cv2.dilate(mask, cleanup_kernel, iterations=1)
        mask[mask == 255] = 1
        mask = remove_small_islands(mask)

        # The dataset variant (not the mask path) decides on skeletonization.
        should_skeletonize = (
            new_dset_path == 'all_skeleton'
            or (new_dset_path == 'mix_skeleton' and mask_path in paths_to_skeletonize)
        )
        if should_skeletonize:
            skeleton = skeletonize(mask, method='lee')
            # Normalise to a 0/1 uint8 array: the dtype/value range returned by
            # skeletonize differs between scikit-image versions, and cv2.dilate
            # does not accept boolean arrays.
            mask = (skeleton > 0).astype(np.uint8)
            mask = cv2.dilate(mask, skeleton_kernel, iterations=1)
        mask[mask == 1] = 255

        im_name = Path(mask_path).name
        train_val_test = split_by_name.get(im_name)
        if train_val_test is None:
            raise ValueError(f'{im_name} is not assigned to the train, val or test set')

        # Save into the mask/ and img/ sub-folders with an explicit extension;
        # cv2.imwrite picks the encoder from the file extension.
        split_dir = Path(new_dset_path) / train_val_test
        mask_save_path = str(split_dir / 'mask' / f'{i}.png')
        img_save_path = str(split_dir / 'img' / f'{i}.png')

        # Resize mask and image to 1024x1024. `interpolation` must be passed by
        # keyword: the third positional argument of cv2.resize is `dst`.
        mask = cv2.resize(mask, output_size, interpolation=cv2.INTER_AREA)
        img = cv2.resize(img, output_size, interpolation=cv2.INTER_AREA)

        # cv2.imwrite reports failure through its return value.
        if not cv2.imwrite(mask_save_path, mask):
            raise OSError(f'Could not write mask to {mask_save_path}')
        if not cv2.imwrite(img_save_path, img):
            raise OSError(f'Could not write image to {img_save_path}')

NameError: name 'new_dset_paths' is not defined

In [None]:
# Augmentations
'''
https://albumentations.ai/docs/getting_started/image_augmentation/
Expand dataset ~1000x (to 28k image/mask pairs in total for train+val+test sets)
Compose:
- piecewise affine (p=0.9)
- one of (probability=0.8): (not on mask)
    - randomgamma (probability=0.25)
    - randombrightnesscontrast (probability=0.25)
    - randomtonecurve (probability=0.25)
    - clahe (probability=0.25)
- one of (probability=0.75):
    - verticalflip (probability=0.5)
    - randomrotate90 (probability=0.5)
'''

# Intensity group: one of these is picked when the group fires.
# NOTE(review): CLAHE may reject non-uint8 input in some albumentations
# versions - confirm against the image dtype used here.
intensity_choices = [
    albumentations.RandomGamma(p=0.25),
    albumentations.RandomBrightnessContrast(p=0.25),
    albumentations.RandomToneCurve(p=0.25),
    albumentations.CLAHE(p=0.25),
]

# Geometric group: a vertical flip or a rotation by a multiple of 90 degrees.
flip_or_rotate_choices = [
    albumentations.VerticalFlip(p=0.5),
    albumentations.RandomRotate90(p=0.5),
]

transform = albumentations.Compose([
    albumentations.PiecewiseAffine(p=0.9),
    albumentations.OneOf(intensity_choices, p=0.8),
    albumentations.OneOf(flip_or_rotate_choices, p=0.75),
])