Directories

In [None]:
import os
import shutil

def folder_structure(target_dir='../data/'):
    """Create the data folder layout used by the preprocessing steps.

    The following tree is created below ``target_dir`` (existing folders are
    left untouched)::

        data_original/
        data_temp/
        data_train/
            train_images/train/   train_masks/train/
            val_images/val/       val_masks/val/
            test_images/test/     test_masks/test/

    Args:
        target_dir (str, optional): The data directory path relative to the
            directory you run this function from. An absolute path is used
            as is. A trailing slash is not required. Defaults to '../data/'.
    """
    # os.path.join discards the cwd part when target_dir is absolute.
    base_dir = os.path.join(os.getcwd(), target_dir)

    # make main directories
    for name in ('data_original', 'data_temp', 'data_train'):
        os.makedirs(os.path.join(base_dir, name), exist_ok=True)

    # make subdirectories; joining path components (instead of concatenating
    # strings) keeps them inside data_train even without a trailing slash
    train_dir = os.path.join(base_dir, 'data_train')
    for split in ('train', 'val', 'test'):
        for kind in ('images', 'masks'):
            os.makedirs(os.path.join(train_dir, split + '_' + kind, split), exist_ok=True)

Copy images and masks to folders

In [None]:
source_dir = '/Users/aaron/test/'
target_dir = '/Users/aaron/target/'
extensions = ['.tif','.shp', '.shx', '.dbf']

image_dir = target_dir + 'image/'
mask_dir = target_dir + 'mask/'

# Walk the whole source tree: .tif files are copied into image_dir, the
# remaining listed extensions (shapefile parts) into mask_dir. Files keep
# their original names.
for foldername, subfolders, filenames in os.walk(source_dir):
    print('Searching files in %s...' % (foldername))
    for filename in filenames:
        # '.tif' is tested first, so it never reaches the mask branch even
        # though it is also listed in `extensions`.
        if filename.endswith('.tif'):
            destination_dir = image_dir
        elif filename.endswith(tuple(extensions)):
            destination_dir = mask_dir
        else:
            continue
        totalCopyPath = os.path.join(foldername, filename)
        # Report the path the file is really written to.
        newName = os.path.join(destination_dir, filename)
        print(totalCopyPath)
        print('Copying ' + filename + ' to ' + newName)
        # shutil.copy fails if the destination folder does not exist yet.
        os.makedirs(destination_dir, exist_ok=True)
        shutil.copy(totalCopyPath, newName)
print('Done.')

In [None]:
# Go through subfolders of source directory and copy every .tif file into
# image_dir, naming the copies with a running number (1.tif, 2.tif, ...).
# The numbering keeps equally named files from different subfolders from
# overwriting each other.
counter = 1
for foldername, subfolders, filenames in os.walk(source_dir):
    print('Searching files in %s...' % (foldername))
    for filename in filenames:
        if not filename.endswith('.tif'):
            continue
        totalCopyPath = os.path.join(foldername, filename)
        newName = str(counter) + '.tif'
        counter += 1
        print('Copying ' + filename + ' to ' + newName)
        # shutil.copy fails if the destination folder does not exist yet.
        os.makedirs(image_dir, exist_ok=True)
        # Copy to the numbered name that the message above announces.
        shutil.copy(totalCopyPath, os.path.join(image_dir, newName))
print('Done.')

Create smaller images

In [None]:
import cv2
from matplotlib import pyplot as plt
from patchify import patchify

In [None]:
def create_tiles(size=512, step=512, source_dir='../data/data_original/', target_dir='../data/data_temp/'):
    """Cut every .tif image found below ``source_dir`` into square tiles.

    Each image is loaded with OpenCV using read flag 0 (grayscale), split
    with ``patchify`` and every tile is written to ``target_dir`` as
    ``image_<n>_<row>_<col>.tif``. ``<n>`` is a running number (starting at
    1) of the source image in the order ``os.walk`` returns the files;
    ``<row>`` and ``<col>`` are the tile's indices in the patch grid.

    Args:
        size (int, optional): Size of the patches in pixels. Defaults to 512. Make sure the original image dimension are multiples of this size.
        step (int, optional): Distance in pixels between the origins of neighbouring patches, passed on to ``patchify``. Defaults to 512.
        source_dir (str, optional): Directory where the large images are stored. Defaults to '../data/data_original/'.
        target_dir (str, optional): Directory the tiles are written to; created if it does not exist. Defaults to '../data/data_temp/'.

    Raises:
        OSError: If an image cannot be read or a tile cannot be written.
    """
    # Show the working directory because the default paths are relative to it.
    print(os.getcwd())

    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    counter = 1
    for root, dirs, files in os.walk(source_dir):
        for filename in files:
            if not filename.endswith('.tif'):
                continue
            print(root)
            print(filename)
            image_path = os.path.join(root, filename)
            large_image = cv2.imread(image_path, 0)
            # cv2.imread signals failure by returning None instead of raising.
            if large_image is None:
                raise OSError("Could not read the image: " + image_path)
            patches = patchify(large_image, (size, size), step=step)
            for i in range(patches.shape[0]):
                for j in range(patches.shape[1]):
                    tile_path = os.path.join(target_dir, 'image_%d_%d_%d.tif' % (counter, i, j))
                    # cv2.imwrite reports failure through its return value.
                    if not cv2.imwrite(tile_path, patches[i, j]):
                        raise OSError("Could not write the image: " + tile_path)
            counter += 1

Train/Val split

In [None]:
import splitfolders

def split_folders(input_dir='../data/data_temp', output_dir='../data/data_train', ratio=(0.8,0.2), seed=42):
    """Splits available data according to a given ratio and copies it to a specified folder.

    Thin wrapper around ``splitfolders.ratio``.

    Args:
        input_dir (str, optional): Path to directory containing the image tiles. Defaults to '../data/data_temp'.
        output_dir (str, optional): Path to the training directory. Defaults to '../data/data_train'.
        ratio (tuple, optional): Split ratio passed on to ``splitfolders.ratio``. Per the split-folders
            documentation, two values mean train/validation and three values mean
            train/validation/test. Defaults to (0.8,0.2).
        seed (int, optional): Random seed. Defaults to 42.
    """
    splitfolders.ratio(input_dir, output=output_dir, seed=seed, ratio=ratio, group_prefix=None)