## Imports & Installs

In [0]:
pip install elasticdeform

Collecting elasticdeform
  Downloading https://files.pythonhosted.org/packages/41/67/931371b1434b919537c43867ef45dae8af985a7331ae5b6d0e47bddfc875/elasticdeform-0.4.6.tar.gz
Building wheels for collected packages: elasticdeform
  Building wheel for elasticdeform (setup.py) ... [?25l[?25hdone
  Created wheel for elasticdeform: filename=elasticdeform-0.4.6-cp36-cp36m-linux_x86_64.whl size=70442 sha256=8dd40aae66c71a03ae8fc3fbd2dd25545f036aa54ee256e8c326d82a3c805e6b
  Stored in directory: /root/.cache/pip/wheels/42/3a/94/a1d69f8b9da44826a171395e67f9f7a117f89af18f206481dd
Successfully built elasticdeform
Installing collected packages: elasticdeform
Successfully installed elasticdeform-0.4.6


In [0]:
import glob
import math
import os
import time
from typing import Tuple

import elasticdeform
import numpy as np
from PIL import Image, ImageSequence

# Seed NumPy's global random number generator so code drawing from it yields the same values on every run.
np.random.seed(1)

## Data Augmentation

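The following folders must be present in the current working directory for each dataset:

- For the ISBI segmentation challenge dataset: `segmentation_challenge_data` (containing `train-volume.tif` and `train-labels.tif`)
- For the DIC-HeLa dataset: `cell_tracking_challenge_data/DIC-C2DH-HeLa_train` and `cell_tracking_challenge_data/DIC-C2DH-HeLa_test`
- For the PhC-U373 dataset: `cell_tracking_challenge_data/PhC-C2DH-U373_train` and `cell_tracking_challenge_data/PhC-C2DH-U373_test`

The augmentation code reads the cell tracking images from the `01` and `02` subfolders of each `*_train` folder and the matching labels from `01_GT` and `02_GT` (all as `.tif` files).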

In [0]:
def makedir(path: str):
    """
    Creates a directory with the specified path if it does not exist yet.

    Missing parent directories are created as well. A relative path is resolved against the current working
    directory.

    Params
    ------
    path: str
        the file path of the directory that should be created
    Raises
    ------
    FileExistsError
        if the path already exists but is not a directory
    """
    final_directory = os.path.join(os.getcwd(), path)
    # exist_ok=True avoids the race between checking for the directory and creating it.
    os.makedirs(final_directory, exist_ok=True)


def make_data_dirs(path: str, factor: int) -> Tuple[str, str, str, str]:
    """
    Creates the directories for the training and test datasets.

    Four directories are created below `{path}/augmented`: one each for the training images, the training labels,
    the test images and the test labels. The augmentation factor is part of every directory name.

    Params
    ------
    path: str
        the file path to the directory in which the new directories should be created
    factor: int
        the factor with which the size of the dataset is increased
    Returns
    ------
    Tuple[str, str, str, str]
        the paths of the training image, training label, test image and test label directories, in that order
    """
    # The order of the subsets defines the order of the returned paths.
    output_paths = tuple(
        f'{path}/augmented/{subset}_{factor}x'
        for subset in ('train', 'train_labels', 'test', 'test_labels')
    )
    for output_path in output_paths:
        makedir(output_path)

    return output_paths


def crop_center(image: np.ndarray, width: int, height: int) -> np.ndarray:
    """
    Crops an image from the center to have the specified width and height.

    The image is indexed as (row, column), so `height` applies to the first axis and `width` to the second axis.
    Any further axes (e.g. a channel axis) are left untouched.

    Params
    ------
    image: np.ndarray
        an array with at least two dimensions that represents the image to be cropped
    width: int
        the width the image should be cropped to
    height: int
        the height the image should be cropped to
    Returns
    ------
    np.ndarray
        the cropped image
    Raises
    ------
    ValueError
        if the requested size is larger than the image
    """
    rows, cols = image.shape[:2]
    # A crop larger than the image would give negative start indices and silently return a wrong-sized slice.
    if width > cols or height > rows:
        raise ValueError(f'cannot crop an image of size {cols}x{rows} to {width}x{height}')

    start_x = cols // 2 - (width // 2)
    start_y = rows // 2 - (height // 2)
    return image[start_y:start_y + height, start_x:start_x + width]


def augment_image(image: Image, label: Image, output_size: Tuple[int, int] = (700, 700), rotation: bool = False, angle:int = 45) -> Tuple[np.ndarray, np.ndarray]:
    """ Augments an image and its label using the same transformations. First the images are padded by mirroring the
    image along the borders. This is to ensure that the pixels in the border region of the image will have enough
    context during convolution.

    If rotation=True, the images will be rotated with the specified angle. The image is interpolated with cubic
    splines while the label uses nearest-neighbour interpolation, matching the interpolation orders used for the
    elastic deformation.

    Elastic deformations are performed over a 3x3 grid. The displacements are sampled from a Gaussian distribution with
    a standard deviation of 10 pixels.

    Finally, the images are cropped to the output size to remove any artifacts on the image borders caused by the
    elastic deformation.

    Params
    ------
    image: Image
        a PIL.Image that represents the image to be augmented (must convert to a 2-dimensional array)
    label: Image
        a PIL.Image that represents the label of the image to be augmented
    output_size: (int, int)
        represents the size (width, height) the augmented images should be when they are returned
    rotation: bool
        whether to apply rotations on the image and label
    angle: int
        the angle in degrees the images should be rotated
    Returns
    ------
    image_array, label_array: Tuple[np.ndarray, np.ndarray]
        the augmented image and label
    """
    image_array = np.array(image)
    label_array = np.array(label)

    # output_size is (width, height) while the arrays are indexed (row, column).
    out_width, out_height = output_size

    # Ensure there is enough padding in case output size = image size. The amount is clamped at zero because
    # np.pad rejects negative widths, which would otherwise occur for images much larger than the output size.
    pad_rows = max(0, out_height - image_array.shape[0] + 200)
    pad_cols = max(0, out_width - image_array.shape[1] + 200)
    padding = [(pad_rows, pad_rows), (pad_cols, pad_cols)]

    image_array = np.pad(image_array, pad_width=padding, mode='symmetric')
    label_array = np.pad(label_array, pad_width=padding, mode='symmetric')

    if rotation:
        # NOTE(review): assumes scipy.ndimage.rotate is the intended `rotate`; it was never imported in this file.
        from scipy.ndimage import rotate

        # reshape=False keeps the padded shape; mode='reflect' mirrors at the borders like the padding above.
        image_array = rotate(image_array, angle=angle, reshape=False, order=3, mode='reflect')
        # order=0 keeps the label values discrete instead of blending neighbouring labels.
        label_array = rotate(label_array, angle=angle, reshape=False, order=0, mode='reflect')

    image_array, label_array = elasticdeform.deform_random_grid([image_array, label_array], sigma=10, points=3,
                                                                order=[3, 0])

    image_array = crop_center(image_array, out_width, out_height)
    label_array = crop_center(label_array, out_width, out_height)

    return image_array, label_array


def augment_segmentation_dataset(factor: int, rotation: bool = False):
    """ Augments the ISBI Challenge Segmentation dataset.

    The training images and labels are read frame by frame from the multi-page TIFF files `train-volume.tif` and
    `train-labels.tif` in the `segmentation_challenge_data` folder.

    Params
    ------
    factor: int
        the factor with which the size of the dataset should increase
    rotation: bool
        whether to apply rotations on the images
    """
    train, train_label, test, test_label = make_data_dirs("segmentation_challenge_data", factor)

    # Every frame is copied while its volume is open, so the files can be closed before the augmentation starts.
    with Image.open("segmentation_challenge_data/train-volume.tif") as training_image_volume:
        images = [frame.copy() for frame in ImageSequence.Iterator(training_image_volume)]

    with Image.open("segmentation_challenge_data/train-labels.tif") as training_label_volume:
        labels = [frame.copy() for frame in ImageSequence.Iterator(training_label_volume)]

    augment_dataset(factor, images, labels, train, train_label, test, test_label, rotation)


def augment_phc_dataset(factor: int, rotation: bool = False):
    """ Augments the PhC-U373 dataset.

    Images are read from the `01` and `02` subfolders and labels from the `01_GT` and `02_GT` subfolders of
    `cell_tracking_challenge_data/PhC-C2DH-U373_train`. Images and labels are paired by their position in the sorted
    file listings, so the file names of both must sort into the same order.

    Params
    ------
    factor: int
        the factor with which the size of the dataset should increase
    rotation: bool
        whether to apply rotations on the images
    """
    train, train_label, test, test_label = make_data_dirs("cell_tracking_challenge_data/PhC-C2DH-U373_train", factor)

    # glob returns files in arbitrary order; sorting keeps the image/label pairing deterministic.
    images = list(map(Image.open, sorted(glob.glob('cell_tracking_challenge_data/PhC-C2DH-U373_train/01/*.tif'))))
    images.extend(map(Image.open, sorted(glob.glob('cell_tracking_challenge_data/PhC-C2DH-U373_train/02/*.tif'))))
    labels = list(map(Image.open, sorted(glob.glob('cell_tracking_challenge_data/PhC-C2DH-U373_train/01_GT/*.tif'))))
    labels.extend(map(Image.open, sorted(glob.glob('cell_tracking_challenge_data/PhC-C2DH-U373_train/02_GT/*.tif'))))

    augment_dataset(factor, images, labels, train, train_label, test, test_label, rotation)


def augment_dic_dataset(factor: int, rotation: bool = False):
    """ Augments the DIC-HeLa dataset.

    Images are read from the `01` and `02` subfolders and labels from the `01_GT` and `02_GT` subfolders of
    `cell_tracking_challenge_data/DIC-C2DH-HeLa_train`. Images and labels are paired by their position in the sorted
    file listings, so the file names of both must sort into the same order.

    Params
    ------
    factor: int
        the factor with which the size of the dataset should increase
    rotation: bool
        whether to apply rotations on the images
    """
    train, train_label, test, test_label = make_data_dirs("cell_tracking_challenge_data/DIC-C2DH-HeLa_train", factor)

    # glob returns files in arbitrary order; sorting keeps the image/label pairing deterministic.
    images = list(map(Image.open, sorted(glob.glob('cell_tracking_challenge_data/DIC-C2DH-HeLa_train/01/*.tif'))))
    images.extend(map(Image.open, sorted(glob.glob('cell_tracking_challenge_data/DIC-C2DH-HeLa_train/02/*.tif'))))
    labels = list(map(Image.open, sorted(glob.glob('cell_tracking_challenge_data/DIC-C2DH-HeLa_train/01_GT/*.tif'))))
    labels.extend(map(Image.open, sorted(glob.glob('cell_tracking_challenge_data/DIC-C2DH-HeLa_train/02_GT/*.tif'))))

    augment_dataset(factor, images, labels, train, train_label, test, test_label, rotation)


def augment_dataset(factor: int, images: [Image], labels: [Image], train_path: str, train_label_path: str,
                    test_path: str, test_label_path: str, rotation: bool = False):
    """ Augments a dataset by producing `factor` augmented versions of every image/label pair. The first 10% of the
    images (rounded down) form the test set, all remaining images form the training set. Each augmented image and
    label is written as a TIFF file to the folder of the set it belongs to.

    Params
    ------
    factor: int
        the factor with which the size of the dataset should increase
    images: [Image]
        an array of PIL.Image that represents the images to be augmented
    labels: [Image]
        an array of PIL.Image that represents the labels of the images to be augmented
    train_path: str
        the path the images from the training set should be saved to
    train_label_path: str
        the path the labels from the training set should be saved to
    test_path: str
        the path the images from the test set should be saved to
    test_label_path: str
        the path the labels from the test set should be saved to
    rotation: bool
        whether to apply rotations on the images
    """
    holdout_count = math.floor(len(images) * 0.1)

    for index, (image, label) in enumerate(zip(images, labels)):
        # Pairs before the cut-off index belong to the test set.
        in_test_set = index < holdout_count

        for copy_number in range(factor):
            augmented_image, augmented_label = augment_image(image, label, rotation=rotation)

            if in_test_set:
                image_file = f"{test_path}/test_{copy_number}_{index}.tif"
                label_file = f"{test_label_path}/label_{copy_number}_{index}.tif"
            else:
                image_file = f"{train_path}/train_{copy_number}_{index}.tif"
                label_file = f"{train_label_path}/label_{copy_number}_{index}.tif"

            Image.fromarray(augmented_image).save(image_file, "tiff")
            Image.fromarray(augmented_label).save(label_file, "tiff")


In [0]:
# Augment each of the three datasets with a factor of 50, in the order ISBI segmentation, PhC-U373, DIC-HeLa.
for augment in (augment_segmentation_dataset, augment_phc_dataset, augment_dic_dataset):
    augment(50)