In [1]:
%matplotlib inline

from pathlib import Path
import itertools
import shutil

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tifffile
import joblib
import cv2
import tqdm

import read_roi

import sys
sys.path.append("../../../")
import maskflow
from maskflow import simuscope

import imgaug as ia
from imgaug import augmenters as iaa

# NOTE(review): absolute, user-specific location; adjust it when running on another machine.
root_dir = Path("/home/hadim/.data/Neural_Network/Maskflow/Microtubule")

# Dataset layout: <root_dir>/Data/{Image,Mask,Class}
data_dir = root_dir / "Data"
image_dir = data_dir / "Image"
mask_dir = data_dir / "Mask"
class_dir = data_dir / "Class"

# Create the folders up front (a no-op when they already exist).
for folder in (data_dir, image_dir, mask_dir, class_dir):
    folder.mkdir(parents=True, exist_ok=True)

# Load the configuration and keep a copy of it next to the data folder.
config = maskflow.load_config("config.yml")
class_names = config["CLASS_NAMES"]
maskflow.save_config(config, root_dir / "config.yml")

# Thickness (in pixels) used when drawing a microtubule on its mask.
line_thickness = 4

# Number of extra images written for each manually annotated image.
n_augm = 25

Using TensorFlow backend.


# Dataset

The dataset consists of a root folder containing 3 sub-folders:

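- `Image/`: contains the original images stored as TIF files. Shape is [H, W, C].
- `Mask/`: contains the instance masks stored as TIF files, one channel per object. Shape is [H, W, N], where N is the number of objects in the image.
- `Class/`: contains the class ids of the objects stored as CSV files, one file per image.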

## Generate Fake Microtubule Image

In [2]:
# Load the simulation model and restrict it to one frame and one channel.
model_name = "simple_microtubule"
model = simuscope.Model.load_model(model_name)
model.acquisition.n_frames = 1
model.acquisition.channels.pop("channel_2")

builder = model.get_builder()
print(builder)

# Parameter grid for the image generation: every (snr, n_mts) pair is simulated `n` times.
snr_range = np.arange(1.3, 4, 0.6)
n_mts_range = np.arange(10, 200, 50)
n = 50

# Bounds passed to `np.random.randint` to pick the (square) camera chip size of each image.
size_range = (512, 1280)

# `loc` and `scale` of the "initial_length" parameter of the microtubule object.
length_loc = 6
length_scale = 5

total_images = snr_range.size * n_mts_range.size * n
print(total_images)

Image shape: (1, 1, 512, 512)
Image memory size: 2.00 MB
Channels: ['channel_1']
Objects: [<maskflow.simuscope.builder.object_builder.microtubule_builder.SimpleMicrotubuleBuilder object at 0x7f72501fb550>]

1000


In [3]:
# Useful functions

def get_line(x):
    """Extract the seed segment of one microtubule.

    Args:
        x: DataFrame holding the rows of a single microtubule, with a "type"
            column and the "start_x", "start_y", "end_x", "end_y" columns.

    Returns:
        A single-row DataFrame with the coordinates of the first row whose
        type is "seed".

    Raises:
        IndexError: if `x` has no row of type "seed".
    """
    seed_rows = x[x.type == "seed"]
    coordinates = {
        name: seed_rows[name].values[0]
        for name in ("start_x", "start_y", "end_x", "end_y")
    }
    return pd.DataFrame([coordinates])


def draw_line(image, line, line_thickness):
    """Draw one line segment with value 1 on a single-channel image.

    Args:
        image: 2D array indexed as [row, column], i.e. shape (height, width).
            It is handed to `cv2.line`, so pass a copy if the original must
            stay untouched.
        line: Series/mapping with the "start_x", "start_y", "end_x" and
            "end_y" coordinates in pixels; values are rounded to integers.
        line_thickness: line thickness in pixels, forwarded to `cv2.line`.

    Returns:
        The image returned by `cv2.line`.
    """
    line = np.round(line).astype("int16")

    # Use plain Python ints: some OpenCV builds refuse numpy scalars as points.
    p1 = (int(line["start_x"]), int(line["start_y"]))
    p2 = (int(line["end_x"]), int(line["end_y"]))

    # clipLine expects (x, y, width, height) while a numpy shape is
    # (height, width); swapping them mis-clips lines on non-square images.
    height, width = image.shape[:2]
    _, p1, p2 = cv2.clipLine((0, 0, width, height), p1, p2)

    image = cv2.line(image, p1, p2, (1,), line_thickness)
    return image


def convert_rois_to_json(rois):
    """Convert ROIs into the column-oriented microtubule dictionary.

    Every ROI becomes one entry of type "seed" on frame 0; its position in
    `rois` is used both as `mt_id` and (as a string) as the entry key.

    Args:
        rois: mapping of ROI name to ROI dict. A ROI either has the
            "x1", "y1", "x2", "y2" keys, or "x" and "y" sequences whose first
            and last points are used as the segment ends.

    Returns:
        A dict with the keys "end_x", "end_y", "frame", "mt_id", "start_x",
        "start_y" and "type", each mapping the entry key to its value.
    """
    columns = ("end_x", "end_y", "frame", "mt_id", "start_x", "start_y", "type")
    mt = {column: {} for column in columns}

    for index, roi in enumerate(rois.values()):
        key = str(index)
        mt["type"][key] = "seed"
        mt["frame"][key] = 0
        mt["mt_id"][key] = index

        if "x1" in roi:
            # Straight line ROI: both ends are stored explicitly.
            mt["end_x"][key] = roi["x2"]
            mt["end_y"][key] = roi["y2"]
            mt["start_y"][key] = roi["y1"]
            mt["start_x"][key] = roi["x1"]
        else:
            # Multi-point ROI: keep only its first and last points.
            mt["end_x"][key] = roi["x"][-1]
            mt["end_y"][key] = roi["y"][-1]
            mt["start_y"][key] = roi["y"][0]
            mt["start_x"][key] = roi["x"][0]

    return mt

In [3]:
# Generate the dataset

def create(*args):
    """Simulate `n` images for one (snr, n_mts) pair and write them to disk.

    For every simulated image three files sharing the same basename are
    written: the image (`image_dir`), the instance mask with one channel per
    microtubule (`mask_dir`) and the class ids (`class_dir`).

    The function relies on the notebook globals `model`, `n`, `size_range`,
    `length_loc`, `length_scale`, `line_thickness` and the output folders.

    Args:
        *args: a single positional argument, the `(snr, n_mts)` tuple.
    """
    snr, n_mts = args[0]

    model.acquisition.channels["channel_1"].snr = snr
    mt_obj = model.objects["microtubule"]
    mt_obj.parameters["nucleation_rate"]["parameters"]["loc"] = 0
    mt_obj.parameters["n_microtubules"]["parameters"]["loc"] = n_mts
    mt_obj.parameters["initial_length"]["parameters"]["loc"] = length_loc
    mt_obj.parameters["initial_length"]["parameters"]["scale"] = length_scale

    for image_id in range(n):
        basename = f"image_snr_{snr:.1f}_n-mts_{n_mts}_id_{image_id}"

        # Each image gets its own random, square chip size.
        random_size = np.random.randint(*size_range)
        model.microscope.camera.chip_size_height = random_size
        model.microscope.camera.chip_size_width = random_size

        builder = model.get_builder()
        builder.build(keep_images=False)

        # Save image
        builder.save_image(str(image_dir / (basename + ".tif")))

        objects = builder.get_objects_as_dict()
        data = pd.DataFrame.from_dict(objects["microtubule"])

        width = model.microscope.camera.chip_size_width
        height = model.microscope.camera.chip_size_height

        # One line (the seed segment) per microtubule id.
        lines = data.groupby("mt_id").apply(get_line).reset_index(drop=True)
        count = lines.shape[0]

        # All object are of class "1" for a microtubule. The ids are counted
        # from the mask channels (one per mt_id) so that both files always
        # agree, even if an object spans several rows of `data`.
        class_ids = np.repeat(1, count)

        # Save class ids
        class_ids_path = class_dir / (basename + ".csv")
        pd.Series(class_ids).to_csv(class_ids_path, index=False)

        # Masks are indexed [row, column, instance], i.e. (height, width, N).
        mask = np.zeros((height, width, count), dtype=np.uint8)
        for line_id, line in lines.iterrows():
            mask[:, :, line_id] = draw_line(mask[:, :, line_id].copy(), line, line_thickness)

        # Handle occlusions: a pixel is kept only in the mask with the highest
        # index that covers it; it is removed from all the lower ones.
        handle_occlusion = True
        if handle_occlusion:
            occlusion = np.logical_not(mask[:, :, -1]).astype(np.uint8)
            for channel in range(count - 2, -1, -1):
                mask[:, :, channel] = mask[:, :, channel] * occlusion
                occlusion = np.logical_and(occlusion, np.logical_not(mask[:, :, channel]))

        # Save mask as tiff file
        mask_path = mask_dir / (basename + ".tif")
        tifffile.imsave(str(mask_path), mask)
    
# One job per (snr, n_mts) combination; every job writes its files to disk itself.
parameters = list(itertools.product(snr_range, n_mts_range))
p = joblib.Parallel(n_jobs=8, verbose=1)
_ = p(joblib.delayed(create)(params) for params in parameters)

[Parallel(n_jobs=8)]: Done  20 out of  20 | elapsed: 22.5min finished


## Process Manually Annotated Dataset

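Here we copy a manually annotated dataset into the final training dataset. Each annotated image is also written `n_augm` more times: as randomly augmented versions (flips and affine transforms), or as plain copies when the image has too many objects to augment without running out of memory.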

In [4]:
manual_data_dir = root_dir / "Manual Training Dataset"
processed_data_dir = root_dir / "Manual Training Dataset/Processed"

# Every ROI archive (*.zip) is expected to sit next to a TIF image with the same name.
fnames = [fname.with_suffix(".tif") for fname in processed_data_dir.glob("*.zip")]
#fnames = fnames[:5]

# Sometimes(0.5, ...) applies the given augmenter in 50% of all cases,
# e.g. Sometimes(0.5, GaussianBlur(0.3)) would blur roughly every second image.
sometimes = lambda aug: iaa.Sometimes(0.5, aug)

seq = iaa.Sequential(
    [
        iaa.Fliplr(0.5), # horizontally flip 50% of all images
        iaa.Flipud(0.5), # vertically flip 50% of all images

        sometimes(iaa.Affine(
            # scale images to 80-120% of their size, individually per axis
            scale={"x": (0.8, 1.2), "y": (0.8, 1.2)},
            # translate by -2 to +2 percent (per axis)
            translate_percent={"x": (-0.02, 0.02), "y": (-0.02, 0.02)},
            # rotate by -15 to +15 degrees
            rotate=(-15, 15),
            # shear by -5 to +5 degrees
            shear=(-5, 5),
            # use nearest neighbour or bilinear interpolation (fast)
            order=[0, 1],
            # pick any of the available modes to fill the newly created pixels
            mode=ia.ALL
        )),
    ])

# Above this number of objects the mask is copied instead of augmented
# to avoid memory errors.
max_objects_for_augm = 500

for fname in tqdm.tqdm(fnames, total=len(fnames)):

    basename = fname.stem

    # Copy image file
    shutil.copy(fname, image_dir)

    # Convert ZIP rois to JSON rois
    rois = read_roi.read_roi_zip(fname.with_suffix(".zip"))
    objects = convert_rois_to_json(rois)

    im = tifffile.imread(str(fname))
    width = im.shape[1]
    height = im.shape[0]

    data = pd.DataFrame.from_dict(objects)

    # One line per annotated microtubule.
    lines = data.groupby("mt_id").apply(get_line).reset_index(drop=True)
    count = lines.shape[0]

    # Masks are indexed [row, column, instance], i.e. (height, width, N).
    mask = np.zeros((height, width, count), dtype=np.uint8)
    for i, line in lines.iterrows():
        mask[:, :, i] = draw_line(mask[:, :, i].copy(), line, line_thickness)

    # Handle occlusions: a pixel is kept only in the mask with the highest
    # index that covers it; it is removed from all the lower ones.
    handle_occlusion = True
    if handle_occlusion:
        occlusion = np.logical_not(mask[:, :, -1]).astype(np.uint8)
        for i in range(count - 2, -1, -1):
            mask[:, :, i] = mask[:, :, i] * occlusion
            occlusion = np.logical_and(occlusion, np.logical_not(mask[:, :, i]))

    # All object are of class "1" for a microtubule
    class_ids = np.repeat(1, mask.shape[-1])

    # Save mask as tiff file
    mask_path = mask_dir / (basename + ".tif")
    tifffile.imsave(str(mask_path), mask)

    # Save class ids
    class_ids_path = class_dir / (basename + ".csv")
    pd.Series(class_ids).to_csv(class_ids_path, index=False)

    # If the mask is too large we just copy the datum instead of augmenting it
    # to avoid memory errors.
    augment = mask.shape[-1] < max_objects_for_augm

    # Write `n_augm` extra data: new images derived from the one above,
    # or plain copies of it when augmentation is skipped.
    for i in range(n_augm):
        new_image_path = image_dir / (basename + f"_AUGMENTED_{i}.tif")
        new_mask_path = mask_dir / (basename + f"_AUGMENTED_{i}.tif")
        new_class_ids_path = class_dir / (basename + f"_AUGMENTED_{i}.csv")

        if augment:
            # Freeze the random parameters so that the image and its mask
            # go through exactly the same transformation.
            seq_det = seq.to_deterministic()

            new_im = seq_det.augment_image(im)
            new_mask = seq_det.augment_image(mask)

            tifffile.imsave(str(new_image_path), new_im)
            tifffile.imsave(str(new_mask_path), new_mask)
            pd.Series(class_ids).to_csv(new_class_ids_path, index=False)
        else:
            shutil.copyfile(str(fname), str(new_image_path))
            shutil.copyfile(str(mask_path), str(new_mask_path))
            shutil.copyfile(str(class_ids_path), str(new_class_ids_path))

  6%|▌         | 1/17 [00:13<03:41, 13.83s/it]

KeyboardInterrupt: 