## Stain normalization

In [None]:
# https://github.com/wanghao14/Stain_Normalization
!wget https://raw.githubusercontent.com/wanghao14/Stain_Normalization/master/stain_utils.py
!wget https://raw.githubusercontent.com/wanghao14/Stain_Normalization/master/stainNorm_Macenko.py
!pip install spams

--2022-12-22 07:35:23--  https://raw.githubusercontent.com/wanghao14/Stain_Normalization/master/stain_utils.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4255 (4.2K) [text/plain]
Saving to: ‘stain_utils.py’


2022-12-22 07:35:23 (34.7 MB/s) - ‘stain_utils.py’ saved [4255/4255]

--2022-12-22 07:35:23--  https://raw.githubusercontent.com/wanghao14/Stain_Normalization/master/stainNorm_Macenko.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.109.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2663 (2.6K) [text/plain]
Saving to: ‘stainNorm_Macenko.py’


2022-12-22 07:35:23 (34.3 

In [25]:
# !pwd

In [26]:
# %cd drive/MyDrive/nuclei_segmentation

In [27]:
# !ls

In [None]:
import os
import cv2
import glob
import shutil
import numpy as np
from tqdm import tqdm
from PIL import Image
import matplotlib.pyplot as plt

import stain_utils as utils
import stainNorm_Macenko

In [None]:
def macenko_normalize(img_dir: str, ref_img_dir: str):
    """Stain-normalize one H&E image against a reference image (Macenko method).

    Args:
        img_dir: Path of the image file to normalize (a file path, despite the name).
        ref_img_dir: Path of the reference image file the normalizer is fitted on
            (also a file path).

    Returns:
        Whatever ``stainNorm_Macenko.Normalizer.transform`` returns for the input
        image -- presumably an RGB array shaped like the input; confirm against
        ``stainNorm_Macenko``.
    """
    # Image to normalize, loaded through the helper module's reader.
    img = utils.read_image(img_dir)

    # Open the reference inside a context manager so its file handle is closed
    # promptly, and force three RGB channels so a palette / RGBA / grayscale
    # file cannot hand the normalizer an array with an unexpected channel count.
    with Image.open(ref_img_dir) as ref_file:
        ref_img = np.array(ref_file.convert("RGB"))

    # Fit the Macenko normalizer on the reference image. A fresh normalizer is
    # built and fitted on every call.
    normalizer = stainNorm_Macenko.Normalizer()
    normalizer.fit(ref_img)

    # Apply the fitted normalizer to the H&E image.
    return normalizer.transform(img)

In [None]:
def create_path(path):
    """Create directory ``path``, including missing parents, if it is not there yet.

    Calling this for a directory that already exists is a no-op.

    Args:
        path: Directory to create.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
    """
    # exist_ok=True folds the existence check into the creation call, so there
    # is no window between "check" and "create" in which another process could
    # create the directory and make makedirs fail.
    os.makedirs(path, exist_ok=True)

In [None]:
# Folders with the original (not yet stain-normalized) tissue images of each
# split; both paths are relative to the notebook's working directory.
train_dir = "dataset/monuseg/original/train/tissue_images"
test_dir = "dataset/monuseg/original/test/tissue_images"

In [None]:
# Output folders for the stain-normalized tissue images of each split.
train_stain_normalized_images_path = "dataset/monuseg/stain_normalized/train/tissue_images"
test_stain_normalized_images_path = "dataset/monuseg/stain_normalized/test/tissue_images"

# Image that every other image is normalized against (a file path, despite the "_dir" suffix).
ref_img_dir = "dataset/monuseg/original/train/tissue_images/TCGA-AR-A1AS-01Z-00-DX1.tif"

# Make sure both output folders exist before anything is written into them.
for output_dir in (train_stain_normalized_images_path, test_stain_normalized_images_path):
    create_path(output_dir)

In [None]:
# Materialize a sorted list of the input files: the processing order is then
# deterministic across runs, and tqdm takes its total from the list itself.
# (Counting with os.listdir over-reports whenever the folder holds hidden
# entries, which the "*" glob pattern does not match.)
train_image_paths = sorted(glob.glob(os.path.join(train_dir, "*")))
for image_path in tqdm(train_image_paths):
    name = os.path.basename(image_path)
    normI = macenko_normalize(image_path, ref_img_dir)
    # Cast to uint8 so PIL encodes the array as an ordinary 8-bit image.
    normI = Image.fromarray(normI.astype(np.uint8))
    # Keep the original file name (and therefore format) in the output folder.
    normI.save(os.path.join(train_stain_normalized_images_path, name))

100%|██████████| 30/30 [01:53<00:00,  3.79s/it]


In [None]:
# Materialize a sorted list of the input files: the processing order is then
# deterministic across runs, and tqdm takes its total from the list itself.
# (Counting with os.listdir over-reports whenever the folder holds hidden
# entries, which the "*" glob pattern does not match.)
test_image_paths = sorted(glob.glob(os.path.join(test_dir, "*")))
for image_path in tqdm(test_image_paths):
    name = os.path.basename(image_path)
    normI = macenko_normalize(image_path, ref_img_dir)
    # Cast to uint8 so PIL encodes the array as an ordinary 8-bit image.
    normI = Image.fromarray(normI.astype(np.uint8))
    # Keep the original file name (and therefore format) in the output folder.
    normI.save(os.path.join(test_stain_normalized_images_path, name))

100%|██████████| 14/14 [00:49<00:00,  3.54s/it]


## Modifying ground truth (GT)

In [None]:
from xml.dom import minidom
from skimage.draw import polygon, polygon_perimeter

In [None]:
# https://github.com/rshwndsz/hover-net
# generate binary masks
def generate_labelled_array(xml_file, shape, binary=True):
    """Rasterize the annotated regions of an XML file into a mask.

    Every ``Region`` element of the annotation is treated as a polygon whose
    ``Vertex`` descendants carry ``X`` / ``Y`` attributes; the interior of each
    polygon is filled into the mask.

    Args:
        xml_file: Path to the annotation XML file.
        shape: ``(rows, cols)`` of the mask to create; polygons are clipped to it.
        binary: If True, every annotated pixel is set to 1. If False, the pixels
            of each region are set to that region's 1-based position in the
            file, so that 0 always means background.

    Returns:
        ``np.float32`` array of the given shape. Where regions overlap, the
        region that comes later in the file overwrites the earlier one.
    """
    regions = minidom.parse(xml_file).getElementsByTagName("Region")

    # Start from an all-background (zero) mask.
    mask = np.zeros(shape, np.float32)

    # Labels start at 1: with a 0-based label the first region would be written
    # as 0 and could not be told apart from the background.
    for label, region in enumerate(regions, start=1):
        vertices = region.getElementsByTagName("Vertex")
        if len(vertices) == 0:
            # A region without vertices covers no pixels; skip it rather than
            # passing empty coordinate arrays to the rasterizer.
            continue

        # Coordinates are stored as decimal strings; the int32 cast truncates
        # them to whole pixels.
        xs = np.array([float(v.getAttribute("X")) for v in vertices]).astype(np.int32)
        ys = np.array([float(v.getAttribute("Y")) for v in vertices]).astype(np.int32)

        # Rows come from Y and columns from X. The -1 shift presumably converts
        # 1-based annotation coordinates to 0-based array indices -- TODO confirm
        # (generate_masks_with_boarders uses the coordinates unshifted).
        rows, cols = polygon(ys - 1, xs - 1, shape=shape)
        mask[rows, cols] = 1 if binary else label
    return mask

# https://github.com/bnsreenu/python_for_microscopists/blob/master/tips_tricks_31_generating_borders_around_objects.py
# a function to generate border
def generate_boarder(_mask, boarder_size=5, n_erosions=1):
    """Build a three-level mask: object cores, a ring around them, and background.

    The mask is eroded with a 3x3 kernel ``n_erosions`` times, and the eroded
    result is then dilated once with a square kernel of side
    ``2 * boarder_size + 1``.

    Args:
        _mask: Input mask. The ring test compares against exactly 255, so
            foreground is expected to be stored as 255.
        boarder_size: Half-width used to size the dilation kernel.
        n_erosions: Number of erosion iterations applied before dilating.

    Returns:
        Array holding 255 where the eroded mask is positive, 127 where only the
        dilated mask equals 255, and 0 everywhere else.
    """
    # Shrink the objects first.
    shrink_kernel = np.ones((3, 3), dtype=np.uint8)
    core = cv2.erode(_mask, shrink_kernel, iterations=n_erosions)

    # Grow the shrunken objects again; whatever the growth adds becomes the ring.
    side = 2 * boarder_size + 1
    grow_kernel = np.ones((side, side), dtype=np.uint8)
    grown = cv2.dilate(core, grow_kernel, iterations=1)

    # Core pixels take precedence over ring pixels.
    return np.where(core > 0, 255, np.where(grown == 255, 127, 0))

def generate_masks_with_boarders(xml_file, shape):
    """Rasterize the annotated regions of an XML file into a mask with borders.

    Every ``Region`` element is treated as a polygon whose ``Vertex``
    descendants carry ``X`` / ``Y`` attributes. Each polygon is filled and its
    outline is then set back to 0, after which the result is scaled to 0/255
    and passed through ``generate_boarder``.

    Nothing is written to disk here; the caller saves the returned mask.

    Args:
        xml_file: Path to the annotation XML file.
        shape: ``(rows, cols)`` of the mask to create; polygons are clipped to it.

    Returns:
        The array returned by ``generate_boarder`` for the rasterized mask.
    """
    # Local import keeps this function self-contained; warnings is only needed
    # to report regions that could not be drawn.
    import warnings

    regions = minidom.parse(xml_file).getElementsByTagName("Region")

    # Start from an all-background (zero) mask.
    mask = np.zeros(shape, np.float32)

    for index, region in enumerate(regions):
        vertices = region.getElementsByTagName("Vertex")
        if len(vertices) == 0:
            # A region without vertices covers no pixels.
            continue

        # Coordinates are stored as decimal strings; the int32 cast truncates
        # them to whole pixels. Rows come from Y and columns from X.
        xs = np.array([float(v.getAttribute("X")) for v in vertices]).astype(np.int32)
        ys = np.array([float(v.getAttribute("Y")) for v in vertices]).astype(np.int32)

        try:
            rows, cols = polygon(ys, xs, shape=shape)
            mask[rows, cols] = 1

            # Clear the outline again so the filled area stops one pixel short
            # of the annotated contour.
            edge_rows, edge_cols = polygon_perimeter(ys, xs, shape=shape)
            mask[edge_rows, edge_cols] = 0
        except Exception as err:
            # Drawing stays best-effort (a bad region must not abort the whole
            # file), but the failure is reported instead of silently dropped,
            # and KeyboardInterrupt / SystemExit are no longer swallowed.
            warnings.warn(f"{xml_file}: skipped region {index}: {err!r}")
            continue

    # Scale to the 0/255 range that generate_boarder compares against.
    mask[mask == 1] = 255
    mask = generate_boarder(mask)

    return mask

In [None]:
# Folders holding the XML annotation files of each split.
train_xml_dir = "dataset/monuseg/original/train/annotations"
test_xml_dir = "dataset/monuseg/original/test/annotations"
# Mask size (rows, cols) handed to the mask generators for every annotation
# file; assumes every tissue image is 1000x1000 -- TODO confirm.
shape = (1000, 1000)

In [None]:
# Output folders for the three mask variants of the training split.
train_instance_mask_path = "dataset/monuseg/stain_normalized/train/instance_masks"
train_binary_mask_path = "dataset/monuseg/stain_normalized/train/binary_masks"
train_modified_mask_path = "dataset/monuseg/stain_normalized/train/modified_masks"

# Output folders for the three mask variants of the test split.
test_instance_mask_path = "dataset/monuseg/stain_normalized/test/instance_masks"
test_binary_mask_path = "dataset/monuseg/stain_normalized/test/binary_masks"
test_modified_mask_path = "dataset/monuseg/stain_normalized/test/modified_masks"

# Make sure every output folder exists before anything is written into it.
for mask_dir in (
    train_instance_mask_path,
    train_binary_mask_path,
    train_modified_mask_path,
    test_instance_mask_path,
    test_binary_mask_path,
    test_modified_mask_path,
):
    create_path(mask_dir)

In [None]:
# Materialize a sorted list of the annotation files: the processing order is
# then deterministic, and tqdm takes its total from the list itself (os.listdir
# also counts hidden entries, which the "*" glob pattern does not match).
train_xml_paths = sorted(glob.glob(os.path.join(train_xml_dir, "*")))
for xml_path in tqdm(train_xml_paths):
    name = os.path.basename(xml_path)
    # Swap only the extension. str.replace("xml", ...) would also rewrite any
    # "xml" that happens to occur elsewhere in the file name.
    stem = os.path.splitext(name)[0]

    # Foreground/background mask, scaled from 0/1 to 0/255 for the PNG.
    binary_mask = generate_labelled_array(xml_path, shape, binary=True)
    cv2.imwrite(os.path.join(train_binary_mask_path, stem + ".png"), binary_mask*255)

    # Per-region labels are kept as a raw array so the label values survive.
    instance_mask = generate_labelled_array(xml_path, shape, binary=False)
    np.save(os.path.join(train_instance_mask_path, stem + ".npy"), instance_mask)

    # Mask with border levels, as produced by generate_masks_with_boarders.
    modified_mask = generate_masks_with_boarders(xml_path, shape)
    cv2.imwrite(os.path.join(train_modified_mask_path, stem + ".png"), modified_mask)

100%|██████████| 30/30 [07:15<00:00, 14.51s/it]


In [None]:
# Materialize a sorted list of the annotation files: the processing order is
# then deterministic, and tqdm takes its total from the list itself (os.listdir
# also counts hidden entries, which the "*" glob pattern does not match).
test_xml_paths = sorted(glob.glob(os.path.join(test_xml_dir, "*")))
for xml_path in tqdm(test_xml_paths):
    name = os.path.basename(xml_path)
    # Swap only the extension. str.replace("xml", ...) would also rewrite any
    # "xml" that happens to occur elsewhere in the file name.
    stem = os.path.splitext(name)[0]

    # Foreground/background mask, scaled from 0/1 to 0/255 for the PNG.
    binary_mask = generate_labelled_array(xml_path, shape, binary=True)
    cv2.imwrite(os.path.join(test_binary_mask_path, stem + ".png"), binary_mask*255)

    # Per-region labels are kept as a raw array so the label values survive.
    instance_mask = generate_labelled_array(xml_path, shape, binary=False)
    np.save(os.path.join(test_instance_mask_path, stem + ".npy"), instance_mask)

    # Mask with border levels, as produced by generate_masks_with_boarders.
    modified_mask = generate_masks_with_boarders(xml_path, shape)
    cv2.imwrite(os.path.join(test_modified_mask_path, stem + ".png"), modified_mask)

100%|██████████| 14/14 [01:12<00:00,  5.17s/it]
