## The aim is to register each tissue image in the stained and unstained WSIs. To do this, we first crop each individual tissue in each WSI and save it as a PNG file. Then we pad the two images to the same size and register them by adapting the registration code from image_registration.ipynb.
## The images are located at: \\shelter\Kyu\unstain2stain\biomax_images

In [1]:
import functools
import os
import time
import xml.etree.ElementTree as ET
from glob import glob
from time import time  # NOTE: rebinds the name `time` to the function; the module import above is shadowed

# Set before cv2 is imported: OpenCV is expected to read this limit when the library loads.
os.environ["OPENCV_IO_MAX_IMAGE_PIXELS"] = pow(2,40).__str__()

import cv2
import numpy as np
import openslide
import pandas as pd
import scipy.stats as stats
import skimage.measure
import torchvision.transforms as transforms
from matplotlib import pyplot as plt
from PIL import Image
from skimage.color import rgb2gray
from skimage.measure import label, regionprops_table
from skimage.morphology import remove_small_objects, remove_small_holes
from skimage.registration import optical_flow_tvl1, optical_flow_ilk
from skimage.transform import warp

Image.MAX_IMAGE_PIXELS = None


In [2]:
def _time(f):
    """
    Helper function: Measures time taken for a certain function that we suspect that takes long. Simply run _time(function_name)
    """
    def wrapper(*args,**kwargs):
        start=time()
        r=f(*args,**kwargs)
        end=time()
        print("%s timed %f" %(f.__name__,end-start))
        return r
    return wrapper

In [3]:
def xml_to_df(xml_filepath):
    """
    Helper function from xml2trainingdata.ipynb
    input: annotation xml file
    output: dataframe with one row per <Region> and the columns
            ClassNames (1-based position of the enclosing <Annotation>),
            X, Y (arrays of the vertex coordinates, kept as the strings read from the xml;
                  callers convert them with .astype('int')),
            ID (the Region's integer 'Id' attribute).
            An xml without any Region gives an empty dataframe with the same columns.
    Can be used for multiclass and just simple ROI as well.
    """
    tree = ET.parse(xml_filepath)
    root = tree.getroot()
    region_frames = []
    for index, annotation in enumerate(root.iter("Annotation")):
        for region in annotation.iter('Region'):
            # Walk the vertices once and reuse them for both coordinates.
            vertices = list(region.iter('Vertex'))
            x = np.array([vertex.get('X') for vertex in vertices])
            y = np.array([vertex.get('Y') for vertex in vertices])
            coord_dict = {
                "ClassNames": [index + 1],
                "X": [x],
                "Y": [y],
                "ID": [int(region.get('Id'))],  # plain int, no 1-element array to unwrap later
            }
            region_frames.append(pd.DataFrame(data=coord_dict))
    if not region_frames:
        # pd.concat refuses an empty list; hand back an empty table with the expected columns instead.
        return pd.DataFrame(columns=["ClassNames", "X", "Y", "ID"])
    coord_df = pd.concat(region_frames).reset_index(drop=True)
    coord_df["ID"] = coord_df["ID"].astype(int)
    return coord_df

In [4]:
def df_to_image_mask(xml_filepath, ndpi_filepath,downsample_factor, general_save_path):
    """
    Helper function from xml2trainingdata.ipynb
    input: xml file path and ndpi file path of the wsi image, the desired downsample factor,
           and the folder in which the mask is saved
    output: the string "Image saved successfully!"; the binary mask of the wsi image
            (0 = background, 255 = inside an annotated region) is written to
            <general_save_path>/<ndpi file name without extension>.png
    raises: IOError when the mask could not be written.

    The mask is drawn at the pyramid level that openslide reports as best for
    downsample_factor, so its size is that level's size.
    """
    coord_df = xml_to_df(xml_filepath)
    slide = openslide.open_slide(ndpi_filepath)
    try:
        rgb_dim = slide.dimensions
        print("Dimension of level 0 of the image is: ",rgb_dim)
        slide_level_dim = slide.level_dimensions
        num_levels = len(slide_level_dim)
        print("Number of levels in this image are:",num_levels)
        factors = slide.level_downsamples
        print("Each level is downsampled by:",factors)
        target_level = slide.get_best_level_for_downsample(downsample_factor)
        target_dim = slide_level_dim[target_level]
    finally:
        # Only the metadata is needed, so release the file handle straight away.
        slide.close()

    # Per-axis scale between level 0 and the target level: rsf[0] for x (width), rsf[1] for y (height).
    rsf = [full / target for full, target in zip(rgb_dim, target_dim)]
    mask = np.zeros((target_dim[1], target_dim[0]), dtype=np.uint8)  # all zeros = black background
    for _, row in coord_df.iterrows():
        xx = np.rint(row.X.astype('int') / rsf[0])
        yy = np.rint(row.Y.astype('int') / rsf[1])  # scale y with the y factor, not the x factor
        contour = np.column_stack((xx, yy)).astype(np.int32)  # fillPoly needs 32-bit integer points
        if contour.size == 0:
            continue
        cv2.fillPoly(mask, pts=[contour], color=255)  # draws in place on `mask`

    tmp = os.path.basename(ndpi_filepath)
    ndpi_name = os.path.splitext(tmp)[0]
    save_path = os.path.join(general_save_path,ndpi_name + ".png")
    if not cv2.imwrite(save_path,mask): #save binary mask
        # imwrite signals failure through its return value; do not report success in that case.
        raise IOError("Could not write binary mask to " + save_path)
    return ("Image saved successfully!")

### For stained: use the 16x-downsampled binary mask (16 is the factor used for the binary masks in this notebook) to find the bounding box of each tissue in the WSI, multiply those bounding-box coordinates by 16 to get back to full-resolution coordinates, and then use them to read the region with no downsampling, reading only that region of interest and cropping it (i.e. call read_region at full resolution, but restricted to the bounding box by setting location and size). But use the binary mask of the unstained slide to find the order, so that the images are saved from top left to bottom right.

### For unstained: use the xml coordinates to find the bounding box of each tissue directly, then read that region with no downsampling and crop it. But use the binary mask of the unstained slide to find the order, so that the images are saved from top left to bottom right.

### To find the order: get the binary images/masks of both raw ndpi files. Because the relative positions of the tissues within the two slides are so different, we crop both masks so that only the tissues are shown, meaning that there is no "white space" around them. If we then normalize the centroids with respect to the size of the cropped image, the centroids of matching tissues should be (almost) the same in both slides.

In [7]:
# Use df_to_image_mask to create the binary masks of the three unstained slides
# (sk244a, sk481, skn1001) with a requested downsample factor of 16. Each mask is
# saved as <slide name>.png in the unstained "entire_binary_masks" folder.
df_to_image_mask(r'\\shelter\Kyu\unstain2stain\biomax_images\unstained\sk244a.xml', r'\\shelter\Kyu\unstain2stain\biomax_images\unstained\sk244a.ndpi',16,r'\\shelter\Kyu\unstain2stain\biomax_images\unstained\entire_binary_masks')
df_to_image_mask(r'\\shelter\Kyu\unstain2stain\biomax_images\unstained\sk481.xml', r'\\shelter\Kyu\unstain2stain\biomax_images\unstained\sk481.ndpi',16,r'\\shelter\Kyu\unstain2stain\biomax_images\unstained\entire_binary_masks')
df_to_image_mask(r'\\shelter\Kyu\unstain2stain\biomax_images\unstained\skn1001.xml', r'\\shelter\Kyu\unstain2stain\biomax_images\unstained\skn1001.ndpi',16,r'\\shelter\Kyu\unstain2stain\biomax_images\unstained\entire_binary_masks')

Dimension of level 0 of the image is:  (199680, 98560)
Number of levels in this image are: 8
Each level is downsampled by: (1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0)
Dimension of level 0 of the image is:  (184320, 98560)
Number of levels in this image are: 8
Each level is downsampled by: (1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0)
Dimension of level 0 of the image is:  (192000, 98560)
Number of levels in this image are: 8
Each level is downsampled by: (1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0)


'Image saved successfully!'

In [5]:
# for stained:
def stained_crop(stain_ndpi_filepath, general_save_path, binary_mask_path, save = True, downsample_factor = 16):
    """
    Function to crop the stained ndpi into individual tissue png files, one per connected
    region of its binary mask, or to return the normalized centroids of those regions.

    input: stain_ndpi_filepath  - ndpi file of the stained slide
           general_save_path    - crops go to <general_save_path>/<ndpi name>/<ndpi name><index>.png
           binary_mask_path     - binary mask of the stained slide
                                  (assumed to be a single-channel image - TODO confirm)
           save                 - True: save the crops; False: only return the centroid list
           downsample_factor    - factor between the mask and level 0 of the slide
                                  (16 was used to create the binary masks of both stained and unstained)
    output: save = True  -> None (prints "Image saved successfully!")
            save = False -> list of ((row, col), index) tuples; (row, col) is the region centroid
                            divided by the size of the mask cropped to the tissue extent, and
                            index is label - 1, the same index used in the saved file names.
    raises: ValueError when the mask contains no labelled region.
    """
    tmp = os.path.basename(stain_ndpi_filepath)
    ndpi_name = os.path.splitext(tmp)[0]

    # Label the mask so that every tissue gets its own id, then read each label's bounding box.
    binary_img = Image.open(binary_mask_path)
    labeledbw = label(np.array(binary_img))
    num_labels = int(np.max(labeledbw))
    if num_labels == 0:
        raise ValueError("No tissue region found in binary mask " + str(binary_mask_path))
    props = regionprops_table(labeledbw, properties = ('bbox','label'))

    # bbox-0/bbox-2 are the min/max ROW and bbox-1/bbox-3 the min/max COLUMN of each region;
    # the extremes over all regions give the tissue extent of the whole slide.
    row_min = np.min(props.get('bbox-0'))
    col_min = np.min(props.get('bbox-1'))
    row_max = np.max(props.get('bbox-2'))
    col_max = np.max(props.get('bbox-3'))
    newlabeledbw = labeledbw[row_min:row_max,col_min:col_max] #crop image so no white space
    size = newlabeledbw.shape
    plt.imshow(newlabeledbw.astype(np.int32))

    # Centroids relative to the cropped mask, so they can be compared with the unstained slide.
    newprops = regionprops_table(newlabeledbw, properties = ('centroid','label'))
    centroid_rows = [round(x / size[0],4) for x in newprops.get('centroid-0')]
    centroid_cols = [round(x / size[1],4) for x in newprops.get('centroid-1')]
    stained_centroid_tuple = list(zip(centroid_rows,centroid_cols)) #return centroid_tuple so that this can be used for unstain
    id_list = np.arange(0,num_labels)
    stained_centroid_tuple_list = list(zip(stained_centroid_tuple,id_list))
    if not save:
        return stained_centroid_tuple_list

    out_dir = os.path.join(general_save_path,ndpi_name)
    os.makedirs(out_dir, exist_ok=True)
    slide = openslide.open_slide(stain_ndpi_filepath)
    try:
        # Iterate over EVERY label (1..num_labels); the bounding boxes computed above are reused
        # instead of scanning the whole mask once per label.
        for label_id, r0, c0, r1, c1 in zip(props.get('label'), props.get('bbox-0'), props.get('bbox-1'),
                                            props.get('bbox-2'), props.get('bbox-3')):
            # regionprops reports exclusive max indices; "- 1" gives the last row/column of the region.
            # NOTE(review): because that max is inclusive, the crop ends up to one mask pixel short of
            # the region's far edge; kept so that crops stay comparable with previously saved images.
            x_min = int(round(c0 * downsample_factor))
            x_max = int(round((c1 - 1) * downsample_factor))
            y_min = int(round(r0 * downsample_factor))
            y_max = int(round((r1 - 1) * downsample_factor))
            location = (x_min,y_min)
            dim = (x_max-x_min,y_max-y_min)
            fin_image = slide.read_region(location= location,level=0,size=dim)
            save_path = os.path.join(out_dir, ndpi_name + str(int(label_id) - 1) + ".png")
            fin_image.save(save_path)
    finally:
        slide.close()
    print("Image saved successfully!")

In [82]:
# Crop the stained sk244a slide (sk244ahe.ndpi) into one png per tissue, using its binary mask.
stained_crop(r'\\shelter\Kyu\unstain2stain\biomax_images\stained\sk244ahe.ndpi', r'\\shelter\Kyu\unstain2stain\biomax_images\stained\images', r'\\shelter\Kyu\unstain2stain\biomax_images\stained\entire_binary_masks\sk244a.png', save = True)

Image saved successfully!


In [None]:
# Crop the stained sk481 slide (sk481he.ndpi) into one png per tissue, using its binary mask.
stained_crop(r'\\shelter\Kyu\unstain2stain\biomax_images\stained\sk481he.ndpi', r'\\shelter\Kyu\unstain2stain\biomax_images\stained\images', r'\\shelter\Kyu\unstain2stain\biomax_images\stained\entire_binary_masks\sk481.png', save = True)

In [None]:
# Crop the stained skn1001 slide (skn1001he.ndpi) into one png per tissue, using its binary mask.
stained_crop(r'\\shelter\Kyu\unstain2stain\biomax_images\stained\skn1001he.ndpi', r'\\shelter\Kyu\unstain2stain\biomax_images\stained\images', r'\\shelter\Kyu\unstain2stain\biomax_images\stained\entire_binary_masks\skn1001.png', save = True)

In [6]:
#compare the two centroids:
def find_id (list1, list2):
    """
    Compare two centroid lists and, for every entry of list1, find the closest entry of list2.

    input: list1, list2 - lists of (centroid, id) tuples where centroid is a pair of coordinates;
                          list1 is the reference. Only the centroids are compared.
    output: list of plain ints; element i is the position in list2 whose centroid has the smallest
            sum of absolute coordinate differences to the centroid of list1[i] (first one on ties).
            If the lists differ in length a message is printed and None is returned.

    Every reference entry is matched independently, so the same position of list2 can appear
    more than once in the result.
    """
    if len(list1) != len(list2):
        print("Two lists must have the same length, please fix!")
        return None
    if len(list1) == 0:
        return []
    reference = np.asarray([entry[0] for entry in list1], dtype=float)
    candidates = np.asarray([entry[0] for entry in list2], dtype=float)
    # distances[i, j] = |row_i - row_j| + |col_i - col_j| between list1[i] and list2[j]
    distances = np.abs(reference[:, None, :] - candidates[None, :, :]).sum(axis=2)
    # Plain ints print and index the same way on every NumPy version.
    return [int(match) for match in distances.argmin(axis=1)]

In [7]:
def unstained_crop(xml_filepath, unstain_ndpi_filepath, stain_ndpi_filepath, unstained_binary_mask_filepath, stained_binary_mask_filepath, general_save_path, downsample_factor = 16):
    """
    Function to crop the unstained ndpi to individual tissue png files, saved in the same order
    as the tissues of the stained slide.
    input: xml file path and ndpi file path of the unstained version, ndpi file path of the stained
           version, the binary masks of both slides, and the folder to save in. Crops go to
           <general_save_path>/<xml name>/<xml name><index>.png.
           downsample_factor is the factor between the unstained binary mask and level 0 of the
           unstained slide (16 for the masks created in this notebook).
    output: None; the cropped images are written to disk and progress is printed.
    raises: ValueError when the number of annotated regions in the xml differs from the number of
            tissues found in the stained mask.

    How the order is found: the centroid of every annotated region is normalized to the tissue
    extent of the unstained mask and compared (find_id) with the normalized centroids of the
    stained mask (stained_crop with save = False).
    """
    coord_df = xml_to_df(xml_filepath)
    slide = openslide.open_slide(unstain_ndpi_filepath)
    try:
        rgb_dim = slide.dimensions
        print("Dimension of level 0 of the unstained image is: ",rgb_dim)
        slide_level_dim = slide.level_dimensions
        num_levels = len(slide_level_dim)
        print("Number of levels in this unstained image are:",num_levels)
        factors = slide.level_downsamples
        print("For unstained image, each level is downsampled by:",factors)
        tmp = os.path.basename(xml_filepath)
        ndpi_name = os.path.splitext(tmp)[0]
        stained_centroid_tuple_list = stained_crop(stain_ndpi_filepath, general_save_path, stained_binary_mask_filepath, save = False) #get centroid tuples for stained

        # Tissue extent of the unstained mask (bbox-0/2 are rows, bbox-1/3 are columns).
        unstained_binary_mask = np.array(Image.open(unstained_binary_mask_filepath))
        mask_shape = unstained_binary_mask.shape
        labeledbw = label(unstained_binary_mask > 0)
        props = regionprops_table(labeledbw, properties = ('bbox','label'))
        row_min = np.min(props.get('bbox-0'))
        col_min = np.min(props.get('bbox-1'))
        row_max = np.max(props.get('bbox-2'))
        col_max = np.max(props.get('bbox-3'))

        # Level-0 offset of the tissue extent; subtracting it makes the xml coordinates relative
        # to the cropped mask, like the stained centroids.
        x_offset = col_min * downsample_factor
        y_offset = row_min * downsample_factor
        cX_ra = []
        cY_ra = []
        for _, row in coord_df.iterrows(): # centroid of every annotated region
            shifted_x = row.X.astype('int') - x_offset
            shifted_y = row.Y.astype('int') - y_offset
            contour = np.column_stack((shifted_x, shifted_y)).astype(np.int32)  # cv2.moments needs 32-bit points
            moments = cv2.moments(contour)
            cX_ra.append(int(moments["m10"] / moments["m00"]))
            cY_ra.append(int(moments["m01"] / moments["m00"]))

        # Level-0 size of the tissue extent = slide size minus the margins that the mask crop removed.
        x_margin = (mask_shape[1]-(col_max-col_min)) * downsample_factor
        y_margin = (mask_shape[0]-(row_max-row_min)) * downsample_factor
        cX_ra1 = [round(p / (rgb_dim[0] - x_margin),4) for p in cX_ra]
        cY_ra1 = [round(p / (rgb_dim[1] - y_margin),4) for p in cY_ra]
        unstained_centroid_tuple = list(zip(cY_ra1,cX_ra1))  # (row, col), same order as the stained centroids
        id_list = np.arange(0,coord_df.shape[0])
        unstained_centroid_tuple_list = list(zip(unstained_centroid_tuple,id_list))

        order = find_id(stained_centroid_tuple_list,unstained_centroid_tuple_list)
        if order is None:
            # find_id already printed the reason; stop here instead of failing later on order[idx].
            raise ValueError("Stained mask has %d tissues but the xml has %d regions; cannot match them."
                             % (len(stained_centroid_tuple_list), len(unstained_centroid_tuple_list)))
        print("order that is getting saved is {}".format(order))
        if len(set(order)) != len(order):
            print("Warning: some unstained regions were matched to more than one stained tissue, please check the order!")

        # to crop with same order as stained:
        out_dir = os.path.join(general_save_path,ndpi_name)
        os.makedirs(out_dir, exist_ok=True)
        for idx, region_index in enumerate(order):
            xx = coord_df.X[region_index].astype('int')
            yy = coord_df.Y[region_index].astype('int')
            x_min = int(np.min(xx))
            x_max = int(np.max(xx))
            y_min = int(np.min(yy))
            y_max = int(np.max(yy))
            location = (x_min,y_min)
            crop_size = (x_max-x_min,y_max-y_min)
            final_img = slide.read_region(location= location,level=0,size=crop_size)
            save_path = os.path.join(out_dir, ndpi_name + str(idx) + ".png")
            final_img.save(save_path)
    finally:
        slide.close()
    print("Images saved successfully!")


In [None]:
# Crop the unstained sk244a slide, saving the tissues in the order of the stained sk244ahe slide.
unstained_crop(r'\\shelter\Kyu\unstain2stain\biomax_images\unstained\sk244a.xml', r'\\shelter\Kyu\unstain2stain\biomax_images\unstained\sk244a.ndpi',r'\\shelter\Kyu\unstain2stain\biomax_images\stained\sk244ahe.ndpi',r'\\shelter\Kyu\unstain2stain\biomax_images\unstained\entire_binary_masks\sk244a.png',r'\\shelter\Kyu\unstain2stain\biomax_images\stained\entire_binary_masks\sk244a.png',r'\\shelter\Kyu\unstain2stain\biomax_images\unstained\images')

Dimension of level 0 of the unstained image is:  (199680, 98560)
Number of levels in this unstained image are: 8
For unstained image, each level is downsampled by: (1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0)
order that is getting saved is [2, 0, 18, 24, 17, 22, 5, 3, 11, 20, 10, 19, 4, 9, 6, 12, 21, 14, 1, 7, 13, 15, 16, 8, 23]


In [None]:
# Crop the unstained sk481 slide, saving the tissues in the order of the stained sk481he slide.
unstained_crop(r'\\shelter\Kyu\unstain2stain\biomax_images\unstained\sk481.xml', r'\\shelter\Kyu\unstain2stain\biomax_images\unstained\sk481.ndpi',r'\\shelter\Kyu\unstain2stain\biomax_images\stained\sk481he.ndpi',r'\\shelter\Kyu\unstain2stain\biomax_images\unstained\entire_binary_masks\sk481.png',r'\\shelter\Kyu\unstain2stain\biomax_images\stained\entire_binary_masks\sk481.png',r'\\shelter\Kyu\unstain2stain\biomax_images\unstained\images')

In [None]:
# Crop the unstained skn1001 slide, saving the tissues in the order of the stained skn1001he slide.
unstained_crop(r'\\shelter\Kyu\unstain2stain\biomax_images\unstained\skn1001.xml', r'\\shelter\Kyu\unstain2stain\biomax_images\unstained\skn1001.ndpi',r'\\shelter\Kyu\unstain2stain\biomax_images\stained\skn1001he.ndpi',r'\\shelter\Kyu\unstain2stain\biomax_images\unstained\entire_binary_masks\skn1001.png',r'\\shelter\Kyu\unstain2stain\biomax_images\stained\entire_binary_masks\skn1001.png',r'\\shelter\Kyu\unstain2stain\biomax_images\unstained\images')

In [3]:
# function to pad images to same size:
def pad_images_to_same_size(images):
    """
    Pad every image with a constant (255, 255, 255) border so that all share one size.

    :param images: sequence of images (arrays whose first two axes are height and width)
    :return: list of padded images; each has the largest height and the largest width found
             in the input. The extra rows/columns are split between the two sides, with the
             odd one going to the bottom/right.
    """
    heights = [0]
    widths = [0]
    for picture in images:
        rows, cols = picture.shape[:2]
        heights.append(rows)
        widths.append(cols)
    target_h = max(heights)
    target_w = max(widths)

    def _pad_one(picture):
        # Centre one image inside the target canvas.
        rows, cols = picture.shape[:2]
        extra_rows = target_h - rows
        extra_cols = target_w - cols
        top = extra_rows // 2
        left = extra_cols // 2
        padded = cv2.copyMakeBorder(picture, top, extra_rows - top, left, extra_cols - left,
                                    cv2.BORDER_CONSTANT, value=(255,255,255))
        assert padded.shape[:2] == (target_h, target_w)
        return padded

    return [_pad_one(picture) for picture in images]

In [4]:
# Rebind both functions to their timed versions so that every later call prints how long it took.
# NOTE: running this cell again wraps the already-wrapped functions, so each call then prints
# one more timing line per extra run.
pad_images_to_same_size = _time(pad_images_to_same_size)
optical_flow_tvl1 = _time(optical_flow_tvl1)

In [None]:
# pad the two images:


### Now register the two padded images:

In [None]:
def registrate_two_images(reference_image_path, moving_image_path, save_path):
    """
    Register every moving image onto its reference image with TV-L1 optical flow and save the result.

    input: reference_image_path - folder with the reference png files
           moving_image_path    - folder with the moving png files
           save_path            - folder in which the registered images are saved
                                  (created if missing); each result is stored under the
                                  file name of its reference image
    output: None; prints the total time. If the two folders do not hold the same number of
            png files a message is printed and nothing is registered.

    Note: The images are paired by sorted file name, so the order of the files saved in the
    reference and moving folders must be the same so that you are registering the same images!
    Each pair must already have the same height and width (pad them first).
    """
    ref_names = sorted(name for name in os.listdir(reference_image_path) if name.endswith(".png"))
    mov_names = sorted(name for name in os.listdir(moving_image_path) if name.endswith(".png"))

    if len(ref_names) != len(mov_names):
        print("Number of images in reference and moving file paths are not equal, please fix and try again!")
        return

    os.makedirs(save_path, exist_ok=True)
    start = time()
    for ref_name, mov_name in zip(ref_names, mov_names):
        # The crops are saved from read_region (RGBA); drop the alpha channel before rgb2gray.
        ref_img = np.array(Image.open(os.path.join(reference_image_path, ref_name)).convert("RGB"))
        mov_img = np.array(Image.open(os.path.join(moving_image_path, mov_name)).convert("RGB"))
        ref_img_g = rgb2gray(ref_img)
        mov_img_g = rgb2gray(mov_img)
        # v, u: per-pixel row and column displacement
        v, u = optical_flow_tvl1(ref_img_g, mov_img_g)
        nr, nc = ref_img_g.shape
        row_coords, col_coords = np.meshgrid(np.arange(nr), np.arange(nc),
                                             indexing='ij')
        warp_coords = np.array([row_coords + v, col_coords + u])
        # warp returns floats in [0, 1] for uint8 input, one channel at a time.
        warped_channels = [warp(mov_img[:,:,i], warp_coords, mode='edge') for i in range(3)]
        rgb = (np.clip(np.stack(warped_channels, axis=2), 0, 1) * 255).astype('uint8')
        reg_img = Image.fromarray(rgb)
        reg_img.save(os.path.join(save_path, ref_name))

    end = time()
    print("time it took to register: "+  str((end-start)/60) + " minutes")
