In [3]:
#importing openslide:
OPENSLIDE_PATH = r'C:\Users\Kevin\Desktop\openslide-win64-20221217\bin'

import os

if hasattr(os, 'add_dll_directory'):
    # Python >= 3.8 on Windows
    with os.add_dll_directory(OPENSLIDE_PATH):
        import openslide
else:
    import openslide

In [4]:
import numpy as np
import os
from PIL import Image
Image.MAX_IMAGE_PIXELS = None
import cv2
import os
from matplotlib import pyplot as plt
from skimage.morphology import remove_small_objects, remove_small_holes
from skimage.color import rgb2gray
from skimage.transform import warp
from skimage.registration import optical_flow_tvl1, optical_flow_ilk
import xml.etree.ElementTree as ET
import pandas as pd
import skimage.measure
import scipy.stats as stats
from time import time
from skimage.measure import label, regionprops_table

In [5]:
#stitch tiles into wsi
def stitch(tiles,img_height,img_width,img_height2,img_width2,channels):
    """Reassemble a grid of tiles into a single image and strip the padding.

    tiles        : tile array whose axes 1 and 2 are swapped back before the
                   reshape (the layout produced by swapping axes 1 and 2 of a
                   (rows, tile_h, cols, tile_w, channels) array).
    img_height,
    img_width    : size of the un-padded image; the result is cropped to this.
    img_height2,
    img_width2   : size of the padded image the tiles were cut from.
    channels     : number of channels in the padded image.

    Returns the cropped image with any length-1 axes squeezed out.
    """
    padded_shape = (img_height2, img_width2, channels)
    # Undo the tile/row axis swap, then collapse the grid back to the padded image.
    mosaic = np.reshape(np.swapaxes(tiles, 1, 2), padded_shape)
    # Drop the rows/columns that were only added as padding.
    cropped = mosaic[:img_height, :img_width, :]
    return np.squeeze(cropped)

In [None]:
# src = r'\\shelter\Kyu\unstain2stain\unstain2stain_wsi\Unstained'
# imnm = 'OTS_14684_3.ndpi'
# imobj = openslide.open_slide(os.path.join(src,imnm))
# imobj = imobj.read_region(location=(0,0),level=1,size=imobj.level_dimensions[1])
# imobj.level_dimensions[1]

In [6]:
#wsi to tiles
def reshape_split(image:np.ndarray,kernel_size:tuple):
    """Cut an (height, width, channels) image into non-overlapping tiles.

    image       : 3-D array; its height and width must be exact multiples of
                  the tile size (pad the image beforehand if they are not).
    kernel_size : (tile_height, tile_width).

    Returns an array indexed as [tile_row, tile_col, y, x, channel].
    Raises ValueError when the image size is not a multiple of the tile size.
    """
    img_height,img_width,channels=image.shape
    tile_height,tile_width = kernel_size
    # Fail with an actionable message instead of numpy's generic
    # "cannot reshape array" error when the image was not padded.
    if img_height % tile_height or img_width % tile_width:
        raise ValueError(
            f"image size {img_height}x{img_width} is not a multiple of the "
            f"tile size {tile_height}x{tile_width}; pad the image first"
        )
    tiled_array = image.reshape(img_height//tile_height,
                                tile_height,
                                img_width//tile_width,
                                tile_width,
                                channels)
    # Bring the two grid axes to the front: (rows, cols, tile_h, tile_w, channels).
    tiled_array = tiled_array.swapaxes(1,2)
    return tiled_array

In [14]:
src = r'\\shelter\Kyu\unstain2stain\unstain2stain_wsi\Unstained'
imnm = 'OTS_14684_3.ndpi'
# Keep the slide handle under its own name: rebinding it to the region image
# would leave no reference through which the slide itself could be closed.
slide = openslide.open_slide(os.path.join(src,imnm))
try:
    # Read the whole of pyramid level 1 as a PIL image.
    imobj = slide.read_region(location=(0,0),level=1,size=slide.level_dimensions[1])
finally:
    slide.close()

# Image to Array
imnp = np.array(imobj)
imobj.close()
h,w,_=imnp.shape
tile_height, tile_width = (1024,1024)
# Padding
# (-n) % tile is the distance to the next multiple of the tile size and is 0
# when n is already a multiple, so no extra all-zero row/column of tiles is added.
pad_h = (-h) % tile_height
pad_w = (-w) % tile_width
imnpr = np.pad(imnp, pad_width=[(0, pad_h),(0, pad_w),(0, 0)], mode='constant', constant_values=0)
# imnpr = imnpr / 127.5 - 1 #normalize [-1 1]
# imnpr = imnpr / 255 #normalize [0 1]
img_height2,img_width2,channels=imnpr.shape
# Tile
tiles = reshape_split(imnpr, (tile_height,tile_width))

In [15]:
save_src = r'\\shelter\Kyu\unstain2stain\tiles\not_registrated\Unstained'
# Write every tile of the grid to its own PNG.
for idx in range(0,tiles.shape[0]):
    for idx1 in range(0,tiles.shape[1]):
        tmp_tile = tiles[idx][idx1]
        # Zero-pad and separate the row/column indices: plain concatenation is
        # ambiguous (row 1, col 23 and row 12, col 3 would both be "123tile.png"),
        # so later tiles would overwrite earlier ones.
        save_path = os.path.join(save_src,f'{idx:04d}_{idx1:04d}tile.png')
        Image.fromarray(tmp_tile).save(save_path)

In [16]:
src = r'\\shelter\Kyu\unstain2stain\unstain2stain_wsi\HE'
imnm = 'OTS_14684_3_he.ndpi'
# Keep the slide handle under its own name: rebinding it to the region image
# would leave no reference through which the slide itself could be closed.
slide = openslide.open_slide(os.path.join(src,imnm))
try:
    # Read the whole of pyramid level 1 as a PIL image.
    imobj = slide.read_region(location=(0,0),level=1,size=slide.level_dimensions[1])
finally:
    slide.close()

# Image to Array
imnp = np.array(imobj)
imobj.close()
h,w,_=imnp.shape
tile_height, tile_width = (1024,1024)
# Padding
# (-n) % tile is the distance to the next multiple of the tile size and is 0
# when n is already a multiple, so no extra all-zero row/column of tiles is added.
pad_h = (-h) % tile_height
pad_w = (-w) % tile_width
imnpr = np.pad(imnp, pad_width=[(0, pad_h),(0, pad_w),(0, 0)], mode='constant', constant_values=0)
# imnpr = imnpr / 127.5 - 1 #normalize [-1 1]
# imnpr = imnpr / 255 #normalize [0 1]
img_height2,img_width2,channels=imnpr.shape
# Tile
tiles = reshape_split(imnpr, (tile_height,tile_width))

In [18]:
save_src = r'\\shelter\Kyu\unstain2stain\tiles\not_registrated\HE'
# Write every tile of the grid to its own PNG.
for idx in range(0,tiles.shape[0]):
    for idx1 in range(0,tiles.shape[1]):
        tmp_tile = tiles[idx][idx1]
        # Zero-pad and separate the row/column indices: plain concatenation is
        # ambiguous (row 1, col 23 and row 12, col 3 would both be "123tile.png"),
        # so later tiles would overwrite earlier ones.
        save_path = os.path.join(save_src,f'{idx:04d}_{idx1:04d}tile.png')
        Image.fromarray(tmp_tile).save(save_path)

### The tiles generated above are unregistered. I ran MATLAB code to generate the registered tiles; the two registered folders hold almost the same number of tiles, except that a few images are missing from one of them. Find the difference:

In [23]:
# Folders holding the registered tiles of one slide pair.
# NOTE(review): the names look swapped relative to the folders they point at --
# `unstain_tile_src` is the HE folder and `stain_tile_src` is the Unstained
# folder. Confirm which was intended before acting on results derived from them.
unstain_tile_src = r'\\shelter\Kyu\unstain2stain\tiles\registrated_tiles\HE\OTS_14832_3_he'
stain_tile_src = r'\\shelter\Kyu\unstain2stain\tiles\registrated_tiles\Unstained\OTS_14832_3'

In [27]:
# Collect the entry names found in each registered-tile folder.
stain_tile_name = os.listdir(stain_tile_src)
unstain_tile_name = os.listdir(unstain_tile_src)

In [31]:
# Number of entries per folder: `stain_tile_name` first, then `unstain_tile_name`.
for tile_names in (stain_tile_name, unstain_tile_name):
    print(len(tile_names))

10996
10999


In [43]:
# Strip any directory part from each name, then keep the six characters that
# sit immediately before the last four (i.e. before a 4-character extension
# such as ".png"); those six characters are used as the tile identifier.
unstain_tile_base = list(map(os.path.basename, unstain_tile_name))
stain_tile_base = list(map(os.path.basename, stain_tile_name))
unstain_list = [name[-10:-4] for name in unstain_tile_base]
stain_list = [name[-10:-4] for name in stain_tile_base]

In [44]:
# Identifiers present in `unstain_list` but absent from `stain_list`.
# A set makes each membership test O(1); scanning the list for every one of
# ~11k identifiers was quadratic.
stain_lookup = set(stain_list)
not_in_stain = [x for x in unstain_list if x not in stain_lookup]
not_in_stain

['xy0143', 'xy0718', 'xy1118']

### Found the three differing tiles — delete them from the unstained set, then run inference with the pix2pix project. After inference, stitch the tiles back into one image:

In [7]:
inferred_src = r'C:\Users\Kevin\PycharmProjects\pix2pix\pytorch-CycleGAN-and-pix2pix\results\unstain2stain_pix2pix\test_pix2pix_latest\images'
# os.listdir returns names in arbitrary order; sort so that any code consuming
# this list sees the tiles in the same, reproducible order on every run.
inferred_list = sorted(os.listdir(inferred_src))
inferred_name = [os.path.basename(x) for x in inferred_list]
# Keep only files whose name ends in "real_B" followed by a 4-character extension.
# NOTE(review): in the usual pix2pix results layout "real_B" is the target image
# and "fake_B" the generated one -- confirm "real_B" is what should be stitched.
real_inferred2 = [x for x in inferred_name if x[-10:-4]  == 'real_B']
real_inferred_src = [os.path.join(inferred_src,x) for x in real_inferred2]
print(len(real_inferred_src)) #10995

10995


In [None]:
# Load every selected tile, then join them along axis 0 in a single call.
# Concatenating inside the loop re-copied the whole growing array on every
# iteration (quadratic in the number of tiles) and left each file handle open.
tile_arrays = []
for tile_path in real_inferred_src:
    with Image.open(tile_path) as tile_img:
        tile_arrays.append(np.array(tile_img))
all_tile_ra = np.concatenate(tile_arrays,axis=0)
all_tile_ra

In [None]:
# The tiles were joined along axis 0, so the first dimension is the sum of all tile heights.
all_tile_ra.shape

In [None]:
# NOTE(review): stitch() swaps axes 1 and 2 and then reshapes its input to
# (img_height2, img_width2, channels) -- (1024, 1024, 3) with these arguments --
# before cropping to 78848 x 99840. `all_tile_ra` is a concatenation along axis 0,
# not a (rows, cols, tile_h, tile_w, channels) grid. Presumably the padded slide
# size and a 5-D tile grid were intended here; confirm before relying on this call.
stitch(all_tile_ra,78848,99840,1024,1024,3)

In [11]:
### Try registering the two tiles with optical flow to compare with matlab rigid registration:
def registrate_two_images(reference_image_path, moving_image_path, save_path):
    """
    Warp every moving tile onto its reference tile using TV-L1 optical flow and save the result.

    Note: tiles are paired by position after sorting the ".png" file names of each folder,
    so matching tiles must sort into the same position in both folders!

    reference_image_path : folder with the fixed ".png" tiles.
    moving_image_path    : folder with the ".png" tiles to be warped.
    save_path            : output folder (not created here); each warped tile is written
                           as a 3-channel PNG under the moving tile's file name.

    Returns None. If the two folders hold a different number of PNGs, a message is
    printed and nothing is processed.
    """
    def _to_gray(tile):
        # cv2's RGBA2GRAY only accepts 4-channel input; choose the conversion
        # from the channel count so 3-channel PNGs work as well.
        code = cv2.COLOR_RGBA2GRAY if tile.shape[2] == 4 else cv2.COLOR_RGB2GRAY
        return cv2.cvtColor(tile, code)

    # os.listdir order is arbitrary: sort both listings so the pairing is deterministic.
    ref_img_path = sorted(f for f in os.listdir(reference_image_path) if f.endswith(".png"))
    mov_img_path = sorted(f for f in os.listdir(moving_image_path) if f.endswith(".png"))
    if len(ref_img_path) != len(mov_img_path):
        print("Number of images in reference and moving file paths are not equal, please fix and try again!")
        return

    start = time()
    for idx, (ref_name, mov_name) in enumerate(zip(ref_img_path, mov_img_path)):
        # Context managers release the file handles as soon as the pixels are copied out.
        with Image.open(os.path.join(reference_image_path, ref_name)) as ref_file:
            ref_img = np.array(ref_file)
        with Image.open(os.path.join(moving_image_path, mov_name)) as mov_file:
            mov_img = np.array(mov_file)
        ref_img_g = _to_gray(ref_img)
        mov_img_g = _to_gray(mov_img)
        # Per-pixel displacement (row component v, column component u).
        v, u = optical_flow_tvl1(ref_img_g, mov_img_g)
        nr, nc = ref_img_g.shape
        row_coords, col_coords = np.meshgrid(np.arange(nr), np.arange(nc),
                                             indexing='ij')
        sample_coords = np.array([row_coords + v, col_coords + u])
        warped_channels = []
        for i in range(3):
            # warp() rescales integer input to floats (hence the * 255 below).
            warped = warp(mov_img[:,:,i], sample_coords, mode='edge')
            # Round before casting: a bare astype('uint8') truncates, which can
            # turn e.g. 2.9999999 into 2 and darken pixels by one level.
            warped_channels.append(np.clip(np.rint(warped * 255), 0, 255).astype('uint8'))
        rgb = np.stack(warped_channels,axis=2)
        reg_img = Image.fromarray(rgb)
        print(idx)
        # mov_name already ends in ".png", so reuse it unchanged as the output name.
        reg_img.save(os.path.join(save_path, mov_name))

    end = time()
    print("time it took to register: "+  str((end-start)/60) + " minutes")


In [None]:
### Possible dummy code to recognize H&E images and discard images that are not H&E:

In [None]:
import cv2
import os

# Set path to the folder containing the images
image_folder = 'path/to/folder'

# Set threshold for H&E stained images (adjust as needed)
threshold = 0.1

# Loop through each image in the folder
for image_file in os.listdir(image_folder):
    image_path = os.path.join(image_folder, image_file)

    # Read the image (OpenCV loads colour images in B, G, R channel order)
    image = cv2.imread(image_path)
    # cv2.imread returns None for anything it cannot decode (sub-folders,
    # non-image files); skip those instead of crashing on the indexing below.
    if image is None:
        continue

    # Per-pixel blue excess relative to the total intensity
    blue = image[:, :, 0].astype(float)
    green = image[:, :, 1].astype(float)
    red = image[:, :, 2].astype(float)
    total = blue + green + red
    # Pure-black pixels have total == 0 and would yield 0/0 = NaN; a single NaN
    # makes the mean NaN, and `NaN < threshold` is always False, so such images
    # could never be removed. Average over the non-black pixels only.
    lit = total > 0
    if not lit.any():
        # Entirely black image: no ratio can be computed, leave the file alone.
        continue
    bgr = (blue - (red + green) / 2)[lit] / total[lit]
    bgr_mean = bgr.mean()

    # If the BGR ratio is below the threshold, delete the image
    if bgr_mean < threshold:
        os.remove(image_path)


In [None]:
### Code to drop out low MI tiles: