In [1]:
# Import the necessary libraries
import os
import glob
import cv2
import numpy as np
import openslide
import shutil
import matplotlib.pyplot as plt
from openslide import OpenSlide

Define the parameters and the function for the contouring algorithm

In [2]:
# Parameters
# Keyword arguments for the four successive cv2.bilateralFilter passes used by
# get_lymphnode_contours (d: pixel neighbourhood diameter; sigmaColor and
# sigmaSpace: filter sigmas in colour space and coordinate space).
bilateral1_args = dict(d=9, sigmaColor=10000, sigmaSpace=150)
bilateral2_args = dict(d=90, sigmaColor=5000, sigmaSpace=5000)
bilateral3_args = dict(d=90, sigmaColor=10000, sigmaSpace=10000)
bilateral4_args = dict(d=90, sigmaColor=10000, sigmaSpace=100)

# Keyword arguments for the two consecutive cv2.threshold passes. Both carry
# the OTSU flag; the first pass additionally uses the TRUNC threshold type.
thresh1_args = dict(thresh=0, maxval=255, type=cv2.THRESH_TRUNC | cv2.THRESH_OTSU)
thresh2_args = dict(thresh=0, maxval=255, type=cv2.THRESH_OTSU)

In [None]:
#define the function for contouring the lymph node units in a whole slide image (wsis)
def get_lymphnode_contours(wsi, min_area=5000):
    """Return the external contours of the lymph node regions in a slide image.

    Pipeline: keep only pixels whose HSV hue falls in the stain colour window,
    paint everything else with a light background value, convert to grayscale,
    smooth with four bilateral-filter passes (inverting before each pass),
    threshold twice with Otsu, and extract the external contours.

    Parameters
    ----------
    wsi : array-like
        8-bit image of shape (H, W, 3) or (H, W, 4), interpreted as RGB
        (the HSV conversion uses ``cv2.COLOR_RGB2HSV``).
    min_area : float, optional
        Contours whose ``cv2.contourArea`` is not strictly greater than this
        value are discarded. Defaults to 5000, the previously hard-coded value.

    Returns
    -------
    list
        The retained contours, in the format produced by ``cv2.findContours``.
        An empty list is returned for an image with no pixels.

    Raises
    ------
    ValueError
        If ``wsi`` is not an 8-bit image with 3 or 4 channels.
    """
    wsi = np.asarray(wsi)
    if wsi.ndim != 3 or wsi.shape[2] not in (3, 4) or wsi.dtype != np.uint8:
        raise ValueError(
            "wsi must be a uint8 array of shape (H, W, 3) or (H, W, 4); "
            f"got shape {wsi.shape} and dtype {wsi.dtype}"
        )
    if wsi.size == 0:
        # Nothing to segment; avoid an opaque OpenCV error on an empty image.
        return []

    #filter the colour
    #convert the image to HSV colour space
    img_hsv = cv2.cvtColor(wsi, cv2.COLOR_RGB2HSV)
    #lower and upper HSV bounds of the colour window to keep (hue 120-180)
    lower_red = np.array([120, 0, 0])
    upper_red = np.array([180, 255, 255])

    #mask of the pixels that fall inside the colour window
    colour_mask = cv2.inRange(img_hsv, lower_red, upper_red)

    #apply the mask to the original image (pixels outside the window become 0)
    m = cv2.bitwise_and(wsi, wsi, mask=colour_mask)
    #replace every zero value with the light background value; the fill value
    #is given as uint8 so the result keeps the dtype OpenCV expects
    background_value = np.uint8(233)
    im_fill = np.where(m == 0, background_value, m)

    #convert to a single-channel grayscale image
    # NOTE(review): the input is RGB but the BGR conversion code is used, so
    # the red and blue channel weights are swapped. Kept as-is because the
    # filter/threshold parameters were presumably tuned with this behaviour;
    # confirm before switching to cv2.COLOR_RGB2GRAY.
    gray = cv2.cvtColor(im_fill, cv2.COLOR_BGR2GRAY)

    #generate the blur
    blur1 = cv2.bilateralFilter(np.bitwise_not(gray), **bilateral1_args)
    #step2: make the pixel distance and sigma space larger so that the content can be linked together
    blur2 = cv2.bilateralFilter(np.bitwise_not(blur1), **bilateral2_args)
    #step3: make each lymph node look more like a group
    blur3 = cv2.bilateralFilter(np.bitwise_not(blur2), **bilateral3_args)
    #step4: contain as much colour as possible
    blur4 = cv2.bilateralFilter(np.bitwise_not(blur3), **bilateral4_args)
    #invert the final blurred image
    blur_final = 255 - blur4

    #threshold twice
    _, thresh = cv2.threshold(blur_final, **thresh1_args)
    _, thresh = cv2.threshold(thresh, **thresh2_args)

    #find contours; OpenCV 3.x returns (image, contours, hierarchy) while
    #2.x/4.x return (contours, hierarchy), so the contours are always at [-2]
    contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2]

    #filter small contours and return the rest
    return [contour for contour in contours if cv2.contourArea(contour) > min_area]

Generate a binary mask for each WSI stored in a folder as an .ndpi file

In [1]:
# Define the folder path with WSIs of NDPI format
folder_path = "path_to_folder/*"
# Keep regular files only (the wildcard can also match sub-folders) and sort
# them so that the processing order is reproducible between runs
image_paths = sorted(p for p in glob.glob(folder_path) if os.path.isfile(p))

# WSI names: the file name without its extension (whatever its length)
wsis_names = [os.path.splitext(os.path.basename(p))[0] for p in image_paths]

# Report how many slides were found; an empty match usually means a wrong path
print(f"Found {len(image_paths)} WSI file(s)")
if not image_paths:
    print("WARNING: no files matched", folder_path)

# Pair each path with its name. A list (not a bare zip iterator) so the pairs
# can be iterated again if this loop or a later cell is re-run.
zipped = list(zip(image_paths, wsis_names))

# Store pathway to the output folder in variable
output_folder = "path/to/output/folder"
if zipped:
    # cv2.imwrite fails silently when the target folder does not exist
    os.makedirs(output_folder, exist_ok=True)

# Pyramid level whose dimensions are used as the thumbnail size
thumbnail_level = 6

# Loop through the (path, name) pairs to create a binary mask for each image
for path, name in zipped:
    # Open the slide with openslide; the context manager closes the file even
    # if reading the thumbnail raises
    with openslide.OpenSlide(path) as ndpi_file:
        # Fall back to the last available level when the slide has fewer
        # levels than requested, instead of raising IndexError
        level = min(thumbnail_level, ndpi_file.level_count - 1)
        if level != thumbnail_level:
            print(f"Level {thumbnail_level} not available, using level {level}", name)
        image_data = ndpi_file.get_thumbnail(size=ndpi_file.level_dimensions[level])

    #checkpoint
    print("Converting image to numpy array", name)

    # Convert the thumbnail to a 3-channel RGB NumPy array
    image_np = np.array(image_data.convert('RGB'))

    # Checkpoint
    print("getting the contours", name)

    # Get the contours using the get_lymphnode_contours function
    contours = get_lymphnode_contours(image_np)

    # Create an empty (black) mask with the same dimensions as the thumbnail
    binary_mask = np.zeros_like(image_np)

    # Draw the filled contours in white on the mask
    cv2.drawContours(binary_mask, contours, -1, (255, 255, 255), thickness=cv2.FILLED)

    # Generate a new file name for the masked image
    new_file_name = f"{name}_masked.png"
    output_path = os.path.join(output_folder, new_file_name)

    # Save the binary mask to the destination folder; imwrite reports failure
    # through its return value rather than an exception
    if not cv2.imwrite(output_path, binary_mask):
        raise IOError(f"Could not write mask to {output_path}")

    # Print a message indicating that the mask has been saved
    print("Done", name)

#print a final message when all WSIs are processed
print("All WSIs done")

Same length
0
All WSIs done
