# Script to generate binary masks of each cell-type
# Created by Ruchika Verma

This code creates a separate folder for each patient and, inside it, one sub-folder for each annotated image of that patient.

Each sub-folder (one per sub-image of a patient) contains 4 sub-sub-folders (Epithelial, Lymphocyte, Neutrophil and Macrophage) that hold the binary masks of the corresponding cell type, with value 255 for the cell type and 0 for the background.

# Input
data_path: Specify the path of downloaded images

destination_path: Specify the path to save the corresponding binary masks

# Output
A directory tree of masks rooted at destination_path (the directory is created if it does not exist)

Binary masks will be saved in each sub-sub-folder

Folder -- Patient name

Sub-folder -- Sub-images under each patient

Sub-Sub-folder -- Annotated cell-type on each sub-image


In [None]:
#Process whole slide images
import os
from xml.dom import minidom
import numpy as np
from glob import glob
import cv2
import matplotlib.pyplot as plt
import scipy.io as sio
from PIL import Image
import scipy
import scipy.ndimage
from shapely.geometry import Polygon
from skimage import draw
import xml.etree.ElementTree as ET

os.environ['PATH'] = 'F:/Programs/openslide-win64-20171122/bin' + ';' + os.environ['PATH']
import openslide
from openslide import open_slide

In [None]:
# Read svs files from the desired path
count = 0  # Running number of processed sub-images
data_path = 'f:/Projects/__running_projects/MoNuSAC/data/MoNuSAC_images_and_annotations' #Path to read data from
destination_path = 'f:/Projects/__running_projects/MoNuSAC/data/MoNuSAC_mrcnn_masks' # Path to save binary masks corresponding to xml files

# Create the root of the output tree; an existing directory is not an error.
try:
    os.makedirs(destination_path, exist_ok=True)
except OSError:
    print ("Creation of the mask directory %s failed" % destination_path)

# Every patient is a directory directly under data_path. Stray files are
# skipped so they are neither counted nor turned into output folders, and the
# list is sorted so the processing order does not depend on the order in which
# the file system happens to list the entries.
patients = sorted(entry for entry in glob(data_path + '/*') if os.path.isdir(entry))
#Total patients in the data_path
print(len(patients))

In [None]:
# For every patient folder: export each .svs sub-image as a TIFF and rasterise
# the regions of its XML annotation file into binary masks (255 = cell,
# 0 = background). Per sub-image the output folder receives
#   Original/<sub_image>.tif            the exported slide
#   <label>/NNN_mask.tif                one mask per annotated region
#   <label>/NNN_mask_semantic.tif       union of all regions of that label
for patient_loc in patients:
    patient_name = os.path.basename(patient_loc)  # Patient name

    ## To make patient's name directory in the destination folder
    try:
        os.makedirs(os.path.join(destination_path, patient_name), exist_ok=True)
    except OSError:
        print("\n Creation of the patient's directory %s failed" % patient_name)
    else:
        print("Successfully created the directory %s " % patient_name)

    ## Read sub-images of each patient in the data path
    # Sorted so that `count`, which names the semantic masks, is assigned in a
    # reproducible order.
    sub_images = sorted(glob(patient_loc + '/*.svs'))

    for sub_image_loc in sub_images:
        count = count + 1
        sub_image_name, _ = os.path.splitext(os.path.basename(sub_image_loc))

        ## To make sub_image directory under the patient's folder
        sub_image_path = os.path.join(destination_path, patient_name, sub_image_name)  # Destination path

        try:
            os.makedirs(sub_image_path, exist_ok=True)
        except OSError:
            print("\n Creation of the sub-image directory %s failed" % sub_image_name)
        else:
            print("Successfully created the directory %s " % sub_image_name)

        # Read the full-resolution slide exactly once and release the OpenSlide
        # handle immediately; only the pixels and the dimensions are needed
        # afterwards.
        with openslide.OpenSlide(sub_image_loc) as img:
            slide_width, slide_height = img.level_dimensions[0]
            slide_rgba = np.array(img.read_region((0, 0), 0, (slide_width, slide_height)))
        # NumPy arrays are indexed (row, column), i.e. (y, x).
        mask_shape = (slide_height, slide_width)

        label = 'Original'
        sub_path = os.path.join(sub_image_path, 'Original')
        try:
            os.makedirs(sub_path, exist_ok=True)
        except OSError:
            print ("Creation of the directory %s failed" % label)
        else:
            print ("Successfully created the directory %s " % label)

        # If svs image needs to save in tif
        # read_region yields RGBA, whereas cv2.imwrite interprets its input as
        # BGRA; convert so red and blue are not swapped in the saved file.
        cv2.imwrite(os.path.join(sub_path, sub_image_name + '.tif'),
                    cv2.cvtColor(slide_rgba, cv2.COLOR_RGBA2BGRA))
        del slide_rgba  # free the full-resolution image before building masks

        # Read xml file
        xml_file_name = os.path.splitext(sub_image_loc)[0] + '.xml'
        root = ET.parse(xml_file_name).getroot()

        # Generate binary mask for each cell-type
        for annotation in root:
            # Fail fast (IndexError) on an annotation whose first child holds
            # no named attribute, instead of writing its regions into the
            # folder left over from a previous annotation.
            label = [attribute.attrib['Name'] for attribute in annotation[0]][0]

            for child in annotation:
                mask_counter = 0  # regions rasterised under this child
                for element in child:
                    if element.tag == 'Attribute':
                        # A new label starts a fresh semantic (union) mask.
                        label = element.attrib['Name']
                        binary_mask = np.zeros(mask_shape, dtype=np.uint8)

                        # Create directory for each label
                        sub_path = os.path.join(sub_image_path, label)

                        try:
                            os.makedirs(sub_path, exist_ok=True)
                        except OSError:
                            print ("Creation of the directory %s failed" % label)
                        else:
                            print ("Successfully created the directory %s " % label)

                    elif element.tag == 'Region':
                        # The second child of a region holds its vertices.
                        vertices = element[1]
                        xs = np.array([float(vertex.attrib['X']) for vertex in vertices])
                        ys = np.array([float(vertex.attrib['Y']) for vertex in vertices])
                        # draw.polygon takes (rows, cols) == (Y, X) and clips
                        # the filled pixels to mask_shape.
                        fill_row_coords, fill_col_coords = draw.polygon(ys, xs, mask_shape)

                        # Instance mask: this region only.
                        mask = np.zeros(mask_shape, dtype=np.uint8)
                        mask[fill_row_coords, fill_col_coords] = 255
                        m_path = os.path.join(sub_path, '{:03d}_mask.tif'.format(mask_counter))
                        cv2.imwrite(m_path, mask)
                        mask_counter += 1

                        # Semantic mask: accumulate every region of the label.
                        binary_mask[fill_row_coords, fill_col_coords] = 255

                # Only children that actually contained regions produce a
                # semantic mask.
                if mask_counter > 0:
                    mask_path = os.path.join(sub_path, '{:03d}_mask_semantic.tif'.format(count))
                    cv2.imwrite(mask_path, binary_mask)