# Script to generate binary masks of each cell-type
# Created by Ruchika Verma

This code will create separate folder for each patient and subfolders for annotated images under each patient's folder.

Each sub-folder corresponding to sub-images under each patient will contain 4 sub-sub-folders (Epithelial, Lymphocyte, Neutrophil and Macrophage) to save their corresponding binary-masks with value 255 for each cell-type and background 0

# Input
data_path: Specify the path of downloaded images

destination_path = Specify the path to save corresponding binary masks

# Output
MoNuSAC_masks directory in the destination_path

Binary masks will be saved in each sub-sub-folder

Folder -- Patient name

Sub-folder -- Sub-images under each patient

Sub-Sub-folder -- Annotated cell-type on each sub-image


In [1]:
OPENSLIDE_PATH = r'C:\openslides\openslide-win64-20221217\bin'

#Process whole slide images
import os
if hasattr(os, 'add_dll_directory'):
    # Python >= 3.8 on Windows
    with os.add_dll_directory(OPENSLIDE_PATH):
        import openslide
else:
    import openslide
from xml.dom import minidom
import numpy as np
import openslide
from openslide import open_slide  
from glob import glob
import cv2
import matplotlib.pyplot as plt
import scipy.io as sio
from PIL import Image
import scipy
import scipy.ndimage
from shapely.geometry import Polygon
from skimage import draw
import xml.etree.ElementTree as ET

In [2]:
# Read svs files from the desired path
count = 0
data_path = r'C:\Users\vstef\Desktop\MA3\segmentation\MoNuSAC_images_and_annotations' #Path to read data from
destination_path = r'C:\Users\vstef\Desktop\MA3\segmentation' # Path to save binary masks corresponding to xml files
os.chdir(destination_path)

try:
    os.mkdir(destination_path+'\MoNuSAC_masks')
except OSError:
    print ("Creation of the mask directory %s failed" % destination_path)
    
os.chdir(destination_path+'\MoNuSAC_masks')#Create folder named as MoNuSAC_masks
patients = [x[0] for x in os.walk(data_path)]#Total patients in the data_path
len(patients)

22

In [3]:
for patient_loc in patients:
    patient_name = patient_loc[len(data_path)+1:]#Patient name
    print(patient_name)
    
    ## To make patient's name directory in the destination folder
    try:
        os.mkdir(patient_name)
    except OSError:
        print ("\n Creation of the patient's directory %s failed" % patient_name)
        
    ## Read sub-images of each patient in the data path        
    sub_images = glob(patient_loc+'/*.svs')
    for sub_image_loc in sub_images:
        sub_image_name = sub_image_loc[len(data_path)+len(patient_name)+1:-4]        
        print(sub_image_name)
        
        ## To make sub_image directory under the patient's folder
        sub_image = './'+patient_name+'/'+sub_image_name #Destination path
        try:
            os.mkdir(sub_image)
        except OSError:
            print ("\n Creation of the patient's directory %s failed" % sub_image)
            
        image_name = sub_image_loc
        img = openslide.OpenSlide(image_name)
                                  
        # If svs image needs to save in tif
        cv2.imwrite(sub_image_loc[:-4]+'.tif', np.array(img.read_region((0,0),0,img.level_dimensions[0])))      
   
        # Read xml file
        xml_file_name  = image_name[:-4]
        xml_file_name = xml_file_name+'.xml'
        tree = ET.parse(xml_file_name)
        root = tree.getroot()
        
        #Generate binary mask for each cell-type                         
        for k in range(len(root)):
            label = [x.attrib['Name'] for x in root[k][0]]
            label = label[0]
            
            for child in root[k]:
                for x in child:
                    r = x.tag
                    if r == 'Attribute':
                        count = count+1
                        print(count)
                        label = x.attrib['Name']
                        binary_mask = np.transpose(np.zeros((img.read_region((0,0),0,img.level_dimensions[0]).size)))
                        print(label)
                        
                        # Create directory for each label
                        sub_path = sub_image+'/'+label
                        
                        try:
                            os.mkdir(sub_path)
                        except OSError:
                            print ("Creation of the directory %s failed" % label)
                        else:
                            print ("Successfully created the directory %s " % label) 
                                          
                        
                    if r == 'Region':
                        regions = []
                        vertices = x[1]
                        coords = np.zeros((len(vertices), 2))
                        for i, vertex in enumerate(vertices):
                            coords[i][0] = vertex.attrib['X']
                            coords[i][1] = vertex.attrib['Y']        
                        regions.append(coords)
                        poly = Polygon(regions[0])  
                        
                        vertex_row_coords = regions[0][:,0]
                        vertex_col_coords = regions[0][:,1]
                        fill_row_coords, fill_col_coords = draw.polygon(vertex_col_coords, vertex_row_coords, binary_mask.shape)
                        binary_mask[fill_row_coords, fill_col_coords] = 255
                        mask_path = sub_path+'/'+str(count)+'_mask.png'
                        cv2.imwrite(mask_path, binary_mask)
    



 Creation of the patient's directory  failed
TCGA-2Z-A9JG-01Z-00-DX1
\TCGA-2Z-A9JG-01Z-00-DX1_1
1
Epithelial
Successfully created the directory Epithelial 
2
Lymphocyte
Successfully created the directory Lymphocyte 
3
Neutrophil
Successfully created the directory Neutrophil 
4
Macrophage
Successfully created the directory Macrophage 
5
Ambiguous
Successfully created the directory Ambiguous 
\TCGA-2Z-A9JG-01Z-00-DX1_2
6
Epithelial
Successfully created the directory Epithelial 
7
Lymphocyte
Successfully created the directory Lymphocyte 
8
Neutrophil
Successfully created the directory Neutrophil 
9
Macrophage
Successfully created the directory Macrophage 
10
Ambiguous
Successfully created the directory Ambiguous 
\TCGA-2Z-A9JG-01Z-00-DX1_3
11
Epithelial
Successfully created the directory Epithelial 
12
Lymphocyte
Successfully created the directory Lymphocyte 
13
Neutrophil
Successfully created the directory Neutrophil 
14
Macrophage
Successfully created the directory Macrophage 
15
Ambi

121
Epithelial
Successfully created the directory Epithelial 
122
Lymphocyte
Successfully created the directory Lymphocyte 
123
Neutrophil
Successfully created the directory Neutrophil 
124
Macrophage
Successfully created the directory Macrophage 
125
Ambiguous
Successfully created the directory Ambiguous 
TCGA-78-7152-01Z-00-DX1
\TCGA-78-7152-01Z-00-DX1_001
126
Epithelial
Successfully created the directory Epithelial 
127
Lymphocyte
Successfully created the directory Lymphocyte 
128
Neutrophil
Successfully created the directory Neutrophil 
129
Macrophage
Successfully created the directory Macrophage 
130
Ambiguous
Successfully created the directory Ambiguous 
\TCGA-78-7152-01Z-00-DX1_002
131
Epithelial
Successfully created the directory Epithelial 
132
Lymphocyte
Successfully created the directory Lymphocyte 
133
Neutrophil
Successfully created the directory Neutrophil 
134
Macrophage
Successfully created the directory Macrophage 
135
Ambiguous
Successfully created the directory Ambig

246
Epithelial
Successfully created the directory Epithelial 
247
Lymphocyte
Successfully created the directory Lymphocyte 
248
Neutrophil
Successfully created the directory Neutrophil 
249
Macrophage
Successfully created the directory Macrophage 
250
Ambiguous
Successfully created the directory Ambiguous 
TCGA-G9-6367-01Z-00-DX1
\TCGA-G9-6367-01Z-00-DX1_1
251
Epithelial
Successfully created the directory Epithelial 
252
Lymphocyte
Successfully created the directory Lymphocyte 
253
Neutrophil
Successfully created the directory Neutrophil 
254
Macrophage
Successfully created the directory Macrophage 
255
Ambiguous
Successfully created the directory Ambiguous 
\TCGA-G9-6367-01Z-00-DX1_2
256
Epithelial
Successfully created the directory Epithelial 
257
Lymphocyte
Successfully created the directory Lymphocyte 
258
Neutrophil
Successfully created the directory Neutrophil 
259
Macrophage
Successfully created the directory Macrophage 
260
Ambiguous
Successfully created the directory Ambiguous

367
Lymphocyte
Successfully created the directory Lymphocyte 
368
Neutrophil
Successfully created the directory Neutrophil 
369
Macrophage
Successfully created the directory Macrophage 
370
Ambiguous
Successfully created the directory Ambiguous 
TCGA-VP-A87H-01Z-00-DX1
\TCGA-VP-A87H-01Z-00-DX1_1
371
Epithelial
Successfully created the directory Epithelial 
372
Lymphocyte
Successfully created the directory Lymphocyte 
373
Neutrophil
Successfully created the directory Neutrophil 
374
Macrophage
Successfully created the directory Macrophage 
375
Ambiguous
Successfully created the directory Ambiguous 
\TCGA-VP-A87H-01Z-00-DX1_2
376
Epithelial
Successfully created the directory Epithelial 
377
Lymphocyte
Successfully created the directory Lymphocyte 
378
Neutrophil
Successfully created the directory Neutrophil 
379
Macrophage
Successfully created the directory Macrophage 
380
Ambiguous
Successfully created the directory Ambiguous 
\TCGA-VP-A87H-01Z-00-DX1_5
381
Epithelial
Successfully creat