Prerequisites:

install [ASAP](https://github.com/computationalpathologygroup/ASAP/releases)

install openslide

In [2]:
import sys
sys.path.append("/opt/ASAP/bin")
import os
import glob
import multiresolutionimageinterface as mir
from joblib import Parallel, delayed

# Root folder of the CAMELYON16 dataset on this machine; the training and
# testing paths used further down are built relative to it.
DATASET_DIR = "/media/boxi/Dataset/CAMELYON16"

# preprocess tumor mask for training

In [None]:
def process_mask(slide_no, overwrite=False, camelyon17_type_mask=False):
    """Rasterise the XML annotation of one training tumor slide into a mask TIFF.

    Parameters
    ----------
    slide_no : int
        Slide number; it is zero-padded to three digits to form the
        ``tumor_NNN`` file names under ``DATASET_DIR/training/tumor``.
    overwrite : bool, optional
        Re-create the mask even if the output file already exists.
    camelyon17_type_mask : bool, optional
        Use the ``metastases``/``normal`` label names instead of the
        ``_0``/``_1``/``_2`` ones. Defaults to False, the value that used to
        be hard-coded inside the function.

    Returns
    -------
    int
        ``slide_no``, whether or not a mask was written.

    Raises
    ------
    IOError
        If the annotation file is missing or the slide cannot be opened.
    """
    slide_no_str = '{:03}'.format(slide_no)
    tumor_dir = os.path.join(DATASET_DIR, "training", "tumor")
    slide_path = os.path.join(tumor_dir, "tumor_" + slide_no_str + ".tif")
    slide_annotation_path = os.path.join(
        tumor_dir, "tumor_" + slide_no_str + ".xml")
    output_path = os.path.join(
        tumor_dir, "mask", "tumor_" + slide_no_str + "_mask.tif")

    # An existing mask is kept unless the caller explicitly asks to redo it.
    if os.path.isfile(output_path) and not overwrite:
        return slide_no

    # Fail loudly instead of rasterising an empty annotation list.
    if not os.path.isfile(slide_annotation_path):
        raise IOError("annotation file not found: " + slide_annotation_path)

    reader = mir.MultiResolutionImageReader()
    mr_image = reader.open(slide_path)
    # Without this check a failed open only shows up later as an
    # AttributeError on getDimensions().
    if mr_image is None:
        raise IOError("cannot open slide: " + slide_path)

    annotation_list = mir.AnnotationList()
    xml_repository = mir.XmlRepository(annotation_list)
    xml_repository.setSource(slide_annotation_path)
    xml_repository.load()

    if camelyon17_type_mask:
        label_map = {'metastases': 1, 'normal': 2}
        conversion_order = ['metastases', 'normal']
    else:
        label_map = {'_0': 1, '_1': 1, '_2': 0}
        conversion_order = ['_0', '_1', '_2']

    # Create the mask folder on demand. Several workers may try this at the
    # same time, so an "already exists" error is tolerated.
    mask_dir = os.path.dirname(output_path)
    try:
        os.makedirs(mask_dir)
    except OSError:
        if not os.path.isdir(mask_dir):
            raise

    print("process #"+slide_no_str)
    annotation_mask = mir.AnnotationToMask()
    annotation_mask.convert(
        annotation_list, output_path, mr_image.getDimensions(),
        mr_image.getSpacing(), label_map, conversion_order)
    return slide_no

In [None]:
# Build the masks for training tumor slides 1 through 111 with 60 parallel workers.
Parallel(n_jobs=60)(
    delayed(process_mask)(slide_no) for slide_no in range(1, 112)
)

# preprocess tumor mask for testing

In [None]:
# Locations inside the testing split. Each one keeps a trailing slash so that
# file names can be appended by plain string concatenation.
TEST_DIR = DATASET_DIR+'/testing/'
slide_dir = TEST_DIR+'images/'  # whole-slide images (<basename>.tif)
slide_annotation_dir = TEST_DIR+'lesion_annotations/'  # XML annotations (<basename>.xml)
output_dir = TEST_DIR+'mask/'  # destination of the generated masks

In [26]:
# Only XML annotation files identify tumor slides; any other file that may sit
# in the folder (archives, notes, ...) is ignored.
annotation_paths = glob.glob(slide_annotation_dir+'*.xml')
# splitext removes just the extension, so a basename that itself contains a
# dot is kept intact.
tumor_slide_basenames = sorted(
    os.path.splitext(os.path.basename(annotation_path))[0]
    for annotation_path in annotation_paths)

In [43]:
# Spot-check the path construction on one slide (second entry of the sorted list).
slide_basename = tumor_slide_basenames[1]

slide_path = slide_dir+slide_basename + '.tif'
# NOTE(review): the recorded outputs of this and the next two cells show
# /media/boxi/V2/test/..., which does not match the TEST_DIR derived from
# DATASET_DIR -- presumably a stale run; re-execute to refresh them.
slide_path

'/media/boxi/V2/test/images/test_002.tif'

In [44]:
# XML annotation file that belongs to the slide selected above.
slide_annotation_path = slide_annotation_dir+slide_basename + '.xml'
slide_annotation_path

'/media/boxi/V2/test/lesion_annotations/test_002.xml'

In [45]:
# Mask file that would be written for the slide selected above.
output_path = output_dir+slide_basename + '.tif'
output_path

'/media/boxi/V2/test/mask/test_002.tif'

In [None]:
def process_mask(slide_basename, overwrite=False, camelyon17_type_mask=False):
    """Rasterise the XML annotation of one test slide into a mask TIFF.

    This definition replaces the training-set ``process_mask`` defined
    earlier in the notebook; it resolves its files through the module-level
    ``slide_dir``, ``slide_annotation_dir`` and ``output_dir``.

    Parameters
    ----------
    slide_basename : str
        File name of the slide without extension; ``.tif`` / ``.xml`` are
        appended to locate the slide, its annotation and the output mask.
    overwrite : bool, optional
        Re-create the mask even if the output file already exists.
    camelyon17_type_mask : bool, optional
        Use the ``metastases``/``normal`` label names instead of the
        ``_0``/``_1``/``_2`` ones. Defaults to False, the value that used to
        be hard-coded inside the function.

    Returns
    -------
    str
        ``slide_basename``, whether or not a mask was written.

    Raises
    ------
    IOError
        If the annotation file is missing or the slide cannot be opened.
    """
    slide_path = os.path.join(slide_dir, slide_basename + '.tif')
    slide_annotation_path = os.path.join(
        slide_annotation_dir, slide_basename + '.xml')
    output_path = os.path.join(output_dir, slide_basename + '.tif')

    # An existing mask is kept unless the caller explicitly asks to redo it.
    if os.path.isfile(output_path) and not overwrite:
        return slide_basename

    # Fail loudly instead of rasterising an empty annotation list.
    if not os.path.isfile(slide_annotation_path):
        raise IOError("annotation file not found: " + slide_annotation_path)

    reader = mir.MultiResolutionImageReader()
    mr_image = reader.open(slide_path)
    # Without this check a failed open only shows up later as an
    # AttributeError on getDimensions().
    if mr_image is None:
        raise IOError("cannot open slide: " + slide_path)

    annotation_list = mir.AnnotationList()
    xml_repository = mir.XmlRepository(annotation_list)
    xml_repository.setSource(slide_annotation_path)
    xml_repository.load()

    if camelyon17_type_mask:
        label_map = {'metastases': 1, 'normal': 2}
        conversion_order = ['metastases', 'normal']
    else:
        label_map = {'_0': 1, '_1': 1, '_2': 0}
        conversion_order = ['_0', '_1', '_2']

    # Create the mask folder on demand. Several workers may try this at the
    # same time, so an "already exists" error is tolerated.
    mask_dir = os.path.dirname(output_path)
    try:
        os.makedirs(mask_dir)
    except OSError:
        if not os.path.isdir(mask_dir):
            raise

    print("process #"+slide_basename)
    annotation_mask = mir.AnnotationToMask()
    annotation_mask.convert(
        annotation_list, output_path, mr_image.getDimensions(),
        mr_image.getSpacing(), label_map, conversion_order)
    return slide_basename

# Build the masks for every annotated test slide with 60 parallel workers.
Parallel(n_jobs=60)(
    delayed(process_mask)(basename) for basename in tumor_slide_basenames
)