# Extracts the tumor image-patches dataset

In [None]:
from camelyon16_dataloader import *

# Extraction settings. Tiles are laid out on the scaled-down image
# (`tile_size` pixels per side) and mapped back to the full-resolution slide
# by multiplying row/column indices with `tile_size_original`.
magnification = 0.625
scale = get_scale_by_magnification(magnification)
tile_size = 20
tile_size_original = int(scale * tile_size)

# Annotation groups whose polygons are erased from the tumor mask
# (drawn with fill=0 after every other polygon has been drawn with fill=1).
exclusion_groups = ("_2", "Exclusion")

cls = "tumor"
dataset_types = ["training", "testing"]
for dataset_type in dataset_types:

    camelyon16_dir = "../../datasets/CAMELYON16"
    annotations_dir = "{}/{}/lesion_annotations".format(camelyon16_dir, dataset_type)
    wsi_images_dir = "{}/{}/{}/wsi".format(camelyon16_dir, dataset_type, cls)
    patch_images_dir = "{}/{}/{}/patch/640x640".format(camelyon16_dir, dataset_type, cls)

    for r, d, f in sorted(os.walk(wsi_images_dir)):
        for wsi_file in sorted(f):

            # Only whole-slide .tif images are processed (case-insensitive).
            if not wsi_file.lower().endswith('.tif'):
                continue

            wsi_image_file = "{}/{}".format(r, wsi_file)
            # splitext strips the extension whatever its letter case, matching
            # the case-insensitive check above.
            wsi_image_number = os.path.splitext(wsi_file)[0]

            logger.info("Processing wsi '{}'".format(wsi_file))

            # Make sure the output tree exists. exist_ok=True also repairs a
            # partially created tree left behind by an interrupted run.
            dir_to_save = "{}/{}".format(patch_images_dir, wsi_image_number)
            for sub_dir in ("01-original", "02-mask", "03-roi"):
                os.makedirs("{}/01-roi/{}".format(dir_to_save, sub_dir), exist_ok=True)

            # tumor annotations file
            annotation_file = "{}/{}.xml".format(annotations_dir, wsi_image_number)
            cont_tumor_regions, contours = find_annotation_contours(annotation_file)

            # Split the contours once per slide instead of once per tile.
            # Contours with fewer than two points are ignored for both kinds.
            inclusion_polygons = []
            exclusion_polygons = []
            for region_name, annotation_type, group, contour_color, points in contours:
                if len(points) <= 1:
                    continue
                if group in exclusion_groups:
                    exclusion_polygons.append(points)
                else:
                    inclusion_polygons.append(points)

            # tumor tissue region
            np_scaled_down_image, np_regions_label, np_tumor_mask, np_tumor_masked = extract_tumor_region_from_wsi(contours, wsi_image_file, magnification)
            pil_scaled_down_image = np_to_pil(np_scaled_down_image)
            pil_tumor_mask = np_to_pil(np_tumor_mask)
            pil_tumor_masked = np_to_pil(np_tumor_masked)

            # normal tissue region
            np_normal_mask, np_normal_masked = extract_normal_region_from_wsi(wsi_image_file, np_scaled_down_image, np_tumor_mask)
            pil_normal_mask = np_to_pil(np_normal_mask)
            pil_normal_masked = np_to_pil(np_normal_masked)
            pil_background = np_to_pil(np_normal_mask | np_tumor_mask)

            # Overview image: tumor in red, normal tissue in green, the rest in blue.
            blend_tumor = blend_image(pil_scaled_down_image, pil_tumor_mask, foreground='red', inverse=True)
            blend_tissue = blend_image(blend_tumor, pil_normal_mask, foreground='green', inverse=True)
            blend_background = blend_image(blend_tissue, pil_background, foreground='blue', inverse=False)

            pil_scaled_down_image.save('{}/{}_1.png'.format(dir_to_save, wsi_image_number))
            blend_background.save('{}/{}_2.png'.format(dir_to_save, wsi_image_number))
            pil_normal_masked.save('{}/{}_3.png'.format(dir_to_save, wsi_image_number))
            pil_tumor_masked.save('{}/{}_4.png'.format(dir_to_save, wsi_image_number))

            # heat grid normal
            pil_img_normal_result, heat_grid_normal, number_of_tiles_normal = draw_heat_grid(np_normal_masked, tile_size)
            pil_img_normal_result.save('{}/{}_5.png'.format(dir_to_save, wsi_image_number))

            # Normal tiles are only counted here (every tile not drawn in
            # GREEN_COLOR); this cell does not save them.
            count_tiles_normal = sum(
                1
                for position, row, column, location, size, color in heat_grid_normal
                if color != GREEN_COLOR
            )

            # heat grid tumor
            pil_img_result, heat_grid, number_of_tiles = draw_heat_grid(np_tumor_masked, tile_size)
            pil_img_result.save('{}/{}_6.png'.format(dir_to_save, wsi_image_number))

            # extract and save tumor image-patches
            count_tiles_tumor = 0
            for position, row, column, location, size, color in heat_grid:

                # Tiles drawn in GREEN_COLOR are skipped.
                if color == GREEN_COLOR:
                    continue

                r_s = row * tile_size
                c_s = column * tile_size
                np_tile_masked = np_tumor_masked[r_s:r_s + tile_size, c_s:c_s + tile_size]

                # Only tiles with a valid size: slices at the image border
                # come out smaller than tile_size and are dropped.
                if np_tile_masked.shape[0] != tile_size or np_tile_masked.shape[1] != tile_size:
                    continue

                tile_pil, tile_np = read_region(wsi_image_file, column, row, magnification, tile_size)

                # Offset that moves annotation points into this tile's
                # coordinate frame.
                left = column * tile_size_original
                top = row * tile_size_original

                # Rasterise the annotation for this tile: inclusion polygons
                # first, then exclusion polygons erase what they cover.
                pil_mask = np_to_pil(np.zeros((tile_np.shape[0], tile_np.shape[1]), dtype=np.uint8))
                draw = ImageDraw.Draw(pil_mask)
                for points in inclusion_polygons:
                    points_scaled = [(p[0] - left, p[1] - top) for p in points]
                    draw.polygon(points_scaled, outline=None, fill=1)
                for points in exclusion_polygons:
                    points_scaled = [(p[0] - left, p[1] - top) for p in points]
                    draw.polygon(points_scaled, outline=None, fill=0)

                np_tile_mask = pil_to_np(pil_mask).astype(bool)
                # NOTE(review): this result is not used below; the call is kept
                # in case mask_rgb has side effects - confirm and remove.
                np_tile_masked = mask_rgb(tile_np, np_tile_mask)

                pil_tile_roi = blend_image(np_to_pil(tile_np), np_to_pil(np_tile_mask), foreground='red', inverse=True)

                # save the extracted tumor image-patch
                tile_pil.save('{}/01-roi/{}/{}_r{}c{}.png'.format(dir_to_save, "01-original", wsi_image_number, row, column))
                np_to_pil(np_tile_mask).save('{}/01-roi/{}/{}_r{}c{}.png'.format(dir_to_save, "02-mask", wsi_image_number, row, column))
                pil_tile_roi.save('{}/01-roi/{}/{}_r{}c{}.png'.format(dir_to_save, "03-roi", wsi_image_number, row, column))

                count_tiles_tumor += 1

            logger.info("\t {} tumor regions.".format(cont_tumor_regions))
            logger.info("\t {} patches of 640x640 size.".format(len(heat_grid_normal)))
            logger.info("\t\t {} patches of 640x640 (normal tissue).".format(count_tiles_normal))
            logger.info("\t\t {} patches of 640x640 (tumor).".format(count_tiles_tumor))
            logger.info("-")

# Extracts the normal image-patches dataset

In [None]:
from camelyon16_dataloader import *

# Extraction settings. Tiles are laid out on the scaled-down image
# (`tile_size` pixels per side) and addressed by row/column index.
magnification = 0.625
scale = get_scale_by_magnification(magnification)
tile_size = 20
tile_size_original = int(scale * tile_size)


# Per-slide quota of patches to sample, split evenly between core and border
# tissue. NOTE(review): 43012 and 81 look like a target patch total and a
# slide count - confirm their origin.
qtd_core_patches_per_img = int((43012/81)/2)
qtd_border_patches_per_img = int((43012/81)/2)

# Quota a slide could not fill; carried over to the following slides.
sobra_core = 0
sobra_border = 0
count_total_border = 0
count_total_tissue = 0


def _save_normal_tiles(tiles, n_tiles, np_reference, wsi_path, slide_name, out_dir,
                       magnification, tile_size):
    """Randomly pick `n_tiles` heat-grid tiles and save them as normal patches.

    For each sampled tile the region is read from the slide and written to
    `<out_dir>/02-non_roi/01-original`, together with an all-zero mask of the
    same height/width in `<out_dir>/02-non_roi/02-mask`. Tiles whose slice of
    `np_reference` is smaller than `tile_size` x `tile_size` (image border)
    are skipped.

    Args:
        tiles: list of heat-grid tuples
            (position, row, column, location, size, color).
        n_tiles: number of tiles to sample; must not exceed len(tiles).
        np_reference: scaled-down image array used only for the size check.
        wsi_path: path of the whole-slide image to read patches from.
        slide_name: slide identifier used in the output file names.
        out_dir: slide output directory.
        magnification: magnification passed through to read_region.
        tile_size: tile side length on the scaled-down image.
    """
    for position, row, column, location, size, color in random.sample(tiles, n_tiles):

        r_s = row * tile_size
        c_s = column * tile_size
        window = np_reference[r_s:r_s + tile_size, c_s:c_s + tile_size]

        # only tile with valid size
        if window.shape[0] != tile_size or window.shape[1] != tile_size:
            continue

        tile_pil, tile_np = read_region(wsi_path, column, row, magnification, tile_size)

        # Normal patches contain no annotated region, so the mask is all zeros.
        pil_mask = np_to_pil(np.zeros((tile_np.shape[0], tile_np.shape[1]), dtype=np.uint8))
        np_tile_mask = pil_to_np(pil_mask).astype(bool)

        # save the extracted normal tile
        tile_pil.save('{}/02-non_roi/{}/{}_r{}c{}.png'.format(out_dir, "01-original", slide_name, row, column))
        np_to_pil(np_tile_mask).save('{}/02-non_roi/{}/{}_r{}c{}.png'.format(out_dir, "02-mask", slide_name, row, column))


cls = "normal"
dataset_types = ["training", "testing"]
for dataset_type in dataset_types:

    camelyon16_dir = "../../datasets/CAMELYON16"
    wsi_images_dir = "{}/{}/{}/wsi".format(camelyon16_dir, dataset_type, cls)
    patch_images_dir = "{}/{}/{}/patch/640x640".format(camelyon16_dir, dataset_type, cls)

    for r, d, f in sorted(os.walk(wsi_images_dir)):
        for wsi_file in sorted(f):

            # Only whole-slide .tif images are processed (case-insensitive).
            if not wsi_file.lower().endswith('.tif'):
                continue

            wsi_image_file = "{}/{}".format(r, wsi_file)
            # splitext strips the extension whatever its letter case, matching
            # the case-insensitive check above.
            wsi_image_number = os.path.splitext(wsi_file)[0]

            logger.info("Processing wsi '{}'".format(wsi_file))

            # Make sure the output tree exists. exist_ok=True also repairs a
            # partially created tree left behind by an interrupted run.
            dir_to_save = "{}/{}".format(patch_images_dir, wsi_image_number)
            for sub_dir in ("01-original", "02-mask"):
                os.makedirs("{}/02-non_roi/{}".format(dir_to_save, sub_dir), exist_ok=True)

            # scale down image
            pil_scaled_down_image, scale = scale_down_camelyon16_img(wsi_image_file, magnification)
            np_scaled_down_image = pil_to_np(pil_scaled_down_image)

            # normal tissue region (no tumor mask for normal slides)
            np_normal_mask, np_normal_masked = extract_normal_region_from_wsi(wsi_image_file, np_scaled_down_image, None)
            pil_normal_mask = np_to_pil(np_normal_mask)
            pil_normal_masked = np_to_pil(np_normal_masked)

            blend_tissue = blend_image(pil_scaled_down_image, pil_normal_mask, foreground='green', inverse=True)
            blend_tissue = blend_image(blend_tissue, pil_normal_mask, foreground='blue', inverse=False)

            pil_scaled_down_image.save('{}/{}_1.png'.format(dir_to_save, wsi_image_number))
            blend_tissue.save('{}/{}_2.png'.format(dir_to_save, wsi_image_number))
            pil_normal_masked.save('{}/{}_3.png'.format(dir_to_save, wsi_image_number))

            # heat grid normal
            pil_img_normal_result, heat_grid_normal, number_of_tiles_normal = draw_heat_grid(np_normal_masked, tile_size)
            pil_img_normal_result.save('{}/{}_4.png'.format(dir_to_save, wsi_image_number))

            # Bucket the tiles by heat-grid colour: green -> background,
            # yellow/orange -> border tissue, red -> core tissue.
            tiles_core_tissue = []
            tiles_border_tissue = []
            tiles_background = []
            for tile in heat_grid_normal:
                color = tile[5]
                if color == GREEN_COLOR:
                    tiles_background.append(tile)
                elif color == YELLOW_COLOR or color == ORANGE_COLOR:
                    tiles_border_tissue.append(tile)
                elif color == RED_COLOR:
                    tiles_core_tissue.append(tile)

            logger.info("\t {} patches of 640x640 size.".format(len(heat_grid_normal)))
            logger.info("\t\t {} patches of 640x640 (background/normal tissue).".format(len(tiles_border_tissue)))
            logger.info("\t\t {} patches of 640x640 (core tissue).".format(len(tiles_core_tissue)))
            logger.info("\t\t {} patches of 640x640 (background).".format(len(tiles_background)))

            # extract and save border tiles: take the quota plus any carry-over,
            # capped by what this slide offers; the shortfall is carried forward.
            tiles_border_len = min(len(tiles_border_tissue), qtd_border_patches_per_img + sobra_border)
            sobra_border = sobra_border + (qtd_border_patches_per_img - tiles_border_len)
            _save_normal_tiles(tiles_border_tissue, tiles_border_len, np_normal_masked,
                               wsi_image_file, wsi_image_number, dir_to_save,
                               magnification, tile_size)

            # extract and save core tiles, same scheme with the core quota
            tiles_core_len = min(len(tiles_core_tissue), qtd_core_patches_per_img + sobra_core)
            sobra_core = sobra_core + (qtd_core_patches_per_img - tiles_core_len)
            _save_normal_tiles(tiles_core_tissue, tiles_core_len, np_normal_masked,
                               wsi_image_file, wsi_image_number, dir_to_save,
                               magnification, tile_size)

            # Totals count the tiles selected for extraction.
            count_total_border += tiles_border_len
            count_total_tissue += (tiles_border_len + tiles_core_len)

logger.info("-")
logger.info("{} image-patches of background/normal tissue region.".format(count_total_border))
logger.info("{} image-patches of normal tissue region.".format(count_total_tissue))

# Extracts the evaluation masks

In [5]:
from camelyon16_dataloader import *

# Settings shared with the patch-extraction cells.
magnification = 0.625
scale = get_scale_by_magnification(magnification)
tile_size = 20
tile_size_original = int(scale * tile_size)

dataset_type = "testing"
camelyon16_dir = "../../datasets/CAMELYON16"
annotations_dir = "{}/{}/lesion_annotations".format(camelyon16_dir, dataset_type)
evaluation_masks_dir = "{}/{}/evaluation_masks".format(camelyon16_dir, dataset_type)

# The mask writers below do not create missing directories, so create the
# output directory up front.
os.makedirs(evaluation_masks_dir, exist_ok=True)

classes = ["normal", "tumor"]
for cls in classes:

    wsi_images_dir = "{}/{}/{}/wsi".format(camelyon16_dir, dataset_type, cls)
    for r, d, f in sorted(os.walk(wsi_images_dir)):
        for wsi_file in sorted(f):

            # Only whole-slide .tif images are processed (case-insensitive).
            if not wsi_file.lower().endswith('.tif'):
                continue

            wsi_image_file = "{}/{}".format(r, wsi_file)
            # splitext strips the extension whatever its letter case, so the
            # numeric suffix parsed below never carries a stray ".TIF".
            wsi_image_number = os.path.splitext(wsi_file)[0]

            # Slide number 114 is deliberately skipped.
            # NOTE(review): presumably a known-problematic test slide - confirm.
            if int(wsi_image_number.split("_")[1]) == 114:
                continue

            logger.info("Processing wsi '{}'".format(wsi_file))
            mask_file = '{}/{}_evaluation_mask.png'.format(evaluation_masks_dir, wsi_image_number)

            if cls == "tumor":

                # Tumor slides: the evaluation mask is the region-label image
                # derived from the lesion annotations.
                annotation_file = "{}/{}.xml".format(annotations_dir, wsi_image_number)
                cont_tumor_regions, contours = find_annotation_contours(annotation_file)

                np_scaled_down_image, np_regions_label, np_tumor_mask, np_tumor_masked = extract_tumor_region_from_wsi(contours, wsi_image_file, magnification)
                # cv2.imwrite reports failure through its return value rather
                # than raising, so check it explicitly.
                if not cv2.imwrite(mask_file, np_regions_label):
                    logger.error("Could not write evaluation mask '{}'".format(mask_file))

            else:

                # Normal slides: an all-zero mask with the scaled-down image's shape.
                pil_scaled_down_image, scale = scale_down_camelyon16_img(wsi_image_file, magnification)
                np_to_pil(np.zeros_like(pil_scaled_down_image)).save(mask_file)


2021-01-05 21:58:40,984 :: INFO <module> :: Processing wsi 'test_003.tif'
2021-01-05 21:58:42,976 :: INFO <module> :: Processing wsi 'test_001.tif'
2021-01-05 21:59:05,190 :: INFO extract_tumor_region_from_wsi :: 	 Extracting tumor regions from wsi image: 'test_001.tif'
