# Extracts the image-patches dataset

In [14]:
from orca_dataloader import *

# Extracts tumor / non-tumor image-patches from every ORCA image.
#
# For each PNG found under "<orca_dir>/<dataset_type>/<cls>/wsi" the cell:
#   1. loads the image and its "<name>_mask.png" lesion annotation;
#   2. builds boolean masks from the annotation: pixels equal to 255 are
#      treated as tumor, pixels strictly between 0 and 255 as normal tissue;
#   3. saves six overview images ("<name>_1.png" ... "<name>_6.png");
#   4. classifies the heat-grid tiles and saves one patch (plus mask files)
#      per tile under "01-roi" (tumor) or "02-non_roi" (normal tissue).
#
# All helpers (get_scale_by_magnification, load_pil_image, open_wsi,
# scale_down_wsi, pil_to_np, np_to_pil, mask_rgb, image_resize, blend_image,
# draw_heat_grid, read_region) and the names os, np, Image, logger and
# *_COLOR come from the star import above.
magnification = 0.625
scale = get_scale_by_magnification(magnification)
tile_size = 20
# Tile edge length measured on the full-resolution image.
tile_size_original = int(scale * tile_size)

cls = "tumor"
orca_dir = "../../datasets/ORCA"

# Heat-grid colours that classify a tile as tumor tissue.
tumor_tile_colors = (YELLOW_COLOR, ORANGE_COLOR, RED_COLOR)

# Sub-directories required below every per-image patch directory.
patch_subdirs = (
    "01-roi/01-original",
    "01-roi/02-mask",
    "01-roi/03-roi",
    "02-non_roi/01-original",
    "02-non_roi/02-mask",
)

dataset_types = ["training", "testing"]
for dataset_type in sorted(dataset_types):

    logger.info("{} images".format(dataset_type))

    annotations_dir = "{}/{}/lesion_annotations".format(orca_dir, dataset_type)
    wsi_images_dir = "{}/{}/{}/wsi".format(orca_dir, dataset_type, cls)
    patch_images_dir = "{}/{}/{}/patch/640x640".format(orca_dir, dataset_type, cls)

    for r, d, f in sorted(os.walk(wsi_images_dir)):
        for wsi_file in sorted(f):

            # Only PNG images are processed (case-insensitive extension test).
            if not wsi_file.lower().endswith('.png'):
                continue

            wsi_image_file = "{}/{}".format(r, wsi_file)
            # Strip only the trailing extension; str.replace would also remove
            # ".png" inside the name and would miss an upper-case ".PNG".
            wsi_image_number = os.path.splitext(wsi_file)[0]

            logger.info("Processing wsi '{}'".format(wsi_file))

            # Make sure every output directory exists. exist_ok=True also
            # repairs a partially created tree left by an interrupted run.
            dir_to_save = "{}/{}".format(patch_images_dir, wsi_image_number)
            for subdir in patch_subdirs:
                os.makedirs("{}/{}".format(dir_to_save, subdir), exist_ok=True)

            # tumor annotations mask
            annotation_file = "{}/{}_mask.png".format(annotations_dir, wsi_image_number)
            wsi_mask_pil = load_pil_image(annotation_file, gray=True)

            # tumor tissue region
            wsi_image = open_wsi(wsi_image_file)
            wsi_image_pil = load_pil_image(wsi_image_file, gray=False)
            pil_scaled_down_image = scale_down_wsi(wsi_image, magnification, False)

            # Derive the masks directly from the annotation array so they
            # always have its (rows, cols) shape. PIL's .size is
            # (width, height), so allocating with size[0], size[1] only
            # matched the array for square images.
            np_annotation = pil_to_np(wsi_mask_pil)
            np_tumor_mask = np_annotation == 255
            pil_tumor_mask = np_to_pil(np_tumor_mask)

            # Normal tissue: annotated (non-zero) but not tumor (not 255).
            np_normal_mask = (np_annotation > 0) & (np_annotation < 255)
            pil_normal_mask = np_to_pil(np_normal_mask)

            # Union of both tissue masks; blended with inverse=False below.
            pil_background = np_to_pil(np_normal_mask | np_tumor_mask)

            wsi_image_np = pil_to_np(wsi_image_pil)
            np_tumor_masked = mask_rgb(wsi_image_np, np_tumor_mask)
            np_tumor_masked = image_resize(np_tumor_masked, height=pil_scaled_down_image.size[1])
            pil_tumor_masked = np_to_pil(np_tumor_masked)

            np_normal_masked = mask_rgb(wsi_image_np, np_normal_mask)
            np_normal_masked = image_resize(np_normal_masked, height=pil_scaled_down_image.size[1])
            pil_normal_masked = np_to_pil(np_normal_masked)

            blend_tumor = blend_image(wsi_image_pil, pil_tumor_mask, foreground='red', inverse=True)
            blend_tissue = blend_image(blend_tumor, pil_normal_mask, foreground='green', inverse=True)
            # Image.LANCZOS is the same filter as the former Image.ANTIALIAS
            # alias, which was removed in Pillow 10.
            blend_background = blend_image(
                blend_tissue, pil_background, foreground='blue', inverse=False
            ).resize(pil_tumor_masked.size, Image.LANCZOS)

            pil_scaled_down_image.save('{}/{}_1.png'.format(dir_to_save, wsi_image_number))
            blend_background.save('{}/{}_2.png'.format(dir_to_save, wsi_image_number))
            pil_normal_masked.save('{}/{}_3.png'.format(dir_to_save, wsi_image_number))
            pil_tumor_masked.save('{}/{}_4.png'.format(dir_to_save, wsi_image_number))

            # heat grid normal
            pil_img_normal_result, heat_grid_normal, number_of_tiles_normal = draw_heat_grid(np_normal_masked, tile_size)
            pil_img_normal_result.save('{}/{}_5.png'.format(dir_to_save, wsi_image_number))

            # heat grid tumor
            pil_img_result, heat_grid, number_of_tiles = draw_heat_grid(np_tumor_masked, tile_size)
            pil_img_result.save('{}/{}_6.png'.format(dir_to_save, wsi_image_number))

            # Tumor tiles are selected first; their grid cells are remembered
            # so the same cell is never also counted as normal or background.
            used_patches = set()
            tiles_tumor_tissue = []
            tiles_normal_tissue = []
            tiles_background = []
            for tile in heat_grid:
                position, row, column, location, size, color = tile
                if color in tumor_tile_colors:
                    tiles_tumor_tissue.append(tile)
                    used_patches.add((row, column))

            for tile in heat_grid_normal:
                position, row, column, location, size, color = tile
                if (row, column) in used_patches:
                    continue
                # A green cell in the normal-tissue grid is counted as background.
                if color == GREEN_COLOR:
                    tiles_background.append(tile)
                else:
                    tiles_normal_tissue.append(tile)

            logger.info("\t {} patches of 640x640 size.".format(len(heat_grid_normal)))
            logger.info("\t\t {} patches of 640x640 (normal tissue).".format(len(tiles_normal_tissue)))
            logger.info("\t\t {} patches of 640x640 (tumor tissue).".format(len(tiles_tumor_tissue)))
            logger.info("\t\t {} patches of 640x640 (background).".format(len(tiles_background)))

            # extract and save normal patches
            for (position, row, column, location, size, color) in tiles_normal_tissue:

                tile_pil, tile_np = read_region(wsi_image_file, column, row, magnification, tile_size)

                # Normal patches get an all-False mask sized like the tile that
                # was actually read (the tumor-mask crop the original computed
                # here was overwritten before use, so it has been removed).
                np_tile_mask = np.zeros((tile_np.shape[0], tile_np.shape[1]), dtype=bool)

                # save the extracted normal tile
                tile_pil.save('{}/02-non_roi/{}/{}_r{}c{}.png'.format(dir_to_save, "01-original", wsi_image_number, row, column))
                np_to_pil(np_tile_mask).save('{}/02-non_roi/{}/{}_r{}c{}.png'.format(dir_to_save, "02-mask", wsi_image_number, row, column))

            # extract and save tumor patches
            for (position, row, column, location, size, color) in tiles_tumor_tissue:

                row_start = row * tile_size_original
                col_start = column * tile_size_original

                # NumPy clips slices at the array border, so tiles on the
                # bottom/right edge yield a smaller crop that is pasted into
                # the top-left corner of a full-size, False-padded mask.
                # (The original clamped rows against the image width and
                # columns against the height, which is only right for
                # square images.)
                mask_crop = np_tumor_mask[
                    row_start:row_start + tile_size_original,
                    col_start:col_start + tile_size_original,
                ]
                np_tile_mask = np.zeros((tile_size_original, tile_size_original), dtype=bool)
                np_tile_mask[:mask_crop.shape[0], :mask_crop.shape[1]] = mask_crop

                tile_pil, tile_np = read_region(wsi_image_file, column, row, magnification, tile_size)

                pil_tile_roi = blend_image(np_to_pil(tile_np), np_to_pil(np_tile_mask), foreground='blue', inverse=True)

                # save the extracted tumor image-patch
                tile_pil.save('{}/01-roi/{}/{}_r{}c{}.png'.format(dir_to_save, "01-original", wsi_image_number, row, column))
                np_to_pil(np_tile_mask).save('{}/01-roi/{}/{}_r{}c{}.png'.format(dir_to_save, "02-mask", wsi_image_number, row, column))
                pil_tile_roi.save('{}/01-roi/{}/{}_r{}c{}.png'.format(dir_to_save, "03-roi", wsi_image_number, row, column))
                

2021-02-01 15:04:37,153 :: INFO <module> :: testing images
2021-02-01 15:04:37,155 :: INFO <module> :: Processing wsi 'TCGA-CN-4723-01Z-00-DX1.13483e7b-9322-4d39-8cd6-91e898bf2ee9_0.png'
2021-02-01 15:04:41,840 :: INFO <module> :: 	 49 patches of 640x640 size.
2021-02-01 15:04:41,841 :: INFO <module> :: 		 18 patches of 640x640 (normal tissue).
2021-02-01 15:04:41,843 :: INFO <module> :: 		 27 patches of 640x640 (tumor tissue).
2021-02-01 15:04:41,844 :: INFO <module> :: 		 4 patches of 640x640 (background).
2021-02-01 15:05:19,755 :: INFO <module> :: Processing wsi 'TCGA-CN-4723-01Z-00-DX1.13483e7b-9322-4d39-8cd6-91e898bf2ee9_1.png'
2021-02-01 15:05:25,051 :: INFO <module> :: 	 49 patches of 640x640 size.
2021-02-01 15:05:25,053 :: INFO <module> :: 		 3 patches of 640x640 (normal tissue).
2021-02-01 15:05:25,054 :: INFO <module> :: 		 42 patches of 640x640 (tumor tissue).
2021-02-01 15:05:25,055 :: INFO <module> :: 		 4 patches of 640x640 (background).
2021-02-01 15:06:09,541 :: INFO 

2021-02-01 15:17:22,056 :: INFO <module> :: Processing wsi 'TCGA-CN-4734-01Z-00-DX1.9cd35e43-7132-478a-9d08-a88d150b7127_0.png'
2021-02-01 15:17:26,088 :: INFO <module> :: 	 49 patches of 640x640 size.
2021-02-01 15:17:26,089 :: INFO <module> :: 		 1 patches of 640x640 (normal tissue).
2021-02-01 15:17:26,090 :: INFO <module> :: 		 44 patches of 640x640 (tumor tissue).
2021-02-01 15:17:26,091 :: INFO <module> :: 		 4 patches of 640x640 (background).
2021-02-01 15:18:03,776 :: INFO <module> :: Processing wsi 'TCGA-CN-4734-01Z-00-DX1.9cd35e43-7132-478a-9d08-a88d150b7127_1.png'
2021-02-01 15:18:07,980 :: INFO <module> :: 	 49 patches of 640x640 size.
2021-02-01 15:18:07,981 :: INFO <module> :: 		 4 patches of 640x640 (normal tissue).
2021-02-01 15:18:07,982 :: INFO <module> :: 		 41 patches of 640x640 (tumor tissue).
2021-02-01 15:18:07,983 :: INFO <module> :: 		 4 patches of 640x640 (background).
2021-02-01 15:18:47,379 :: INFO <module> :: Processing wsi 'TCGA-CN-4735-01Z-00-DX1.b7d37666

2021-02-01 15:30:02,067 :: INFO <module> :: 	 49 patches of 640x640 size.
2021-02-01 15:30:02,068 :: INFO <module> :: 		 8 patches of 640x640 (normal tissue).
2021-02-01 15:30:02,069 :: INFO <module> :: 		 37 patches of 640x640 (tumor tissue).
2021-02-01 15:30:02,070 :: INFO <module> :: 		 4 patches of 640x640 (background).
2021-02-01 15:30:39,406 :: INFO <module> :: Processing wsi 'TCGA-CN-5355-01Z-00-DX1.8fb23e44-fc59-4191-b825-cc021807a338_1.png'
2021-02-01 15:30:43,039 :: INFO <module> :: 	 49 patches of 640x640 size.
2021-02-01 15:30:43,041 :: INFO <module> :: 		 10 patches of 640x640 (normal tissue).
2021-02-01 15:30:43,042 :: INFO <module> :: 		 35 patches of 640x640 (tumor tissue).
2021-02-01 15:30:43,044 :: INFO <module> :: 		 4 patches of 640x640 (background).
2021-02-01 15:31:19,249 :: INFO <module> :: Processing wsi 'TCGA-CN-5356-01Z-00-DX1.53360c41-d0d2-465e-8b53-ccd1a2ce248f_0.png'
2021-02-01 15:31:23,170 :: INFO <module> :: 	 49 patches of 640x640 size.
2021-02-01 15:31:

2021-02-01 15:42:25,302 :: INFO <module> :: 		 7 patches of 640x640 (normal tissue).
2021-02-01 15:42:25,303 :: INFO <module> :: 		 38 patches of 640x640 (tumor tissue).
2021-02-01 15:42:25,303 :: INFO <module> :: 		 4 patches of 640x640 (background).
2021-02-01 15:43:01,243 :: INFO <module> :: Processing wsi 'TCGA-CN-5366-01Z-00-DX1.dcc14c44-6dd1-48a1-a21c-736c6b9551e1_1.png'
2021-02-01 15:43:06,264 :: INFO <module> :: 	 49 patches of 640x640 size.
2021-02-01 15:43:06,265 :: INFO <module> :: 		 18 patches of 640x640 (normal tissue).
2021-02-01 15:43:06,265 :: INFO <module> :: 		 27 patches of 640x640 (tumor tissue).
2021-02-01 15:43:06,266 :: INFO <module> :: 		 4 patches of 640x640 (background).
2021-02-01 15:43:40,843 :: INFO <module> :: Processing wsi 'TCGA-CN-5367-01Z-00-DX1.5b09e54e-4140-4709-bc60-e201f9a72b24_0.png'
2021-02-01 15:43:44,465 :: INFO <module> :: 	 49 patches of 640x640 size.
2021-02-01 15:43:44,466 :: INFO <module> :: 		 11 patches of 640x640 (normal tissue).
2021-

2021-02-01 15:55:47,741 :: INFO <module> :: 		 28 patches of 640x640 (tumor tissue).
2021-02-01 15:55:47,742 :: INFO <module> :: 		 4 patches of 640x640 (background).
2021-02-01 15:56:29,983 :: INFO <module> :: Processing wsi 'TCGA-CN-6017-01Z-00-DX1.690fa1af-4b51-41c8-a52d-bd1341f990e9_1.png'
2021-02-01 15:56:34,496 :: INFO <module> :: 	 49 patches of 640x640 size.
2021-02-01 15:56:34,497 :: INFO <module> :: 		 13 patches of 640x640 (normal tissue).
2021-02-01 15:56:34,498 :: INFO <module> :: 		 32 patches of 640x640 (tumor tissue).
2021-02-01 15:56:34,498 :: INFO <module> :: 		 4 patches of 640x640 (background).
2021-02-01 15:57:12,271 :: INFO <module> :: Processing wsi 'TCGA-CN-6018-01Z-00-DX1.77855643-92a9-40a7-b4a8-0e32b89fc597_0.png'
2021-02-01 15:57:16,159 :: INFO <module> :: 	 49 patches of 640x640 size.
2021-02-01 15:57:16,160 :: INFO <module> :: 		 19 patches of 640x640 (normal tissue).
2021-02-01 15:57:16,163 :: INFO <module> :: 		 26 patches of 640x640 (tumor tissue).
2021-

2021-02-01 16:10:06,693 :: INFO <module> :: 		 4 patches of 640x640 (background).
2021-02-01 16:11:08,459 :: INFO <module> :: Processing wsi 'TCGA-CN-6989-01Z-00-DX1.dd68e391-1d00-4bfc-9be9-ff5024bea237_1.png'
2021-02-01 16:11:12,205 :: INFO <module> :: 	 49 patches of 640x640 size.
2021-02-01 16:11:12,206 :: INFO <module> :: 		 8 patches of 640x640 (normal tissue).
2021-02-01 16:11:12,207 :: INFO <module> :: 		 37 patches of 640x640 (tumor tissue).
2021-02-01 16:11:12,208 :: INFO <module> :: 		 4 patches of 640x640 (background).
2021-02-01 16:11:52,368 :: INFO <module> :: Processing wsi 'TCGA-CN-6994-01Z-00-DX1.0aa71a4c-25e5-417d-86cf-ce2756e3dd8a_0.png'
2021-02-01 16:11:57,045 :: INFO <module> :: 	 49 patches of 640x640 size.
2021-02-01 16:11:57,047 :: INFO <module> :: 		 11 patches of 640x640 (normal tissue).
2021-02-01 16:11:57,048 :: INFO <module> :: 		 34 patches of 640x640 (tumor tissue).
2021-02-01 16:11:57,049 :: INFO <module> :: 		 4 patches of 640x640 (background).
2021-02-0

2021-02-01 16:24:19,260 :: INFO <module> :: 		 4 patches of 640x640 (background).
2021-02-01 16:25:02,381 :: INFO <module> :: Processing wsi 'TCGA-BA-4078-01Z-00-DX1.B190F43D-3313-496B-BEF7-3798A5DE5E55_1.png'
2021-02-01 16:25:06,170 :: INFO <module> :: 	 49 patches of 640x640 size.
2021-02-01 16:25:06,170 :: INFO <module> :: 		 14 patches of 640x640 (normal tissue).
2021-02-01 16:25:06,171 :: INFO <module> :: 		 31 patches of 640x640 (tumor tissue).
2021-02-01 16:25:06,172 :: INFO <module> :: 		 4 patches of 640x640 (background).
2021-02-01 16:25:45,749 :: INFO <module> :: Processing wsi 'TCGA-BA-5149-01Z-00-DX1.ecb9aaa2-68b0-47e2-b96f-8cd78bf0a6fb_0.png'
2021-02-01 16:25:49,325 :: INFO <module> :: 	 49 patches of 640x640 size.
2021-02-01 16:25:49,326 :: INFO <module> :: 		 2 patches of 640x640 (normal tissue).
2021-02-01 16:25:49,326 :: INFO <module> :: 		 43 patches of 640x640 (tumor tissue).
2021-02-01 16:25:49,327 :: INFO <module> :: 		 4 patches of 640x640 (background).
2021-02-0

2021-02-01 16:36:11,251 :: INFO <module> :: Processing wsi 'TCGA-BA-6868-01Z-00-DX1.edbca656-6680-4c99-92ef-d7467f42c780_1.png'
2021-02-01 16:36:14,996 :: INFO <module> :: 	 49 patches of 640x640 size.
2021-02-01 16:36:14,996 :: INFO <module> :: 		 10 patches of 640x640 (normal tissue).
2021-02-01 16:36:14,997 :: INFO <module> :: 		 35 patches of 640x640 (tumor tissue).
2021-02-01 16:36:14,998 :: INFO <module> :: 		 4 patches of 640x640 (background).
2021-02-01 16:36:52,880 :: INFO <module> :: Processing wsi 'TCGA-BA-6869-01Z-00-DX1.6e58648e-3309-47bb-b2c7-b71bcd9dc69b_0.png'
2021-02-01 16:36:56,603 :: INFO <module> :: 	 49 patches of 640x640 size.
2021-02-01 16:36:56,604 :: INFO <module> :: 		 14 patches of 640x640 (normal tissue).
2021-02-01 16:36:56,606 :: INFO <module> :: 		 31 patches of 640x640 (tumor tissue).
2021-02-01 16:36:56,607 :: INFO <module> :: 		 4 patches of 640x640 (background).
2021-02-01 16:37:30,935 :: INFO <module> :: Processing wsi 'TCGA-BA-6869-01Z-00-DX1.6e5864

2021-02-01 16:48:27,777 :: INFO <module> :: 	 49 patches of 640x640 size.
2021-02-01 16:48:27,778 :: INFO <module> :: 		 20 patches of 640x640 (normal tissue).
2021-02-01 16:48:27,779 :: INFO <module> :: 		 25 patches of 640x640 (tumor tissue).
2021-02-01 16:48:27,780 :: INFO <module> :: 		 4 patches of 640x640 (background).
2021-02-01 16:49:04,961 :: INFO <module> :: Processing wsi 'TCGA-BA-A6DF-01Z-00-DX1.AAA638F5-C794-441B-A034-30AD8CC72573_0.png'
2021-02-01 16:49:08,553 :: INFO <module> :: 	 49 patches of 640x640 size.
2021-02-01 16:49:08,554 :: INFO <module> :: 		 4 patches of 640x640 (normal tissue).
2021-02-01 16:49:08,555 :: INFO <module> :: 		 41 patches of 640x640 (tumor tissue).
2021-02-01 16:49:08,556 :: INFO <module> :: 		 4 patches of 640x640 (background).
2021-02-01 16:49:45,736 :: INFO <module> :: Processing wsi 'TCGA-BA-A6DF-01Z-00-DX1.AAA638F5-C794-441B-A034-30AD8CC72573_1.png'
2021-02-01 16:49:49,190 :: INFO <module> :: 	 49 patches of 640x640 size.
2021-02-01 16:49:

2021-02-01 17:00:49,017 :: INFO <module> :: 		 2 patches of 640x640 (normal tissue).
2021-02-01 17:00:49,018 :: INFO <module> :: 		 43 patches of 640x640 (tumor tissue).
2021-02-01 17:00:49,019 :: INFO <module> :: 		 4 patches of 640x640 (background).
2021-02-01 17:01:35,484 :: INFO <module> :: Processing wsi 'TCGA-BB-7861-01Z-00-DX1.efdd186f-1bbf-4e43-9875-9458cebcd0dd_0.png'
2021-02-01 17:01:35,917 :: INFO <module> :: 	 4 patches of 640x640 size.
2021-02-01 17:01:35,919 :: INFO <module> :: 		 0 patches of 640x640 (normal tissue).
2021-02-01 17:01:35,920 :: INFO <module> :: 		 4 patches of 640x640 (tumor tissue).
2021-02-01 17:01:35,934 :: INFO <module> :: 		 0 patches of 640x640 (background).
2021-02-01 17:01:37,433 :: INFO <module> :: Processing wsi 'TCGA-BB-7861-01Z-00-DX1.efdd186f-1bbf-4e43-9875-9458cebcd0dd_1.png'
2021-02-01 17:01:37,809 :: INFO <module> :: 	 4 patches of 640x640 size.
2021-02-01 17:01:37,810 :: INFO <module> :: 		 2 patches of 640x640 (normal tissue).
2021-02-01

2021-02-01 17:12:46,507 :: INFO <module> :: 		 30 patches of 640x640 (tumor tissue).
2021-02-01 17:12:46,508 :: INFO <module> :: 		 4 patches of 640x640 (background).
2021-02-01 17:13:26,856 :: INFO <module> :: Processing wsi 'TCGA-BB-8601-01Z-00-DX1.A226FE85-28D7-44C1-8330-CC687677EC59_0.png'
2021-02-01 17:13:30,489 :: INFO <module> :: 	 49 patches of 640x640 size.
2021-02-01 17:13:30,490 :: INFO <module> :: 		 21 patches of 640x640 (normal tissue).
2021-02-01 17:13:30,492 :: INFO <module> :: 		 24 patches of 640x640 (tumor tissue).
2021-02-01 17:13:30,493 :: INFO <module> :: 		 4 patches of 640x640 (background).
2021-02-01 17:14:06,585 :: INFO <module> :: Processing wsi 'TCGA-BB-8601-01Z-00-DX1.A226FE85-28D7-44C1-8330-CC687677EC59_1.png'
2021-02-01 17:14:09,990 :: INFO <module> :: 	 49 patches of 640x640 size.
2021-02-01 17:14:09,991 :: INFO <module> :: 		 10 patches of 640x640 (normal tissue).
2021-02-01 17:14:09,992 :: INFO <module> :: 		 35 patches of 640x640 (tumor tissue).
2021-

2021-02-01 17:25:47,804 :: INFO <module> :: 		 4 patches of 640x640 (background).


# Generate the 512x512 dataset

In [7]:
from orca_dataloader import *

# Generates a down-sized copy of the ORCA dataset.
#
# For each PNG found under "<orca_dir>/<dataset_type>/<cls>/wsi" the cell
# resizes the image and its "<name>_mask.png" lesion annotation with
# image_resize(width=512, height=512) and writes both below
# "../../datasets/ORCA_512x512". The saved mask is binary: only annotation
# pixels that are still exactly 255 after resizing are kept as True.
#
# All helpers (get_scale_by_magnification, load_pil_image, pil_to_np,
# np_to_pil, image_resize) and the names os, np, logger come from the star
# import above.
magnification = 0.625
scale = get_scale_by_magnification(magnification)
tile_size = 20
tile_size_original = int(scale * tile_size)

# Edge length, in pixels, requested for the resized images and masks.
target_size = 512

cls = "tumor"
orca_dir = "../../datasets/ORCA"
orca_512x512_dir = "../../datasets/ORCA_512x512"

dataset_types = ["training", "testing"]
for dataset_type in sorted(dataset_types):

    logger.info("{} images".format(dataset_type))

    annotations_dir = "{}/{}/lesion_annotations".format(orca_dir, dataset_type)
    tma_images_dir = "{}/{}/{}/wsi".format(orca_dir, dataset_type, cls)

    resized_annotations_dir = "{}/{}/lesion_annotations".format(orca_512x512_dir, dataset_type)
    resized_tma_images_dir = "{}/{}/{}/tma".format(orca_512x512_dir, dataset_type, cls)

    # The save() calls below fail if the target directory is missing, so
    # create both output directories up front (no-op when they exist).
    os.makedirs(resized_annotations_dir, exist_ok=True)
    os.makedirs(resized_tma_images_dir, exist_ok=True)

    for r, d, f in sorted(os.walk(tma_images_dir)):
        for tma_file in sorted(f):

            # Only PNG images are processed (case-insensitive extension test).
            if not tma_file.lower().endswith('.png'):
                continue

            tma_image_file = "{}/{}".format(r, tma_file)
            # Strip only the trailing extension; str.replace would also remove
            # ".png" inside the name and would miss an upper-case ".PNG".
            tma_image_number = os.path.splitext(tma_file)[0]

            logger.info("Processing tma '{}'".format(tma_file))

            # tumor annotations mask
            annotation_file = "{}/{}_mask.png".format(annotations_dir, tma_image_number)
            tma_mask_pil = load_pil_image(annotation_file, gray=True)

            # tumor tma image
            tma_image_pil = load_pil_image(tma_image_file, gray=False)

            # Resize the annotation, then binarise it. The boolean indexing
            # requires the resized mask to have exactly the allocated
            # (target_size, target_size) shape.
            # NOTE(review): whether image_resize honours both width and
            # height (rather than keeping the aspect ratio) is not visible
            # here - confirm for non-square inputs.
            resized_tma_mask_np = image_resize(pil_to_np(tma_mask_pil), width=target_size, height=target_size)
            np_tumor_mask = np.zeros((target_size, target_size), dtype=bool)
            np_tumor_mask[resized_tma_mask_np == 255] = True
            pil_tumor_mask = np_to_pil(np_tumor_mask)
            pil_tumor_mask.save("{}/{}_mask.png".format(resized_annotations_dir, tma_image_number))

            # Resize the image itself and store it under its original file name.
            resized_tma_image_np = image_resize(pil_to_np(tma_image_pil), width=target_size, height=target_size)
            np_to_pil(resized_tma_image_np).save("{}/{}".format(resized_tma_images_dir, tma_file))
                

2021-05-30 10:38:59,104 :: INFO <module> :: testing images
2021-05-30 10:38:59,111 :: INFO <module> :: Processing tma 'TCGA-CN-4723-01Z-00-DX1.13483e7b-9322-4d39-8cd6-91e898bf2ee9_0.png'
2021-05-30 10:39:00,593 :: INFO <module> :: Processing tma 'TCGA-CN-4723-01Z-00-DX1.13483e7b-9322-4d39-8cd6-91e898bf2ee9_1.png'
2021-05-30 10:39:02,502 :: INFO <module> :: Processing tma 'TCGA-CN-4725-01Z-00-DX1.fe518acf-cceb-4be4-b01a-63d66248d6d4_0.png'
2021-05-30 10:39:04,499 :: INFO <module> :: Processing tma 'TCGA-CN-4725-01Z-00-DX1.fe518acf-cceb-4be4-b01a-63d66248d6d4_1.png'
2021-05-30 10:39:06,942 :: INFO <module> :: Processing tma 'TCGA-CN-4726-01Z-00-DX1.0ddf44ae-1cb7-41f1-8b59-a5a689f5a71c_0.png'
2021-05-30 10:39:09,116 :: INFO <module> :: Processing tma 'TCGA-CN-4726-01Z-00-DX1.0ddf44ae-1cb7-41f1-8b59-a5a689f5a71c_1.png'
2021-05-30 10:39:11,293 :: INFO <module> :: Processing tma 'TCGA-CN-4727-01Z-00-DX1.bee9d488-0f05-4579-8c34-aa4197972bb1_0.png'
2021-05-30 10:39:13,618 :: INFO <module> :: P

2021-05-30 10:41:12,236 :: INFO <module> :: Processing tma 'TCGA-CN-6010-01Z-00-DX1.8a82a2d0-c687-4b75-922f-b5b64b5a99df_0.png'
2021-05-30 10:41:14,161 :: INFO <module> :: Processing tma 'TCGA-CN-6010-01Z-00-DX1.8a82a2d0-c687-4b75-922f-b5b64b5a99df_1.png'
2021-05-30 10:41:16,090 :: INFO <module> :: Processing tma 'TCGA-CN-6011-01Z-00-DX1.a3a73ffe-06b8-42ea-820b-9c2824d96585_0.png'
2021-05-30 10:41:18,228 :: INFO <module> :: Processing tma 'TCGA-CN-6011-01Z-00-DX1.a3a73ffe-06b8-42ea-820b-9c2824d96585_1.png'
2021-05-30 10:41:20,475 :: INFO <module> :: Processing tma 'TCGA-CN-6012-01Z-00-DX1.bd9283bb-47c7-41ab-996a-f3f5070cfc73_0.png'
2021-05-30 10:41:22,418 :: INFO <module> :: Processing tma 'TCGA-CN-6012-01Z-00-DX1.bd9283bb-47c7-41ab-996a-f3f5070cfc73_1.png'
2021-05-30 10:41:24,605 :: INFO <module> :: Processing tma 'TCGA-CN-6016-01Z-00-DX1.737054ae-50f4-42c4-b79c-c462220b12f3_0.png'
2021-05-30 10:41:26,687 :: INFO <module> :: Processing tma 'TCGA-CN-6016-01Z-00-DX1.737054ae-50f4-42c4-b

2021-05-30 10:43:13,493 :: INFO <module> :: Processing tma 'TCGA-BA-6869-01Z-00-DX1.6e58648e-3309-47bb-b2c7-b71bcd9dc69b_0.png'
2021-05-30 10:43:15,775 :: INFO <module> :: Processing tma 'TCGA-BA-6869-01Z-00-DX1.6e58648e-3309-47bb-b2c7-b71bcd9dc69b_1.png'
2021-05-30 10:43:17,828 :: INFO <module> :: Processing tma 'TCGA-BA-6871-01Z-00-DX1.4d9ca329-f8d5-4c31-a20f-9d4fba728326_0.png'
2021-05-30 10:43:19,735 :: INFO <module> :: Processing tma 'TCGA-BA-6871-01Z-00-DX1.4d9ca329-f8d5-4c31-a20f-9d4fba728326_1.png'
2021-05-30 10:43:22,134 :: INFO <module> :: Processing tma 'TCGA-BA-6872-01Z-00-DX1.F99F6394-D319-4599-B124-D2A6692FA096_0.png'
2021-05-30 10:43:24,369 :: INFO <module> :: Processing tma 'TCGA-BA-6872-01Z-00-DX1.F99F6394-D319-4599-B124-D2A6692FA096_1.png'
2021-05-30 10:43:26,212 :: INFO <module> :: Processing tma 'TCGA-BA-6873-01Z-00-DX1.F0A84DE2-7A08-44D5-9FD1-A284F55AA8C1_0.png'
2021-05-30 10:43:28,211 :: INFO <module> :: Processing tma 'TCGA-BA-7269-01Z-00-DX1.A759601F-7513-47C6-B

2021-05-30 10:45:31,907 :: INFO <module> :: Processing tma 'TCGA-BB-A6UO-01Z-00-DX1.11D049DC-EFC3-47EB-B390-A694BFD304A2_1.png'
2021-05-30 10:45:34,866 :: INFO <module> :: Processing tma 'TCGA-C9-A47Z-01Z-00-DX1.7999EE26-CB1A-464D-8E88-F3122F8D1A41_0.png'
2021-05-30 10:45:37,199 :: INFO <module> :: Processing tma 'TCGA-C9-A47Z-01Z-00-DX1.7999EE26-CB1A-464D-8E88-F3122F8D1A41_1.png'
2021-05-30 10:45:39,715 :: INFO <module> :: Processing tma 'TCGA-C9-A480-01Z-00-DX1.24793EF4-30F2-46DF-A64E-52021F132EE0_0.png'
2021-05-30 10:45:42,064 :: INFO <module> :: Processing tma 'TCGA-C9-A480-01Z-00-DX1.24793EF4-30F2-46DF-A64E-52021F132EE0_1.png'
2021-05-30 10:45:44,106 :: INFO <module> :: Processing tma 'TCGA-CN-4722-01Z-00-DX1.cf599bd8-c285-4f44-82d0-64f4b453d5e5_0.png'
2021-05-30 10:45:46,531 :: INFO <module> :: Processing tma 'TCGA-CN-4722-01Z-00-DX1.cf599bd8-c285-4f44-82d0-64f4b453d5e5_1.png'
