In [1]:
import argparse
import glob
import os
import random
from importlib.resources import path

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tifffile

# from genericpath import isfile
from openslide import OpenSlide

In [2]:
parent_folder = '/local_storage/High_Risk_Breast_Cancer_2022/bracs_icar/BRACS_WSI/'

In [3]:
def get_slide_file_path(slide_id):
    #train
    data_dir = os.path.join('/local_storage/High_Risk_Breast_Cancer_2022/bracs_icar/BRACS_WSI/')
    # holdout
    #data_dir = os.path.join('/','home','ngsci','datasets','brca-psj-path', 'holdout')
    slide_fp = os.path.join(data_dir,'*', '*', '*', f'{slide_id}.svs')
    return glob.glob(slide_fp)[0]


class PatchGeneratorPixel:
    def __init__(self, slide_path, slide_level) -> None:
        self.slide_path = slide_path
        self.slide_dir_path = os.path.dirname(os.path.realpath(self.slide_path))
        self.slide_name = os.path.splitext(os.path.basename(self.slide_path))[0]
        self.slide_level = slide_level
        self.patch_dir_path = 'bracs_save_level7_npy/'
        os.makedirs(self.patch_dir_path, exist_ok=True)
        
        #os.path.join(self.slide_dir_path, "..", f"patch_by_pixel_level_{self.slide_level}_resized_all")

    def get_slide_name_w_ext(self):
        return os.path.basename(self.slide_path)
    
    def read_lvl7_then_write(self):
        slide_openslide = OpenSlide(self.slide_path)
        img_openslide = slide_openslide.read_region((0, 0), self.slide_level, slide_openslide.level_dimensions[2])
        img_openslide_RGB = img_openslide.convert("RGB")
        img_openslide_np = np.array(img_openslide_RGB)
        print(img_openslide_np.shape)
        
        width, height = (
                int(img_openslide_np.shape[1] // 2**3),
                int(img_openslide_np.shape[0] // 2**3),
            )
        img_openslide_np = cv2.resize(
            img_openslide_np, (width, height), interpolation=cv2.INTER_AREA
        )
            
        print(img_openslide_np.shape)

        save_slide_folder = os.path.join(self.patch_dir_path, self.slide_name)
        print(save_slide_folder)
        np.save( save_slide_folder+'_level'+str(self.slide_level+5)+'.npy', img_openslide_np ) 

In [4]:
get_slide_file_path('BRACS_1356')

'/local_storage/High_Risk_Breast_Cancer_2022/bracs_icar/BRACS_WSI/train/Group_MT/Type_IC/BRACS_1356.svs'

In [5]:
get_slide_file_path('BRACS_1855')

'/local_storage/High_Risk_Breast_Cancer_2022/bracs_icar/BRACS_WSI/test/Group_BT/Type_N/BRACS_1855.svs'

In [6]:
get_slide_file_path('BRACS_1335')

'/local_storage/High_Risk_Breast_Cancer_2022/bracs_icar/BRACS_WSI/train/Group_BT/Type_UDH/BRACS_1335.svs'

In [7]:
slide_openslide = OpenSlide('/local_storage/High_Risk_Breast_Cancer_2022/bracs_icar/BRACS_WSI/train/Group_BT/Type_UDH/BRACS_1335.svs')

In [8]:
slide_openslide.level_dimensions

((107568, 85049), (26892, 21262), (6723, 5315), (3361, 2657))

In [9]:
slide_openslide.level_downsamples

(1.0, 4.0000235161320665, 16.00084666039511, 32.00708479798329)

#### -> levels are: 0, 2, 4, 5

### Process

In [13]:
slide_level = 2 # this is 5 for the competition dataset
    
# ALL OF THE DATA
data_dir = parent_folder #os.path.join('/','home','ngsci','datasets','brca-psj-path')

slides_fp = os.path.join(data_dir,'*','*', '*','*')
slides_list = [ j.split('/')[-1].split('.svs')[0] for j in sorted(glob.glob(slides_fp)) ]

len(slides_list) # number of WSIs 

547

In [14]:
slides_list[:5]

['BRACS_1003691', 'BRACS_1003694', 'BRACS_1228', 'BRACS_1503', 'BRACS_1589']

In [15]:
nr_of_slides_processed = 0

for slide_path in slides_list:
    try:
        print('slide path:', slide_path)
        patch_generator = PatchGeneratorPixel( slide_path=get_slide_file_path(slide_path), 
                                               slide_level=slide_level)
        patch_generator.read_lvl7_then_write()
        print('processed slides:', nr_of_slides_processed)
        nr_of_slides_processed += 1
    except:
        print('passed') # this means no processing if current slide does not have this resolution level
        pass

slide path: BRACS_1003691
(4411, 3867, 3)
(551, 483, 3)
bracs_save_level7_npy/BRACS_1003691
processed slides: 0
slide path: BRACS_1003694
(2620, 7853, 3)
(327, 981, 3)
bracs_save_level7_npy/BRACS_1003694
processed slides: 1
slide path: BRACS_1228
(3824, 6723, 3)
(478, 840, 3)
bracs_save_level7_npy/BRACS_1228
processed slides: 2
slide path: BRACS_1503
(4664, 6225, 3)
(583, 778, 3)
bracs_save_level7_npy/BRACS_1503
processed slides: 3
slide path: BRACS_1589
(5540, 6847, 3)
(692, 855, 3)
bracs_save_level7_npy/BRACS_1589
processed slides: 4
slide path: BRACS_1825
(5421, 5976, 3)
(677, 747, 3)
bracs_save_level7_npy/BRACS_1825
processed slides: 5
slide path: BRACS_1844
(4608, 7345, 3)
(576, 918, 3)
bracs_save_level7_npy/BRACS_1844
processed slides: 6
slide path: BRACS_1849
(5126, 5727, 3)
(640, 715, 3)
bracs_save_level7_npy/BRACS_1849
processed slides: 7
slide path: BRACS_1856
(5529, 9337, 3)
(691, 1167, 3)
bracs_save_level7_npy/BRACS_1856
processed slides: 8
slide path: BRACS_1892
(5541, 659