In [7]:
import os
import numpy as np
import h5py
from datetime import datetime
from skimage.filters import threshold_otsu
import matplotlib.pyplot as plt
from tqdm import tqdm
from skimage.filters import threshold_otsu

from preprocessing.datamodel import SlideManager
from preprocessing.processing import (
    split_negative_slide,
    split_positive_slide,
    rgb2gray,
)

In [8]:
# Timestamp taken at the start of the run; the timing summary subtracts it from t5
# to report the total execution time.
t1 = datetime.now()

In [9]:
# Root storage folder; every other path below is derived from it.
CAM_BASE_DIR = "/storage/jitbitan/"
# Directory handed to SlideManager as cam16_dir.
CAM16_DIR = f"{CAM_BASE_DIR}Camelyon16/"
# Folder into which the per-slide and merged HDF5 tile files are written.
GENERATED_DATA = f"{CAM_BASE_DIR}outputs/cam16_tiles/"

In [4]:
# Level and tile size that later cells pass to get_full_slide / split_*_slide.
lvl, tile_size = 3, 256
# Alternative setting kept for reference: lvl = 0, tile_size = 312

# Manager built from the Camelyon16 directory; later cells iterate its
# negative_slides and annotated_slides.
sldmgr = SlideManager(cam16_dir=CAM16_DIR)

In [5]:
# Values passed as poi_threshold to the negative and positive tilers respectively.
poi_normal, poi_tumor = 0.2, 0.6

# Values passed as overlap to the tilers: none for negative slides,
# half of tile_size for annotated slides.
overlap_normal, overlap_tumor = 0, tile_size // 2

# MAX_TILES = 1000

In [6]:
# Timestamp taken just before the negative slides are tiled (start of the t3 - t2 interval).
t2 = datetime.now()

In [7]:
# Tile every negative slide and store the tiles of each slide in its own HDF5 file.
#
# h5py does not create missing parent directories, so make sure the output
# folder exists before the first file is opened for writing.
os.makedirs(GENERATED_DATA, exist_ok=True)

# Running 1-based slide counter; stays 0 if there are no negative slides.
neg_wsi = 0

for neg_wsi, slide in enumerate(sldmgr.negative_slides, start=1):
    # Otsu threshold computed on the grayscale version of the full slide at `lvl`;
    # it is handed to the tiler as otsu_threshold.
    arr = np.array(slide.get_full_slide(level=lvl))
    arr_gray = rgb2gray(arr)
    thr = threshold_otsu(arr_gray)

    tile_iter = split_negative_slide(
        slide,
        level=lvl,
        otsu_threshold=thr,
        tile_size=tile_size,
        overlap=overlap_normal,
        poi_threshold=poi_normal,
    )

    # The iterator yields (tile, bounds) pairs; only the tiles are kept.
    all_tiles = [tile for tile, _bounds in tile_iter]
    tile_count = len(all_tiles)

    print(f"Negative WSI {neg_wsi}: {slide.name}\tTile Count: {tile_count}")

    # np.stack cannot handle an empty list, and an empty file is of no use.
    if tile_count == 0:
        continue

    tiles_data = np.stack(all_tiles)

    # The file name encodes the slide name and all tiling parameters; the merge
    # cell relies on this naming scheme.
    filename = "{}{}_tile{}_poiNormal{}_poiTumor{}_level{}.hdf5".format(
        GENERATED_DATA, slide.name, tile_size, poi_normal, poi_tumor, lvl
    )
    with h5py.File(filename, "w") as f:
        # One dataset per file, named after the slide.
        f.create_dataset(name=slide.name, data=tiles_data)

Negative WSI 1: normal_001	Tile Count: 185
Negative WSI 2: normal_002	Tile Count: 324
Negative WSI 3: normal_003	Tile Count: 398
Negative WSI 4: normal_004	Tile Count: 111
Negative WSI 5: normal_005	Tile Count: 256
Negative WSI 6: normal_006	Tile Count: 86
Negative WSI 7: normal_007	Tile Count: 320
Negative WSI 8: normal_008	Tile Count: 164
Negative WSI 9: normal_009	Tile Count: 469
Negative WSI 10: normal_010	Tile Count: 201
Negative WSI 11: normal_011	Tile Count: 888
Negative WSI 12: normal_012	Tile Count: 232
Negative WSI 13: normal_013	Tile Count: 145
Negative WSI 14: normal_014	Tile Count: 221
Negative WSI 15: normal_015	Tile Count: 421
Negative WSI 16: normal_016	Tile Count: 77
Negative WSI 17: normal_017	Tile Count: 42
Negative WSI 18: normal_018	Tile Count: 234
Negative WSI 19: normal_019	Tile Count: 23
Negative WSI 20: normal_020	Tile Count: 68
Negative WSI 21: normal_021	Tile Count: 37
Negative WSI 22: normal_022	Tile Count: 501
Negative WSI 23: normal_023	Tile Count: 127
Neg

In [7]:
# Timestamp taken between the negative and annotated tiling cells
# (end of the t3 - t2 interval, start of the t4 - t3 interval).
t3 = datetime.now()

In [13]:
# Tile every annotated slide and store the tiles of each slide in its own HDF5 file.
#
# h5py does not create missing parent directories, so make sure the output
# folder exists before the first file is opened for writing.
os.makedirs(GENERATED_DATA, exist_ok=True)

# Running 1-based slide counter; stays 0 if there are no annotated slides.
pos_wsi = 0

for pos_wsi, slide in enumerate(sldmgr.annotated_slides, start=1):
    tile_iter = split_positive_slide(
        slide,
        level=lvl,
        tile_size=tile_size,
        overlap=overlap_tumor,
        poi_threshold=poi_tumor,
    )

    # The iterator yields (tile, bounds) pairs; only the tiles are kept.
    all_tiles = [tile for tile, _bounds in tile_iter]
    tile_count = len(all_tiles)

    print(f"Annotated WSI {pos_wsi}: {slide.name}\tTile Count: {tile_count}")

    # np.stack cannot handle an empty list, and an empty file is of no use.
    if tile_count == 0:
        continue

    tiles_data = np.stack(all_tiles)

    # The file name encodes the slide name and all tiling parameters; the merge
    # cell relies on this naming scheme.
    filename = "{}{}_tile{}_poiNormal{}_poiTumor{}_level{}.hdf5".format(
        GENERATED_DATA, slide.name, tile_size, poi_normal, poi_tumor, lvl
    )
    with h5py.File(filename, "w") as f:
        # One dataset per file, named after the slide.
        f.create_dataset(name=slide.name, data=tiles_data)

Annotated WSI 1: tumor_089	Tile Count: 1586
Annotated WSI 2: tumor_090	Tile Count: 215
Annotated WSI 3: tumor_091	Tile Count: 36
Annotated WSI 4: tumor_092	Tile Count: 13
Annotated WSI 5: tumor_093	Tile Count: 0
Annotated WSI 6: tumor_094	Tile Count: 56
Annotated WSI 7: tumor_095	Tile Count: 838
Annotated WSI 8: tumor_096	Tile Count: 3
Annotated WSI 9: tumor_097	Tile Count: 0
Annotated WSI 10: tumor_098	Tile Count: 0
Annotated WSI 11: tumor_099	Tile Count: 5
Annotated WSI 12: tumor_100	Tile Count: 2
Annotated WSI 13: tumor_101	Tile Count: 160
Annotated WSI 14: tumor_102	Tile Count: 252
Annotated WSI 15: tumor_103	Tile Count: 0
Annotated WSI 16: tumor_104	Tile Count: 81
Annotated WSI 17: tumor_105	Tile Count: 54
Annotated WSI 18: tumor_106	Tile Count: 30
Annotated WSI 19: tumor_107	Tile Count: 4
Annotated WSI 20: tumor_108	Tile Count: 49
Annotated WSI 21: tumor_109	Tile Count: 92
Annotated WSI 22: tumor_110	Tile Count: 1000


In [7]:
# Timestamp taken after the annotated slides are tiled and before the merge
# (end of the t4 - t3 interval, start of the t5 - t4 interval).
t4 = datetime.now()

In [24]:
# Merge the per-slide HDF5 files of the current parameter set into one file
# that holds one dataset per slide.

# Every per-slide file written by the tiling cells ends with this suffix,
# preceded by the slide name.
param_suffix = "_tile{}_poiNormal{}_poiTumor{}_level{}.hdf5".format(
    tile_size,
    poi_normal,
    poi_tumor,
    lvl,
)
all_wsi_name = "all_wsi" + param_suffix
all_wsi = GENERATED_DATA + all_wsi_name

# Only pick up files of the current parameter set, and never the merged file
# itself: a merged file left over from a previous run would otherwise be
# opened for reading while it is being rewritten.
all_files = sorted(
    file
    for file in os.listdir(GENERATED_DATA)
    if file.endswith(param_suffix) and file != all_wsi_name
)

with h5py.File(all_wsi, "w") as f:
    for file in all_files:
        filename = GENERATED_DATA + file
        # The dataset inside each per-slide file is named after the slide, which
        # is the file name without the parameter suffix.
        key = file[: -len(param_suffix)]
        with h5py.File(filename, "r") as g:
            f.create_dataset(name=key, data=g[key][:])

In [9]:
# Timestamp taken after the merge; end of both the t5 - t4 and the t5 - t1 intervals.
t5 = datetime.now()

In [10]:
# Report how long each stage took, in whole seconds.
#
# timedelta.seconds is only the seconds *component* (0..86399): it silently drops
# whole days and yields misleading values for negative intervals. total_seconds()
# returns the full duration; int() keeps the whole-second output format.
# Requires t1..t5 from the timestamp cells, run in order.
print(f"Processing Negative WSIs:\t{int((t3 - t2).total_seconds())} seconds")
print(f"Processed Positive WSIs:\t{int((t4 - t3).total_seconds())} seconds")
print(f"Merged all WSIs:\t\t{int((t5 - t4).total_seconds())} seconds")
print(f"Total Execution time:\t\t{int((t5 - t1).total_seconds())} seconds")

NameError: name 't3' is not defined