In [1]:
from openslide import open_slide

In [2]:
import openslide 

In [4]:
from PIL import Image

In [5]:
import numpy as np

In [1]:
import os
data_path = '/lab/deasylab3/Jung/tiger/'
dir_TIFF_images = data_path + "/wsirois/wsi-level-annotations/images/"


imgs_names = os.listdir(dir_TIFF_images)
imgs_names.sort()
imgs_names = [i for i in imgs_names if i.startswith('TCGA')]  
wsi_path = dir_TIFF_images + imgs_names[0]
wsi_path

'/lab/deasylab3/Jung/tiger//wsirois/wsi-level-annotations/images/TCGA-A1-A0SK-01Z-00-DX1.A44D70FA-4D96-43F4-9DD7-A61535786297.tif'

In [8]:
import tiatoolbox


In [9]:
from tiatoolbox.tools.patchextraction import SlidingWindowPatchExtractor


In [13]:
x_min,y_min,x_max,y_max = 8209,200,59972,34836

In [14]:
from tiatoolbox.tools.patchextraction import get_patch_extractor
from tiatoolbox.wsicore.wsireader import WSIReader
import numpy as np

In [19]:
# 1. Open the WSI
wsi = WSIReader.open(wsi_path)
patch_size=(224, 224)
stride=(224, 224)
from tiatoolbox.tools.patchextraction import SlidingWindowPatchExtractor
from tiatoolbox.wsicore.wsireader import WSIReader

def extract_patches_in_bbox(
    slide_path,
    bbox,
    patch_size=(224, 224),
    stride=(224, 224),
    resolution=0,         # or use 1.25 with units="mpp"
    units="level",
    pad_mode="constant",
    pad_value=255,
    min_mask_ratio=0.0,
):
    x_min, y_min, x_max, y_max = bbox
    region = (x_min, y_min, x_max - x_min, y_max - y_min)

    wsi = WSIReader.open(slide_path)

    extractor = SlidingWindowPatchExtractor(
        input_img=wsi,
        patch_size=patch_size,
        stride=stride,
        resolution=resolution,
        units=units,
        pad_mode=pad_mode,
        pad_constant_values=pad_value,
        min_mask_ratio=min_mask_ratio,
        region=region  # restricts extraction to this area
    )

    return [patch for patch in extractor]


In [21]:
region_width = x_max - x_min
region_height = y_max - y_min

# Crop the WSI at level 0
cropped = wsi.read_rect(
    location=(x_min, y_min),
    size=(region_width, region_height),
    resolution=0,
    units="level"
)



KeyboardInterrupt: 

In [23]:
wsi_dims = wsi.slide_dimensions(resolution=0, units="level")
from tiatoolbox.wsicore.wsireader import WSIReader, VirtualWSIReader
# --- Step 3: Create binary mask from bounding box ---
bbox_mask = np.zeros((wsi_dims[1], wsi_dims[0]), dtype=bool)  # (H, W)
bbox_mask[y_min:y_max, x_min:x_max] = True

# Wrap as a VirtualWSIReader (efficient access at pyramid levels)
mask_reader = VirtualWSIReader(
    bbox_mask,
    info=wsi.info,
    mode="bool"
)

In [25]:
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
from PIL import Image
from tiatoolbox.wsicore.wsireader import WSIReader, VirtualWSIReader
from tiatoolbox.tools.patchextraction import SlidingWindowPatchExtractor


def extract_and_save_patches(
    wsi_path,
    x_min,
    y_min,
    x_max,
    y_max,
    out_dir="tiles",
    patch_size=(224, 224),
    stride=(224, 224),
    min_mask_ratio=0.5,
):
    os.makedirs(out_dir, exist_ok=True)
    csv_path = os.path.join(out_dir, "pipeline_tiles.csv")

    # Step 1: Open WSI
    wsi = WSIReader.open(wsi_path)
    wsi_dims = wsi.slide_dimensions(resolution=0, units="level")  # (W, H)

    # Step 2: Create bounding box binary mask
    bbox_mask = np.zeros((wsi_dims[1], wsi_dims[0]), dtype=bool)  # (H, W)
    bbox_mask[y_min:y_max, x_min:x_max] = True

    # Step 3: Wrap mask with VirtualWSIReader
    mask_reader = VirtualWSIReader(bbox_mask, info=wsi.info, mode="bool")

    # Step 4: Initialize patch extractor
    extractor = SlidingWindowPatchExtractor(
        input_img=wsi,
        patch_size=patch_size,
        stride=stride,
        resolution=0,
        units="level",
        input_mask=mask_reader,
        within_bound=True,
        min_mask_ratio=min_mask_ratio,
    )

    # Step 5: Extract and save patches
    coords = extractor.locations_df[["x", "y"]]
    metadata = []

    for i, patch in enumerate(tqdm(extractor, desc="Extracting patches")):
        x, y = int(coords.iloc[i]["x"]), int(coords.iloc[i]["y"])
        filename = f"tile_{i}_{x}_{y}.png"
        save_path = os.path.join(out_dir, filename)
        Image.fromarray(patch).save(save_path)

        metadata.append({
            "index": i,
            "filename": filename,
            "x": x,
            "y": y
        })

    pd.DataFrame(metadata).to_csv(csv_path, index=False)

    print(f"✅ Saved {len(metadata)} patches to {out_dir}")
    print(f"📄 Patch metadata CSV saved to {csv_path}")


In [31]:
import os
import numpy as np
import pandas as pd
from PIL import Image
from tqdm import tqdm

from skimage.color import rgb2hed
from skimage.filters import gaussian, threshold_otsu
from skimage.morphology import remove_small_objects, remove_small_holes

from tiatoolbox.wsicore.wsireader import WSIReader, VirtualWSIReader
from tiatoolbox.tools.patchextraction import SlidingWindowPatchExtractor


def generate_he_mask(wsi, x_min, y_min, x_max, y_max, sigma=1.5, min_size=300):
    """Generate a tissue mask using H&E deconvolution from a bounding box region."""
    # Read region RGB (as NumPy array)
    region = wsi.read_rect(
        location=(x_min, y_min),
        size=(x_max - x_min, y_max - y_min),
        resolution=0,
        units="level"
    )

    # --- H&E Deconvolution on Hematoxylin Channel
    he = rgb2hed(region)
    hema = -he[:, :, 0]
    hema = (hema - np.min(hema)) / (np.max(hema) - np.min(hema))

    # Gaussian filter and Otsu threshold
    smooth = gaussian(hema, sigma=sigma)
    threshold = threshold_otsu(smooth)
    tissue_mask = smooth > threshold

    # Clean up mask
    tissue_mask = remove_small_objects(tissue_mask, min_size=min_size)
    tissue_mask = remove_small_holes(tissue_mask, area_threshold=min_size)

    # Insert into full-resolution binary mask
    full_mask = np.zeros(wsi.slide_dimensions(resolution=0, units="level")[::-1], dtype=bool)
    full_mask[y_min:y_max, x_min:x_max] = tissue_mask.astype(bool)

    return full_mask


In [32]:
def extract_and_save_patches(
    wsi_path,
    x_min,
    y_min,
    x_max,
    y_max,
    out_dir="tiles",
    patch_size=(224, 224),
    stride=(224, 224),
    min_mask_ratio=0.5,
):
    os.makedirs(out_dir, exist_ok=True)
    csv_path = os.path.join(out_dir, "pipeline_tiles.csv")

    # Load WSI
    wsi = WSIReader.open(wsi_path)

    # Generate H&E-based tissue mask
    tissue_mask = generate_he_mask(wsi, x_min, y_min, x_max, y_max)

    # Wrap mask into VirtualWSIReader
    mask_reader = VirtualWSIReader(tissue_mask, info=wsi.info, mode="bool")

    # Initialize patch extractor with tissue mask
    extractor = SlidingWindowPatchExtractor(
        input_img=wsi,
        patch_size=patch_size,
        stride=stride,
        resolution=0,
        units="level",
        input_mask=mask_reader,
        within_bound=True,
        min_mask_ratio=min_mask_ratio,
    )

    # Extract patches and save
    coords = extractor.locations_df[["x", "y"]]
    metadata = []

    for i, patch in enumerate(tqdm(extractor, desc="Extracting tissue patches")):
        x, y = int(coords.iloc[i]["x"]), int(coords.iloc[i]["y"])
        filename = f"tile_{i}_{x}_{y}.png"
        save_path = os.path.join(out_dir, filename)
        Image.fromarray(patch).save(save_path)

        metadata.append({
            "index": i,
            "filename": filename,
            "x": x,
            "y": y
        })

    pd.DataFrame(metadata).to_csv(csv_path, index=False)
    print(f"✅ Saved {len(metadata)} patches to {out_dir}")
    print(f"📄 Patch metadata CSV saved to {csv_path}")


In [2]:
import os
import numpy as np
from tqdm import tqdm
from PIL import Image
import pandas as pd
from tiatoolbox.wsicore.wsireader import WSIReader
from tiatoolbox.tools.patchextraction import SlidingWindowPatchExtractor
from tiatoolbox.wsicore.wsireader import VirtualWSIReader

# Inputs

output_dir = "tiles"
patch_size = (224, 224)
stride = (224, 224)
min_mask_ratio = 0.5
x_min, y_min, x_max, y_max = 8209, 200, 59972, 34836

# 1. Open the WSI
wsi = WSIReader.open(input_img=wsi_path)

# 2. Crop region + mask
region_width = x_max - x_min
region_height = y_max - y_min
region_img = wsi.read_rect(location=(x_min, y_min), size=(region_width, region_height), resolution=0, units="level")

# 3. Resize tissue mask for cropped region
# You must already have a binary mask (full resolution) as a NumPy array named `full_mask`
region_mask = full_mask[y_min:y_max, x_min:x_max]

# 4. Create VirtualWSIReader for cropped region & mask
wsi_cropped = VirtualWSIReader(region_img, info=wsi.info)
mask_cropped = VirtualWSIReader(region_mask, info=wsi.info, mode="bool")

# 5. Setup patch extractor
extractor = SlidingWindowPatchExtractor(
    input_img=wsi_cropped,
    patch_size=patch_size,
    stride=stride,
    resolution=0,
    units="level",
    input_mask=mask_cropped,
    within_bound=True,
    min_mask_ratio=min_mask_ratio,
)

# 6. Extract and save patches
os.makedirs(output_dir, exist_ok=True)
csv_path = os.path.join(output_dir, "pipeline_tiles.csv")
coords = extractor.locations_df[["x", "y"]]
metadata = []

for i, patch in enumerate(tqdm(extractor, desc="Extracting patches")):
    x, y = int(coords.iloc[i]["x"]), int(coords.iloc[i]["y"])
    filename = f"tile_{i}_{x+x_min:05d}_{y+y_min:05d}.png"
    save_path = os.path.join(output_dir, filename)
    Image.fromarray(patch).save(save_path)
    metadata.append({"index": i, "filename": filename, "x": x + x_min, "y": y + y_min})

# 7. Save metadata
df = pd.DataFrame(metadata)
df.to_csv(csv_path, index=False)
print(f"✅ Saved {len(metadata)} patches to {output_dir}")



  check_for_updates()



NameError: name 'full_mask' is not defined

In [4]:
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
from PIL import Image
from tiatoolbox.wsicore.wsireader import WSIReader, VirtualWSIReader
from tiatoolbox.tools.patchextraction import SlidingWindowPatchExtractor


def extract_and_save_patches(
    wsi_path,
    x_min,
    y_min,
    x_max,
    y_max,
    out_dir="tiles",
    patch_size=(224, 224),
    stride=(224, 224),
    min_mask_ratio=0.5,
):
    os.makedirs(out_dir, exist_ok=True)
    csv_path = os.path.join(out_dir, "pipeline_tiles.csv")

    # Step 1: Open WSI
    wsi = WSIReader.open(wsi_path)
    wsi_dims = wsi.slide_dimensions(resolution=0, units="level")  # (W, H)

    # Step 2: Create bounding box binary mask
    bbox_mask = np.zeros((wsi_dims[1], wsi_dims[0]), dtype=bool)  # (H, W)
    bbox_mask[y_min:y_max, x_min:x_max] = True

    # Step 3: Wrap mask with VirtualWSIReader
    mask_reader = VirtualWSIReader(bbox_mask, info=wsi.info, mode="bool")

    # Step 4: Initialize patch extractor
    extractor = SlidingWindowPatchExtractor(
        input_img=wsi,
        patch_size=patch_size,
        stride=stride,
        resolution=0,
        units="level",
        input_mask=mask_reader,
        within_bound=True,
        min_mask_ratio=min_mask_ratio,
    )

    # Step 5: Extract and save patches
    coords = extractor.locations_df[["x", "y"]]
    metadata = []

    for i, patch in enumerate(tqdm(extractor, desc="Extracting patches")):
        x, y = int(coords.iloc[i]["x"]), int(coords.iloc[i]["y"])
        filename = f"tile_{i}_{x}_{y}.png"
        save_path = os.path.join(out_dir, filename)
        Image.fromarray(patch).save(save_path)

        metadata.append({
            "index": i,
            "filename": filename,
            "x": x,
            "y": y
        })

    pd.DataFrame(metadata).to_csv(csv_path, index=False)

    print(f"✅ Saved {len(metadata)} patches to {out_dir}")
    print(f"📄 Patch metadata CSV saved to {csv_path}")


In [None]:
extract_and_save_patches(
    wsi_path=wsi_path,
    x_min=8209,
    y_min=200,
    x_max=59972,
    y_max=34836,
    out_dir="output5",
    patch_size=(224, 224),
    stride=(224, 224),
    min_mask_ratio=0.5
)


Extracting patches:   0%|▏                                                                                             | 94/35804 [00:17<1:49:31,  5.43it/s]