Notebook for extracting patches and annotations from the Monkey dataset

In [None]:
import sys

sys.path.append("../")
from tiatoolbox.wsicore.wsireader import WSIReader, VirtualWSIReader
from tiatoolbox.tools.patchextraction import get_patch_extractor
import numpy as np
import matplotlib.pyplot as plt
import os
import json
import re
import cv2
import json
from tqdm.autonotebook import tqdm
from PIL import Image, ImageDraw

In [None]:
def extract_id(file_name: str):
    """Return the slide ID prefix of a WSI file name.

    The ID is a single letter, ``_P`` and a run of digits, and must be
    followed by an underscore, e.g. ``'A_P000001_PAS_CPG.tif'`` gives
    ``'A_P000001'``. Matching is case-insensitive and anchored at the
    start of the name.

    Returns:
        The matched ID, or ``None`` when the name does not start with
        such a prefix.
    """
    found = re.match(r"([A-Z]_P\d+)_", file_name, re.IGNORECASE)
    return found.group(1) if found else None


def parse_json_annotations(json_path: str):
    """Read the JSON file at ``json_path`` and return its decoded content."""
    with open(json_path, "r") as handle:
        raw_text = handle.read()
    return json.loads(raw_text)


def filter_points_with_bb(points_dict, bb):
    """Return the points that fall inside a bounding box.

    Args:
        points_dict: iterable of mappings, each with a ``"point"`` entry
            whose first two elements are the x and y coordinates.
        bb: box as ``(x_min, y_min, x_max, y_max)``; all four edges are
            inclusive.

    Returns:
        List of the ``"point"`` values (the same objects, not copies)
        that lie within the box, in input order.
    """

    def _inside(point):
        # Same four inclusive comparisons, in the same order.
        return (
            point[0] >= bb[0]
            and point[1] >= bb[1]
            and point[0] <= bb[2]
            and point[1] <= bb[3]
        )

    candidates = (entry["point"] for entry in points_dict)
    return [point for point in candidates if _inside(point)]


def scale_points(points_dict, scale_factor):
    """Return copies of the annotation items with x and y divided by a factor.

    The first two elements of each item's ``"point"`` are divided by
    ``scale_factor`` and truncated with ``int``; any further elements and
    all other keys are carried over unchanged.

    The input items are not modified, so calling this again on the same
    annotations (e.g. when a notebook cell is re-run) does not rescale
    coordinates that were already scaled.

    Args:
        points_dict: iterable of mappings, each with a ``"point"`` sequence
            of at least two numbers.
        scale_factor: divisor applied to both coordinates.

    Returns:
        A new list of new dicts holding the scaled points.
    """
    scaled_items = []
    for item in points_dict:
        # Copy the point so the caller's list is left untouched.
        point = list(item["point"])
        point[0] = int(point[0] / scale_factor)
        point[1] = int(point[1] / scale_factor)
        scaled_items.append({**item, "point": point})
    return scaled_items


def extract_rois_coords(annotation_rois: list[dict]):
    """Collect the ``"polygon"`` of every ROI as an int32 NumPy array.

    Returns:
        One array per ROI, in input order.
    """
    return [
        np.array(roi["polygon"], dtype=np.int32) for roi in annotation_rois
    ]


def mask_from_poly(
    poly_coords, canvas_width, canvas_height, scale_factor
):
    """Rasterise polygons onto a binary uint8 canvas after downscaling them.

    Each polygon is floor-divided by ``scale_factor``, cast to int32 and
    filled with the value 1 on a zero-initialised canvas.

    The scaled polygons are built in a local list, so the caller's
    ``poly_coords`` is left unmodified (previously its entries were
    overwritten with the scaled arrays).

    Args:
        poly_coords: iterable of NumPy arrays of polygon vertices.
        canvas_width: first dimension of the allocated canvas.
        canvas_height: second dimension of the allocated canvas.
        scale_factor: divisor applied to every vertex coordinate.

    Returns:
        uint8 array of shape ``(canvas_width, canvas_height)``.
    """
    # NOTE(review): the canvas is allocated as (canvas_width, canvas_height),
    # i.e. the width is used as the row count. OpenCV draws (x, y) points on
    # an image indexed [row, column], so this looks transposed; the shape is
    # kept as-is to preserve the returned array shape - confirm with callers.
    canvas = np.zeros(
        shape=(canvas_width, canvas_height), dtype=np.uint8
    )
    scaled_polys = [
        (poly // scale_factor).astype(np.int32) for poly in poly_coords
    ]
    cv2.fillPoly(canvas, scaled_polys, 1)
    return canvas


def filter_coords_with_mask(xs, ys, binary_mask):
    """Keep the (x, y) pairs that land on a non-zero pixel of a mask.

    Coordinates outside the mask are dropped. This includes negative
    coordinates, which would otherwise wrap around to the opposite edge
    through negative indexing. The bounds are checked explicitly rather
    than with a catch-all ``except``, so unrelated errors are no longer
    silently swallowed.

    Args:
        xs: integer column indices.
        ys: integer row indices, paired element-wise with ``xs``.
        binary_mask: 2-D array indexed as ``binary_mask[y, x]``.

    Returns:
        Tuple ``(new_xs, new_ys)`` of the surviving coordinates, in input
        order.
    """
    height, width = binary_mask.shape[:2]
    new_xs, new_ys = [], []
    for x, y in zip(xs, ys):
        if not (0 <= x < width and 0 <= y < height):
            continue
        if binary_mask[y, x] != 0:
            new_xs.append(x)
            new_ys.append(y)
    return new_xs, new_ys


def get_relative_coords(base_coords, bb, mask):
    """Express the points inside a bounding box relative to its origin.

    Points are first selected with ``filter_points_with_bb``, then shifted
    so that ``(bb[0], bb[1])`` becomes ``(0, 0)``, and finally passed
    through ``filter_coords_with_mask`` with ``mask``.

    Returns:
        Tuple ``(xs, ys)`` of integer coordinates relative to the box.
    """
    origin_x, origin_y = bb[0], bb[1]
    shifted_xs, shifted_ys = [], []
    for point in filter_points_with_bb(base_coords, bb):
        shifted_xs.append(int(point[0] - origin_x))
        shifted_ys.append(int(point[1] - origin_y))

    kept_xs, kept_ys = filter_coords_with_mask(shifted_xs, shifted_ys, mask)
    return kept_xs, kept_ys


def save_data(
    file_name,
    patch_image_dir,
    cell_mask_dir,
    json_dir,
    patch_image,
    bb,
    lymphocyte_coords,
    monocyte_coords,
):
    """Write a patch, its cell label mask and its point annotations to disk.

    All three files share the stem
    ``{file_name}_{bb[0]}_{bb[1]}_{bb[2]}_{bb[3]}``:

    * ``patch_image_dir/<stem>.npy``: ``patch_image`` as given.
    * ``cell_mask_dir/<stem>.npy``: uint8 mask with the patch's height and
      width; 1 at each lymphocyte ``(x, y)`` and 2 at each monocyte
      ``(x, y)`` (monocytes are written last, so they win on a shared
      pixel).
    * ``json_dir/<stem>.json``: ``{"lymphocytes": ..., "monocytes": ...}``.

    Output directories are created when missing.
    """
    stem = f"{file_name}_{bb[0]}_{bb[1]}_{bb[2]}_{bb[3]}"

    # Patch image
    os.makedirs(patch_image_dir, exist_ok=True)
    np.save(os.path.join(patch_image_dir, stem + ".npy"), patch_image)

    # Cell label mask; coordinates are (x, y) so the row index is coord[1]
    height, width = patch_image.shape[0], patch_image.shape[1]
    label_map = np.zeros(shape=(height, width), dtype=np.uint8)
    for class_id, cells in ((1, lymphocyte_coords), (2, monocyte_coords)):
        for cell in cells:
            label_map[cell[1], cell[0]] = class_id

    os.makedirs(cell_mask_dir, exist_ok=True)
    np.save(os.path.join(cell_mask_dir, stem + ".npy"), label_map)

    # Point annotations
    os.makedirs(json_dir, exist_ok=True)
    payload = {
        "lymphocytes": lymphocyte_coords,
        "monocytes": monocyte_coords,
    }
    with open(os.path.join(json_dir, stem + ".json"), "w") as out:
        json.dump(payload, out)


def mask_from_rois(
    rois: list, canvas_shape: tuple[int, int], downsample: float = 32
):
    """Rasterise ROI polygons into a downsampled binary mask.

    Args:
        rois: items with a ``"polygon"`` entry listing ``(x, y)`` vertices.
        canvas_shape: ``(width, height)`` of the output canvas; the returned
            array has shape ``(height, width)``.
        downsample: factor the polygon coordinates are divided by before
            drawing. Defaults to 32, the value that used to be hard-coded.
            NOTE(review): ``canvas_shape`` should be computed with the same
            factor by the caller.

    Returns:
        uint8 array with 1 inside the ROI polygons and 0 elsewhere.
    """
    # cv2.fillPoly expects 32-bit integer points; a plain ``int`` cast gives
    # 64-bit integers on most platforms, so cast to int32 explicitly.
    polygons = [
        np.round(np.array(roi["polygon"]) / downsample).astype(np.int32)
        for roi in rois
    ]

    mask = np.zeros(
        shape=(canvas_shape[1], canvas_shape[0]), dtype=np.uint8
    )

    # Nothing to draw when there are no ROIs; return the empty mask.
    if polygons:
        cv2.fillPoly(mask, polygons, color=1)
    return mask

In [7]:
# Extract 512x512 level-0 patches that lie fully inside the annotated ROIs of
# every WSI, and save each patch together with its lymphocyte / monocyte
# point annotations (label mask + JSON).

# Path to folder containing all the target WSIs
images_folder = "/home/u1910100/Downloads/Monkey/images/pas-cpg"
# Path to folder containing annotation json files
annotations_folder = (
    "/home/u1910100/Downloads/Monkey/annotations/json"
)

# Output folders
patch_image_dir = (
    "/home/u1910100/Documents/Monkey/patches_512/images"
)
cell_mask_dir = "/home/u1910100/Documents/Monkey/patches_512/annotations/masks"
json_dir = (
    "/home/u1910100/Documents/Monkey/patches_512/annotations/json"
)

# Every extracted patch is fully inside an ROI (min_mask_ratio=1.0 below), so
# the per-patch mask is all ones; build it once instead of once per patch.
mask_patch = np.ones(shape=(512, 512, 1), dtype=np.uint8)

for wsi_image_name in tqdm(os.listdir(images_folder)):
    wsi_id = extract_id(wsi_image_name)
    if wsi_id is None:
        # Without an ID the annotation file names cannot be built.
        print(f"Skipping {wsi_image_name}: no slide ID in file name")
        continue
    print(wsi_id)

    wsi_path = os.path.join(images_folder, wsi_image_name)
    wsi_reader = WSIReader.open(wsi_path)

    baseline_dims = wsi_reader.slide_dimensions(
        resolution=0, units="level"
    )
    base_mpp = wsi_reader.convert_resolution_units(
        input_res=0, input_unit="level", output_unit="mpp"
    )
    # NOTE(review): this ratio is always 1.0, i.e. annotations are used at
    # baseline resolution. Confirm that is intended rather than
    # target_mpp / base_mpp.
    scale_factor = base_mpp[0] / base_mpp[0]

    # Annotation file names and paths
    inflammatory_json_path = os.path.join(
        annotations_folder, f"{wsi_id}_inflammatory-cells.json"
    )
    lymphocytes_json_path = os.path.join(
        annotations_folder, f"{wsi_id}_lymphocytes.json"
    )
    monocyte_json_path = os.path.join(
        annotations_folder, f"{wsi_id}_monocytes.json"
    )

    # Only the ROI polygons are taken from the inflammatory-cells file.
    inflammatory_annotations = parse_json_annotations(
        inflammatory_json_path
    )

    lymphocyte_annotations = parse_json_annotations(
        lymphocytes_json_path
    )
    lymphocyte_points = scale_points(
        lymphocyte_annotations["points"], scale_factor
    )
    monocyte_annotations = parse_json_annotations(monocyte_json_path)
    monocyte_points = scale_points(
        monocyte_annotations["points"], scale_factor
    )

    # Create tissue mask from rois coords (32x downsampled)
    binary_mask = mask_from_rois(
        inflammatory_annotations["rois"],
        (
            int(np.round(baseline_dims[0] / 32)),
            int(np.round(baseline_dims[1] / 32)),
        ),
    )

    # The reader is only used to print the mask metadata; the extractor
    # below receives the raw mask array.
    mask_reader = VirtualWSIReader(binary_mask, power=1.25, mpp=8)
    print(mask_reader.info.as_dict())

    # Extract patches
    patch_extractor = get_patch_extractor(
        input_img=wsi_reader,
        input_mask=binary_mask,
        method_name="slidingwindow",
        patch_size=(512, 512),
        stride=(472, 472),
        resolution=0,
        units="level",
        within_bound=True,
        min_mask_ratio=1.0,
    )

    # Iterating the extractor already yields each patch; indexing it again
    # inside the loop would read every patch from the slide a second time.
    for idx, patch in enumerate(patch_extractor):
        bb = patch_extractor.coordinate_list[idx]

        lymphocyte_xs, lymphocyte_ys = get_relative_coords(
            lymphocyte_points, bb, mask_patch[:, :, 0]
        )
        monocyte_xs, monocyte_ys = get_relative_coords(
            monocyte_points, bb, mask_patch[:, :, 0]
        )
        masked_patch = patch * mask_patch

        lymphocyte_coords = list(zip(lymphocyte_xs, lymphocyte_ys))
        monocyte_coords = list(zip(monocyte_xs, monocyte_ys))

        save_data(
            file_name=wsi_id,
            patch_image_dir=patch_image_dir,
            cell_mask_dir=cell_mask_dir,
            json_dir=json_dir,
            patch_image=masked_patch,
            bb=bb,
            lymphocyte_coords=lymphocyte_coords,
            monocyte_coords=monocyte_coords,
        )



A_P000021




{'objective_power': 1.25, 'slide_dimensions': (2784, 4288), 'level_count': 1, 'level_dimensions': ((2784, 4288),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




B_P000012




{'objective_power': 1.25, 'slide_dimensions': (2352, 3448), 'level_count': 1, 'level_dimensions': ((2352, 3448),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




D_P000005
{'objective_power': 1.25, 'slide_dimensions': (1400, 1824), 'level_count': 1, 'level_dimensions': ((1400, 1824),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




A_P000030




{'objective_power': 1.25, 'slide_dimensions': (2696, 4744), 'level_count': 1, 'level_dimensions': ((2696, 4744),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




B_P000003




{'objective_power': 1.25, 'slide_dimensions': (2344, 3648), 'level_count': 1, 'level_dimensions': ((2344, 3648),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




B_P000002




{'objective_power': 1.25, 'slide_dimensions': (2176, 4360), 'level_count': 1, 'level_dimensions': ((2176, 4360),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




C_P000032
{'objective_power': 1.25, 'slide_dimensions': (1320, 4424), 'level_count': 1, 'level_dimensions': ((1320, 4424),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




D_P000006
{'objective_power': 1.25, 'slide_dimensions': (1920, 2352), 'level_count': 1, 'level_dimensions': ((1920, 2352),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




C_P000023




{'objective_power': 1.25, 'slide_dimensions': (2176, 4936), 'level_count': 1, 'level_dimensions': ((2176, 4936),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




A_P000004




{'objective_power': 1.25, 'slide_dimensions': (3056, 3968), 'level_count': 1, 'level_dimensions': ((3056, 3968),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




C_P000031
{'objective_power': 1.25, 'slide_dimensions': (1136, 4040), 'level_count': 1, 'level_dimensions': ((1136, 4040),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




A_P000011
{'objective_power': 1.25, 'slide_dimensions': (2704, 2152), 'level_count': 1, 'level_dimensions': ((2704, 2152),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




B_P000017




{'objective_power': 1.25, 'slide_dimensions': (2008, 4544), 'level_count': 1, 'level_dimensions': ((2008, 4544),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




D_P000011
{'objective_power': 1.25, 'slide_dimensions': (1400, 2016), 'level_count': 1, 'level_dimensions': ((1400, 2016),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




C_P000024
{'objective_power': 1.25, 'slide_dimensions': (1488, 4552), 'level_count': 1, 'level_dimensions': ((1488, 4552),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




D_P000017
{'objective_power': 1.25, 'slide_dimensions': (1392, 2664), 'level_count': 1, 'level_dimensions': ((1392, 2664),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




B_P000016
{'objective_power': 1.25, 'slide_dimensions': (1840, 3320), 'level_count': 1, 'level_dimensions': ((1840, 3320),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




A_P000002




{'objective_power': 1.25, 'slide_dimensions': (3048, 2800), 'level_count': 1, 'level_dimensions': ((3048, 2800),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




A_P000031




{'objective_power': 1.25, 'slide_dimensions': (2960, 4744), 'level_count': 1, 'level_dimensions': ((2960, 4744),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




A_P000037




{'objective_power': 1.25, 'slide_dimensions': (2872, 4872), 'level_count': 1, 'level_dimensions': ((2872, 4872),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




B_P000011




{'objective_power': 1.25, 'slide_dimensions': (2016, 4616), 'level_count': 1, 'level_dimensions': ((2016, 4616),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




D_P000002
{'objective_power': 1.25, 'slide_dimensions': (1744, 2408), 'level_count': 1, 'level_dimensions': ((1744, 2408),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




A_P000017




{'objective_power': 1.25, 'slide_dimensions': (2960, 4032), 'level_count': 1, 'level_dimensions': ((2960, 4032),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




C_P000033
{'objective_power': 1.25, 'slide_dimensions': (1144, 4296), 'level_count': 1, 'level_dimensions': ((1144, 4296),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




A_P000038




{'objective_power': 1.25, 'slide_dimensions': (2696, 5776), 'level_count': 1, 'level_dimensions': ((2696, 5776),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




A_P000001




{'objective_power': 1.25, 'slide_dimensions': (3048, 2992), 'level_count': 1, 'level_dimensions': ((3048, 2992),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




B_P000007




{'objective_power': 1.25, 'slide_dimensions': (2600, 4160), 'level_count': 1, 'level_dimensions': ((2600, 4160),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




A_P000034




{'objective_power': 1.25, 'slide_dimensions': (2784, 5576), 'level_count': 1, 'level_dimensions': ((2784, 5576),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




C_P000026
{'objective_power': 1.25, 'slide_dimensions': (1136, 4552), 'level_count': 1, 'level_dimensions': ((1136, 4552),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




A_P000035




{'objective_power': 1.25, 'slide_dimensions': (2784, 4936), 'level_count': 1, 'level_dimensions': ((2784, 4936),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




D_P000014
{'objective_power': 1.25, 'slide_dimensions': (1576, 2344), 'level_count': 1, 'level_dimensions': ((1576, 2344),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




A_P000014




{'objective_power': 1.25, 'slide_dimensions': (2792, 3192), 'level_count': 1, 'level_dimensions': ((2792, 3192),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




C_P000022




{'objective_power': 1.25, 'slide_dimensions': (2176, 5272), 'level_count': 1, 'level_dimensions': ((2176, 5272),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




B_P000015




{'objective_power': 1.25, 'slide_dimensions': (2368, 5848), 'level_count': 1, 'level_dimensions': ((2368, 5848),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




D_P000010
{'objective_power': 1.25, 'slide_dimensions': (1320, 2016), 'level_count': 1, 'level_dimensions': ((1320, 2016),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




D_P000015
{'objective_power': 1.25, 'slide_dimensions': (1312, 2024), 'level_count': 1, 'level_dimensions': ((1312, 2024),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




D_P000001
{'objective_power': 1.25, 'slide_dimensions': (1312, 2288), 'level_count': 1, 'level_dimensions': ((1312, 2288),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




D_P000003
{'objective_power': 1.25, 'slide_dimensions': (1232, 1888), 'level_count': 1, 'level_dimensions': ((1232, 1888),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




A_P000029




{'objective_power': 1.25, 'slide_dimensions': (2880, 5064), 'level_count': 1, 'level_dimensions': ((2880, 5064),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




A_P000020




{'objective_power': 1.25, 'slide_dimensions': (3056, 2864), 'level_count': 1, 'level_dimensions': ((3056, 2864),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




A_P000033




{'objective_power': 1.25, 'slide_dimensions': (2792, 5400), 'level_count': 1, 'level_dimensions': ((2792, 5400),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




C_P000038
{'objective_power': 1.25, 'slide_dimensions': (1408, 4488), 'level_count': 1, 'level_dimensions': ((1408, 4488),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




C_P000028
{'objective_power': 1.25, 'slide_dimensions': (1304, 4360), 'level_count': 1, 'level_dimensions': ((1304, 4360),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




A_P000024




{'objective_power': 1.25, 'slide_dimensions': (3056, 5200), 'level_count': 1, 'level_dimensions': ((3056, 5200),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




A_P000005




{'objective_power': 1.25, 'slide_dimensions': (2704, 3056), 'level_count': 1, 'level_dimensions': ((2704, 3056),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




B_P000009




{'objective_power': 1.25, 'slide_dimensions': (1496, 5520), 'level_count': 1, 'level_dimensions': ((1496, 5520),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




A_P000036




{'objective_power': 1.25, 'slide_dimensions': (2872, 5320), 'level_count': 1, 'level_dimensions': ((2872, 5320),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




C_P000021




{'objective_power': 1.25, 'slide_dimensions': (1576, 4880), 'level_count': 1, 'level_dimensions': ((1576, 4880),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




C_P000037
{'objective_power': 1.25, 'slide_dimensions': (1320, 4232), 'level_count': 1, 'level_dimensions': ((1320, 4232),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




B_P000010
{'objective_power': 1.25, 'slide_dimensions': (1760, 3640), 'level_count': 1, 'level_dimensions': ((1760, 3640),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




B_P000020




{'objective_power': 1.25, 'slide_dimensions': (2360, 5648), 'level_count': 1, 'level_dimensions': ((2360, 5648),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




B_P000013




{'objective_power': 1.25, 'slide_dimensions': (1760, 4736), 'level_count': 1, 'level_dimensions': ((1760, 4736),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




A_P000006




{'objective_power': 1.25, 'slide_dimensions': (2792, 4160), 'level_count': 1, 'level_dimensions': ((2792, 4160),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




D_P000016
{'objective_power': 1.25, 'slide_dimensions': (1648, 4160), 'level_count': 1, 'level_dimensions': ((1648, 4160),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




D_P000013
{'objective_power': 1.25, 'slide_dimensions': (1320, 2280), 'level_count': 1, 'level_dimensions': ((1320, 2280),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




B_P000014




{'objective_power': 1.25, 'slide_dimensions': (2360, 4088), 'level_count': 1, 'level_dimensions': ((2360, 4088),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




A_P000022




{'objective_power': 1.25, 'slide_dimensions': (2712, 4424), 'level_count': 1, 'level_dimensions': ((2712, 4424),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




B_P000005




{'objective_power': 1.25, 'slide_dimensions': (2344, 5584), 'level_count': 1, 'level_dimensions': ((2344, 5584),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




B_P000019




{'objective_power': 1.25, 'slide_dimensions': (1840, 4864), 'level_count': 1, 'level_dimensions': ((1840, 4864),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




A_P000032




{'objective_power': 1.25, 'slide_dimensions': (2872, 4608), 'level_count': 1, 'level_dimensions': ((2872, 4608),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




D_P000004
{'objective_power': 1.25, 'slide_dimensions': (1400, 2472), 'level_count': 1, 'level_dimensions': ((1400, 2472),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




D_P000018
{'objective_power': 1.25, 'slide_dimensions': (1224, 2216), 'level_count': 1, 'level_dimensions': ((1224, 2216),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




C_P000039
{'objective_power': 1.25, 'slide_dimensions': (1400, 5008), 'level_count': 1, 'level_dimensions': ((1400, 5008),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




B_P000001




{'objective_power': 1.25, 'slide_dimensions': (2528, 3776), 'level_count': 1, 'level_dimensions': ((2528, 3776),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




A_P000003




{'objective_power': 1.25, 'slide_dimensions': (2872, 3064), 'level_count': 1, 'level_dimensions': ((2872, 3064),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




B_P000004




{'objective_power': 1.25, 'slide_dimensions': (3064, 5904), 'level_count': 1, 'level_dimensions': ((3064, 5904),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




C_P000036
{'objective_power': 1.25, 'slide_dimensions': (1232, 4880), 'level_count': 1, 'level_dimensions': ((1232, 4880),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




A_P000016
{'objective_power': 1.25, 'slide_dimensions': (2704, 2480), 'level_count': 1, 'level_dimensions': ((2704, 2480),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




C_P000040




{'objective_power': 1.25, 'slide_dimensions': (1656, 5720), 'level_count': 1, 'level_dimensions': ((1656, 5720),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




A_P000007




{'objective_power': 1.25, 'slide_dimensions': (3056, 3128), 'level_count': 1, 'level_dimensions': ((3056, 3128),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




C_P000025




{'objective_power': 1.25, 'slide_dimensions': (2104, 5392), 'level_count': 1, 'level_dimensions': ((2104, 5392),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




D_P000012
{'objective_power': 1.25, 'slide_dimensions': (1576, 1568), 'level_count': 1, 'level_dimensions': ((1576, 1568),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




D_P000019
{'objective_power': 1.25, 'slide_dimensions': (1320, 2080), 'level_count': 1, 'level_dimensions': ((1320, 2080),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




C_P000029




{'objective_power': 1.25, 'slide_dimensions': (1664, 4616), 'level_count': 1, 'level_dimensions': ((1664, 4616),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




D_P000009
{'objective_power': 1.25, 'slide_dimensions': (1408, 2672), 'level_count': 1, 'level_dimensions': ((1408, 2672),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




C_P000035




{'objective_power': 1.25, 'slide_dimensions': (2272, 4944), 'level_count': 1, 'level_dimensions': ((2272, 4944),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




D_P000007
{'objective_power': 1.25, 'slide_dimensions': (1400, 2864), 'level_count': 1, 'level_dimensions': ((1400, 2864),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




C_P000030




{'objective_power': 1.25, 'slide_dimensions': (2008, 5328), 'level_count': 1, 'level_dimensions': ((2008, 5328),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




A_P000018




{'objective_power': 1.25, 'slide_dimensions': (2960, 3912), 'level_count': 1, 'level_dimensions': ((2960, 3912),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




C_P000034




{'objective_power': 1.25, 'slide_dimensions': (1920, 4560), 'level_count': 1, 'level_dimensions': ((1920, 4560),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}




C_P000027
{'objective_power': 1.25, 'slide_dimensions': (1576, 4232), 'level_count': 1, 'level_dimensions': ((1576, 4232),), 'level_downsamples': [1.0], 'vendor': 'None', 'mpp': (8, 8), 'file_path': None, 'axes': 'YSX'}


100%|██████████| 81/81 [01:09<00:00,  1.17it/s]


Extract Classification Masks

In [None]:
from monkey.data.data_utils import open_json_file, write_json_file


def extract_patch_and_mask(image, mask, coords, size=36, label=1):
    """Cut a ``size`` x ``size`` crop at each point and attach a binary mask.

    A "point" patch extractor is built over ``image`` for ``coords``. For
    every patch it yields, a window of the same size is read from ``mask``
    at the extractor's recorded x / y location and binarised against
    ``label``.

    Args:
        image: image passed to the patch extractor as ``input_img``.
        mask: label image opened with ``VirtualWSIReader.open``.
        coords: point locations, converted with ``np.array``.
        size: side length of each crop in pixels.
        label: mask value that is mapped to 1; every other value becomes 0.

    Returns:
        List of uint8 arrays of shape ``(size, size, 4)``: channels 0-2
        hold the image patch, channel 3 the binary mask.
    """
    point_extractor = get_patch_extractor(
        "point",
        input_img=image,
        locations_list=np.array(coords),
        patch_size=size,
    )
    mask_reader = VirtualWSIReader.open(mask)

    stacked_patches = []
    for idx, rgb_patch in enumerate(point_extractor):
        locations = point_extractor.locations_df
        location = (locations["x"][idx], locations["y"][idx])
        label_window = mask_reader.read_rect(location, (size, size))[
            :, :, 0
        ]

        stacked = np.zeros(shape=(size, size, 4), dtype=np.uint8)
        stacked[:, :, 0:3] = rgb_patch
        stacked[:, :, 3] = np.where(label_window == label, 1, 0)
        stacked_patches.append(stacked)
    return stacked_patches


# Cut a 36x36 crop (RGB + binary cell mask) around every annotated lymphocyte
# and monocyte, save each crop as .npy and record its class label.

# Path to folder containing the per-patch arrays (image + mask channels)
nuclick_folder = "/home/u1910100/Documents/Monkey/patches_256/annotations/nuclick_hovernext"
json_annotation_folder = (
    "/home/u1910100/Documents/Monkey/patches_256/annotations/json"
)

save_dir = "/home/u1910100/Documents/Monkey/classification/patches"
# np.save does not create missing directories.
os.makedirs(save_dir, exist_ok=True)

data_labels = {}

files = os.listdir(nuclick_folder)

# (annotation key, tag used in the saved file name, class label)
cell_classes = (
    ("lymphocytes", "lymph", 1),
    ("monocytes", "mono", 2),
)

for file_name in tqdm(files, leave=False):
    file_path = os.path.join(nuclick_folder, file_name)

    file_name_without_ext = os.path.splitext(file_name)[0]

    json_path = os.path.join(
        json_annotation_folder, f"{file_name_without_ext}.json"
    )
    annotation = open_json_file(json_path)

    data = np.load(file_path)
    data = data.astype(np.uint8)
    img = data[:, :, 0:3]

    # NOTE(review): the mask is read from channel index 4 (the fifth
    # channel), not the channel right after RGB - confirm against the
    # layout of the saved arrays.
    mask = data[:, :, 4]

    for key, tag, label in cell_classes:
        cell_coords = annotation[key]
        if not cell_coords:
            continue
        crops = extract_patch_and_mask(
            img, mask, cell_coords, 36, label=label
        )
        # Distinct loop names so the outer loop's variables are not
        # overwritten by the inner loop.
        for crop_number, crop in enumerate(crops, start=1):
            save_name = f"{file_name_without_ext}_{tag}_{crop_number}"
            save_path = os.path.join(save_dir, save_name)
            np.save(save_path, crop)
            data_labels[save_name] = label

data_labels_save_path = (
    "/home/u1910100/Documents/Monkey/classification/labels.json"
)
write_json_file(data_labels_save_path, data_labels)