In [None]:
!apt update && apt install -y openslide-tools
!pip install openslide-python
!pip install shapely

In [None]:
from google.colab import drive

# Mount the Google Drive to access the files
drive.mount("/content/gdrive/")

In [1]:
# Set the working directory
work_directory = r"/Volumes/T7 Red/wsi_segmentation/"

In [2]:
import os
import sys
import openslide
from tqdm.auto import tqdm
from shapely.geometry import shape
import json

# Add the path to your project root directory
if work_directory not in sys.path:
    sys.path.append(work_directory)

# my utility functions
from utils.general import read_path, create_directory
from utils.drawing_functions import draw_segmentation_mask, annotate_images

In [3]:
DATASETS_PATH = os.path.join(work_directory, "datasets")
INTERIM_PATH = os.path.join(DATASETS_PATH, "interim")
CP_RAW_PATH = os.path.join("/Volumes/T7 Red/cerrahpasa-wsi", "raw")
CP_ANNOTATIONS_PATH = os.path.join("/Volumes/T7 Red/cerrahpasa-wsi", "annotations")
RAW_PATH = os.path.join("/Volumes/T7 Red/wsi_code/datasets", "raw")
ANNOTATIONS_PATH = os.path.join("/Volumes/T7 Red/wsi_code/datasets", "annotations")

In [4]:
slides = read_path(os.path.join(RAW_PATH, "**/*.svs"), recursive=True)
slides.extend(read_path(os.path.join(CP_RAW_PATH, "**/*.svs"), recursive=True))
slides.sort()
annotations = read_path(os.path.join(ANNOTATIONS_PATH, "**/*.geojson"), recursive=True)
annotations.extend(
    read_path(os.path.join(CP_ANNOTATIONS_PATH, "**/*.geojson"), recursive=True)
)
annotations.sort()

In [5]:
print(len(slides), slides[:5])
print(len(annotations), annotations[:5])

56 ['/Volumes/T7 Red/cerrahpasa-wsi/raw/lms/B-1088-2022-1 YD-YK.svs', '/Volumes/T7 Red/cerrahpasa-wsi/raw/lms/B-1088-2022-2 YD-YK.svs', '/Volumes/T7 Red/cerrahpasa-wsi/raw/lms/B-1088-2022-4 YD-YK.svs', '/Volumes/T7 Red/cerrahpasa-wsi/raw/lms/B-1586-21-1 YK.svs', '/Volumes/T7 Red/cerrahpasa-wsi/raw/lms/B-1586-21-11-YK-YD.svs']
56 ['/Volumes/T7 Red/cerrahpasa-wsi/annotations/lms_co/B-1088-2022-1 YD-YK.geojson', '/Volumes/T7 Red/cerrahpasa-wsi/annotations/lms_co/B-1088-2022-2 YD-YK.geojson', '/Volumes/T7 Red/cerrahpasa-wsi/annotations/lms_co/B-1088-2022-4 YD-YK.geojson', '/Volumes/T7 Red/cerrahpasa-wsi/annotations/lms_co/B-1586-21-1 YK.geojson', '/Volumes/T7 Red/cerrahpasa-wsi/annotations/lms_co/B-1586-21-11-YK-YD.geojson']


In [6]:
X1_EXTRACT_PATH = os.path.join(INTERIM_PATH, "x64")
X1_MASKS_EXTRACT_PATH = os.path.join(INTERIM_PATH, "x64_masks")
X1_THUMB_EXTRACT_PATH = os.path.join(INTERIM_PATH, "x64_thumbs")
X4_EXTRACT_PATH = os.path.join(INTERIM_PATH, "x16")
X4_MASKS_EXTRACT_PATH = os.path.join(INTERIM_PATH, "x16_masks")

create_directory(X1_EXTRACT_PATH)
create_directory(X1_MASKS_EXTRACT_PATH)
create_directory(X1_THUMB_EXTRACT_PATH)
create_directory(X4_EXTRACT_PATH)
create_directory(X4_MASKS_EXTRACT_PATH)

Directory already exists: /Volumes/T7 Red/wsi_segmentation/datasets/interim/x64
Directory already exists: /Volumes/T7 Red/wsi_segmentation/datasets/interim/x64_masks
Directory already exists: /Volumes/T7 Red/wsi_segmentation/datasets/interim/x64_thumbs
Directory already exists: /Volumes/T7 Red/wsi_segmentation/datasets/interim/x16
Directory already exists: /Volumes/T7 Red/wsi_segmentation/datasets/interim/x16_masks


In [7]:
def process_slide(slide, annotations):
    slide_name = os.path.splitext(os.path.basename(slide))[0]
    case_name = slide.split("/")[-2]
    print(f"Processing slide: {slide_name} from case: {case_name}")

    # Read the annotations
    with open(annotations, "r") as json_file:
        geojson_data = json.load(json_file)

    geojson_polygons = [
        shape(feature["geometry"]) for feature in geojson_data["features"]
    ]

    json_file.close()

    # Read the slide
    sld = openslide.OpenSlide(slide)
    # print_slide_info(sld)

    # Extract the x1 and x4 images
    scale_x1 = sld.level_downsamples[-1]
    scale_x4 = sld.level_downsamples[-2]

    for scale in [scale_x1, scale_x4]:
        if scale == scale_x1:
            level = sld.level_count - 1
        elif scale == scale_x4:
            level = sld.level_count - 2

        rgb_slide = sld.read_region((0, 0), level, sld.level_dimensions[level])
        seg_mask = draw_segmentation_mask(rgb_slide.size, geojson_polygons, scale)
        rgb_slide = rgb_slide.convert("RGB")

        output_dir = X1_EXTRACT_PATH if scale == scale_x1 else X4_EXTRACT_PATH
        mask_output_dir = (
            X1_MASKS_EXTRACT_PATH if scale == scale_x1 else X4_MASKS_EXTRACT_PATH
        )

        output_path = os.path.join(output_dir, f"{slide_name}_x{int(scale)}.png")
        mask_output_path = os.path.join(
            mask_output_dir, f"{slide_name}_x{int(scale)}_mask.png"
        )
        thumb_output_path = os.path.join(
            X1_THUMB_EXTRACT_PATH, f"{slide_name}_x{int(scale)}_thumb.png"
        )

        if not os.path.exists(output_path):
            rgb_slide.save(output_path, "PNG")
            print(f"Saved image: {output_path}")
        else:
            print(f"Image already exists: {output_path}")

        if not os.path.exists(mask_output_path):
            seg_mask.save(mask_output_path, "PNG")
            print(f"Saved mask: {mask_output_path}")
        else:
            print(f"Mask already exists: {mask_output_path}")

        if not os.path.exists(thumb_output_path) and scale == scale_x1:
            thumb = annotate_images(
                rgb_slide, geojson_polygons, scale, fill_color=(0, 255, 0, 51)
            )
            thumb.save(thumb_output_path, "PNG")
            print(f"Saved thumb: {thumb_output_path}")
        else:
            print(f"Thumb already exists: {thumb_output_path}")

    sld.close()

In [8]:
# Loop through slides and annotations
for slide, anno in tqdm(zip(slides, annotations), total=len(slides)):
    process_slide(slide, anno)

  0%|          | 0/56 [00:00<?, ?it/s]

Processing slide: B-1088-2022-1 YD-YK from case: lms
Image already exists: /Volumes/T7 Red/wsi_segmentation/datasets/interim/x64/B-1088-2022-1 YD-YK_x64.png
Mask already exists: /Volumes/T7 Red/wsi_segmentation/datasets/interim/x64_masks/B-1088-2022-1 YD-YK_x64_mask.png
Thumb already exists: /Volumes/T7 Red/wsi_segmentation/datasets/interim/x64_thumbs/B-1088-2022-1 YD-YK_x64_thumb.png
Image already exists: /Volumes/T7 Red/wsi_segmentation/datasets/interim/x16/B-1088-2022-1 YD-YK_x16.png
Mask already exists: /Volumes/T7 Red/wsi_segmentation/datasets/interim/x16_masks/B-1088-2022-1 YD-YK_x16_mask.png
Thumb already exists: /Volumes/T7 Red/wsi_segmentation/datasets/interim/x64_thumbs/B-1088-2022-1 YD-YK_x16_thumb.png
Processing slide: B-1088-2022-2 YD-YK from case: lms
Image already exists: /Volumes/T7 Red/wsi_segmentation/datasets/interim/x64/B-1088-2022-2 YD-YK_x64.png
Mask already exists: /Volumes/T7 Red/wsi_segmentation/datasets/interim/x64_masks/B-1088-2022-2 YD-YK_x64_mask.png
Thumb 