In [None]:
!apt update && apt install -y openslide-tools
!pip install openslide-python
!pip install shapely

In [None]:
from google.colab import drive

# Mount the Google Drive to access the files
drive.mount("/content/gdrive/")

In [1]:
# Set the working directory
work_directory = r"/Volumes/T7/wsi_code"

In [2]:
import os
import sys
import openslide
from tqdm.auto import tqdm
from shapely.geometry import shape
import json
from datetime import datetime

# Add the path to your project root directory
if work_directory not in sys.path:
    sys.path.append(work_directory)

# my utility functions
from utils.general import read_path

In [3]:
DATASETS_PATH = os.path.join(work_directory, "datasets")
INTERIM_PATH = os.path.join(DATASETS_PATH, "interim")
RAW_PATH = os.path.join(DATASETS_PATH, "raw")
ANNOTATIONS_PATH = os.path.join(DATASETS_PATH, "annotations")

In [4]:
slides = read_path(os.path.join(RAW_PATH, "**/*.svs"), recursive=True)
annotations = read_path(os.path.join(ANNOTATIONS_PATH, "**/*.geojson"), recursive=True)

In [5]:
slides[:5]

['/Volumes/T7/wsi_code/datasets/raw/liposarkom/TCGA-3B-A9HI-01Z-00-DX1.FF553011-934A-4E3E-AA53-B87FC307E095.svs',
 '/Volumes/T7/wsi_code/datasets/raw/liposarkom/TCGA-DX-AB36-01Z-00-DX2.B0293A74-52AC-490B-A5F0-CA96F4B36252.svs',
 '/Volumes/T7/wsi_code/datasets/raw/liposarkom/TCGA-DX-AB37-01Z-00-DX1.B778136D-9699-48FA-91F2-16BD16569571.svs',
 '/Volumes/T7/wsi_code/datasets/raw/liposarkom/TCGA-DX-AB37-01Z-00-DX3.AE53E163-C3F2-4BF1-8A2E-232842C590F1.svs',
 '/Volumes/T7/wsi_code/datasets/raw/liposarkom/TCGA-Z4-A9VC-01Z-00-DX1.42D1CACE-2FFB-4CE4-B8D6-8C0D5BC3B3F2.svs']

In [6]:
illness_dict = {"liposarkom": "00", "lms": "01", "mpskt": "02", "ss": "03"}

case_dict = {
    # lipo
    "TCGA-3B-A9HI-01Z-00-DX1.FF553011-934A-4E3E-AA53-B87FC307E095": "00",
    "TCGA-DX-AB36-01Z-00-DX2.B0293A74-52AC-490B-A5F0-CA96F4B36252": "01",
    "TCGA-DX-AB37-01Z-00-DX1.B778136D-9699-48FA-91F2-16BD16569571": "02",
    "TCGA-DX-AB37-01Z-00-DX3.AE53E163-C3F2-4BF1-8A2E-232842C590F1": "03",
    "TCGA-Z4-A9VC-01Z-00-DX1.42D1CACE-2FFB-4CE4-B8D6-8C0D5BC3B3F2": "04",
    # lms
    "TCGA-DX-A3U7-01Z-00-DX1.FD7076CA-39BC-4330-B91C-6DF7F0751D57": "00",
    "TCGA-DX-A48L-01Z-00-DX1.656CE0A0-F442-4715-8250-C7B71A73FBCC": "01",
    "TCGA-IF-A4AK-01Z-00-DX1.A39253B8-4899-4360-BFF2-3538AEF7A970": "02",
    "TCGA-X6-A7WC-01Z-00-DX1.A1B72EE7-D7BD-4D23-A38F-39CC4E1C32A8": "03",
    "TCGA-X6-A7WD-01Z-00-DX2.37790492-072A-4392-8D7B-904286F6C805": "04",
    # mpskt
    "TCGA-QQ-A8VG-01Z-00-DX1.A9A10DBA-09AE-4C1A-A126-2180536400ED": "00",
    "TCGA-QQ-A8VG-01Z-00-DX2.9C8082B3-7E59-46C8-88B6-73DD4D2D29E7": "01",
    "TCGA-RN-AAAQ-01Z-00-DX1.493F5285-F6E5-435F-902F-E384E4440C53": "02",
    "TCGA-SI-A71O-01Z-00-DX5.DAF40BD0-4B92-4201-8B03-B0BFAA14CBBC": "03",
    "TCGA-SI-A71Q-01Z-00-DX3.746B592F-98CD-41CA-837E-E4E0B12F4020": "04",
    # ss
    "TCGA-DX-A7EQ-01Z-00-DX1.6E243B4A-CE79-4B31-B98B-24B89E7C2FB4": "00",
    "TCGA-DX-AB3B-01Z-00-DX1.454F6EDB-796F-4BB0-A92E-E4F5D592E897": "01",
    "TCGA-DX-AB3B-01Z-00-DX2.2BC397E2-2F44-4C2D-87C2-439A052C8B0F": "02",
    "TCGA-DX-AB3C-01Z-00-DX2.CBA90EC0-A148-400F-9DFA-870F637E2958": "03",
    "TCGA-MJ-A850-01Z-00-DX1.67DDD01B-0D67-4A0F-B535-B9A1E1BE65EA": "04",
}

In [7]:
EXTRACT_PATH = os.path.join(DATASETS_PATH, "slides_info")
# lipo = os.path.join(EXTRACT_PATH, "lipo")
# lms = os.path.join(EXTRACT_PATH, "lms")
# mpskt = os.path.join(EXTRACT_PATH, "mpskt")
# ss = os.path.join(EXTRACT_PATH, "ss")

# create_directory(lipo)
# create_directory(lms)
# create_directory(mpskt)
# create_directory(ss)

In [37]:
for slide, anno in tqdm(zip(slides, annotations), total=len(slides)):
    # print(slide, anno)
    slide_name = os.path.splitext(os.path.basename(slide))[0]
    case_name = slide.split("/")[-2]

    sld = openslide.OpenSlide(slide)
    txt_file = os.path.join(
        EXTRACT_PATH, f"{illness_dict[case_name]}_{case_dict[slide_name]}_info.txt"
    )

    with open(txt_file, "w") as f:
        current_datetime = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
        f.write(f"Date: {current_datetime}\n")
        f.write(f"Slide: {slide_name}\n")
        f.write(f"Code: {illness_dict[case_name]}_{case_dict[slide_name]}\n")
        f.write(f"Dimensions: {sld.level_dimensions}\n")

    f.close()

    print(sld.associated_images.keys())

    # print(sld.level_dimensions)
    sld.associated_images["thumbnail"].save(
        os.path.join(
            EXTRACT_PATH,
            f"{illness_dict[case_name]}_{case_dict[slide_name]}_thumbnail.png",
        )
    )

    rgb_slide = sld.read_region((0, 0), sld.level_count - 2, sld.level_dimensions[-2])
    rgb_slide = rgb_slide.convert("RGB")
    # rgb_slide.save(os.path.join(EXTRACT_PATH, f"{illness_dict[case_name]}_{case_dict[slide_name]}.jpeg"))

    # # sld.associated_images['thumbnail'].show()
    # sld.associated_images['macro'].save(os.path.join(EXTRACT_PATH, f"{illness_dict[case_name]}_{case_dict[slide_name]}_macro.png"))
    # # sld.associated_images['macro'].show()
    # sld.associated_images['label'].save(os.path.join(EXTRACT_PATH, f"{illness_dict[case_name]}_{case_dict[slide_name]}_label.png"))
    # sld.associated_images['label'].show()

    sld.close()

  0%|          | 0/20 [00:00<?, ?it/s]

KeysView(<_AssociatedImageMap {'thumbnail': <PIL.Image.Image image mode=RGBA size=1024x523 at 0x10CD122F0>}>)
KeysView(<_AssociatedImageMap {'thumbnail': <PIL.Image.Image image mode=RGBA size=978x768 at 0x10CD122F0>}>)
KeysView(<_AssociatedImageMap {'thumbnail': <PIL.Image.Image image mode=RGBA size=1024x569 at 0x10CD122F0>}>)
KeysView(<_AssociatedImageMap {'thumbnail': <PIL.Image.Image image mode=RGBA size=1009x768 at 0x10CD12C20>}>)
KeysView(<_AssociatedImageMap {'thumbnail': <PIL.Image.Image image mode=RGBA size=985x768 at 0x10CD12C20>}>)
KeysView(<_AssociatedImageMap {'thumbnail': <PIL.Image.Image image mode=RGBA size=952x768 at 0x10CD11D80>}>)
KeysView(<_AssociatedImageMap {'thumbnail': <PIL.Image.Image image mode=RGBA size=610x768 at 0x10F1EBBE0>}>)
KeysView(<_AssociatedImageMap {'thumbnail': <PIL.Image.Image image mode=RGBA size=1024x695 at 0x10F1EBD90>}>)
KeysView(<_AssociatedImageMap {'thumbnail': <PIL.Image.Image image mode=RGBA size=1024x661 at 0x10F1E8430>}>)
KeysView(<_Ass

In [8]:
from PIL import ImageDraw


def draw_geojson_objects(
    image, geojson_polygons, scale, outline_color=(0, 255, 0, 255), fill_color=None
):
    """
    Draw GeoJSON objects on an RGB image.

    Parameters:
    - image: PIL Image object (RGB mode) to draw on.
    - geojson_features: List of GeoJSON features with geometries to draw.
    - outline_color: Tuple (R, G, B, A) representing the outline color (default: green with full opacity).
    - fill_color: Tuple (R, G, B, A) representing the fill color (default: None, no fill).

    Returns:
    - PIL Image object with drawn GeoJSON objects.
    """
    draw = ImageDraw.Draw(image, "RGBA")

    for polygon in geojson_polygons:
        # Convert geometry coordinates to image coordinates
        if image.mode != "RGB":
            raise ValueError("Image must be in RGB mode.")

        polygon_list = [(x / scale, y / scale) for x, y in polygon.exterior.coords]

        # Draw the polygon outline
        draw.polygon(polygon_list, outline=outline_color, fill=fill_color)

    return image

In [11]:
for slide, anno in tqdm(zip(slides, annotations), total=len(slides)):
    slide_name = os.path.splitext(os.path.basename(slide))[0]
    case_name = slide.split("/")[-2]

    with open(anno, "r") as json_file:
        geojson_data = json.load(json_file)

    geojson_polygons = [
        shape(feature["geometry"]) for feature in geojson_data["features"]
    ]

    json_file.close()

    sld = openslide.OpenSlide(slide)
    scale = sld.level_downsamples[-2]

    # Annotate the slide using the geojson polygons
    rgb_slide = sld.read_region((0, 0), sld.level_count - 2, sld.level_dimensions[-2])
    rgb_slide = rgb_slide.convert("RGB")

    output_path = os.path.join(
        EXTRACT_PATH, f"{illness_dict[case_name]}_{case_dict[slide_name]}.jpg"
    )
    rgb_slide.save(
        output_path, format="JPEG", quality=75
    )  # Adjust quality as needed (0-100)

    # rgb_slide = draw_geojson_objects(rgb_slide, geojson_polygons, scale=sld.level_downsamples[-2], fill_color=(0, 255, 0, 51))

    # rgb_slide.save(os.path.join(EXTRACT_PATH, f"{illness_dict[case_name]}_{case_dict[slide_name]}_annotated.png"), optimize=True)

    # Save the annotated image as JPEG with compression
    # output_path = os.path.join(EXTRACT_PATH, f"{illness_dict[case_name]}_{case_dict[slide_name]}_annotated.jpg")
    # rgb_slide.save(output_path, format='JPEG', quality=75)  # Adjust quality as needed (0-100)

    rgb_slide.close()
    sld.close()

  0%|          | 0/20 [00:00<?, ?it/s]